@ -1,12 +1,12 @@
lib/linebuffer.h | 8
src/cut.c | 43 3 ++++++++++++++++++++++++-
src/cut.c | 44 3 ++++++++++++++++++++++++-
src/expand.c | 165 +++++++++
src/fold.c | 290 +++++++++++++++- -
src/join.c | 363 ++++++++++++++++++---
src/pr.c | 435 ++++++++++++++++++++++- --
src/sort.c | 745 +++++++++++++++++++++++++++++++++++++++++---
src/fold.c | 308 ++++++++++++++++ -
src/join.c | 363 ++++++++++++++++++--
src/pr.c | 444 +++++++++++++++++++++++ --
src/sort.c | 763 +++++++++++++++++++++++++++++++++++++++++---
src/unexpand.c | 228 +++++++++++++
src/uniq.c | 265 +++++++++++++++
src/uniq.c | 265 ++++++++++++++-
tests/i18n/sort.sh | 29 +
tests/local.mk | 2
tests/misc/cut.pl | 7
@ -15,16 +15,16 @@
tests/misc/join.pl | 50 ++
tests/misc/sort-mb-tests.sh | 45 ++
tests/misc/sort-merge.pl | 42 ++
tests/misc/sort.pl | 39 ++
tests/misc/sort.pl | 40 ++
tests/misc/unexpand.pl | 39 ++
tests/misc/uniq.pl | 55 +++
tests/pr/pr-tests.pl | 50 ++
21 files changed, 3198 insertions(+), 182 deletions(-)
tests/pr/pr-tests.pl | 49 ++
21 files changed, 3255 insertions(+), 180 deletions(-)
Index: lib/linebuffer.h
===================================================================
--- lib/linebuffer.h.orig 2015-06-16 07:00:37.000000000 +0200
+++ lib/linebuffer.h 2015-07-09 17:15:19.619057660 +0200
--- lib/linebuffer.h.orig
+++ lib/linebuffer.h
@@ -21,6 +21,11 @@
# include <stdio.h>
@ -49,8 +49,8 @@ Index: lib/linebuffer.h
/* Initialize linebuffer LINEBUFFER for use. */
Index: src/cut.c
===================================================================
--- src/cut.c.orig 2015-06-26 19:05:22.000000000 +0200
+++ src/cut.c 2015-07-09 17:15:19.619057660 +0200
--- src/cut.c.orig
+++ src/cut.c
@@ -28,6 +28,11 @@
#include <assert.h>
#include <getopt.h>
@ -283,7 +283,7 @@ Index: src/cut.c
/* Read from stream STREAM, printing to standard output any selected fields. */
static void
@@ -648,13 +805,2 0 1 @@ cut_fields (FILE *stream)
@@ -648,13 +805,2 1 1 @@ cut_fields (FILE *stream)
}
}
@ -467,9 +467,19 @@ Index: src/cut.c
+
+ case field_mode:
+ if (delimlen == 1)
+ cut_fields (stream);
+ else
+ cut_fields_mb (stream);
+ {
+ /* Check if we have utf8 multibyte locale, so we can use this
+ optimization because of uniqueness of characters, which is
+ not true for e.g. SJIS */
+ char * loc = setlocale(LC_CTYPE, NULL);
+ if (loc && (strstr (loc, "UTF-8") || strstr (loc, "utf-8") ||
+ strstr (loc, "UTF8") || strstr (loc, "utf8")))
+ {
+ cut_fields (stream);
+ break;
+ }
+ }
+ cut_fields_mb (stream);
+ break;
+
+ default:
@ -488,7 +498,7 @@ Index: src/cut.c
}
/* Process file FILE to standard output.
@@ -706,6 +10 5 1,7 @@ main (int argc, char **argv)
@@ -706,6 +10 6 1,7 @@ main (int argc, char **argv)
bool ok;
bool delim_specified = false;
char *spec_list_string IF_LINT ( = NULL);
@ -496,7 +506,7 @@ Index: src/cut.c
initialize_main (&argc, &argv);
set_program_name (argv[0]);
@@ -728,7 +10 7 4,6 @@ main (int argc, char **argv)
@@ -728,7 +10 8 4,6 @@ main (int argc, char **argv)
switch (optc)
{
case 'b':
@ -504,7 +514,7 @@ Index: src/cut.c
/* Build the byte list. */
if (operating_mode != undefined_mode)
FATAL_ERROR (_("only one type of list may be specified"));
@@ -736,6 +10 8 1,14 @@ main (int argc, char **argv)
@@ -736,6 +10 9 1,14 @@ main (int argc, char **argv)
spec_list_string = optarg;
break;
@ -519,7 +529,7 @@ Index: src/cut.c
case 'f':
/* Build the field list. */
if (operating_mode != undefined_mode)
@@ -747,10 +11 0 0,38 @@ main (int argc, char **argv)
@@ -747,10 +11 1 0,38 @@ main (int argc, char **argv)
case 'd':
/* New delimiter. */
/* Interpret -d '' to mean 'use the NUL byte as the delimiter.' */
@ -562,7 +572,7 @@ Index: src/cut.c
break;
case OUTPUT_DELIMITER_OPTION:
@@ -763,6 +11 4 4,7 @@ main (int argc, char **argv)
@@ -763,6 +11 5 4,7 @@ main (int argc, char **argv)
break;
case 'n':
@ -570,7 +580,7 @@ Index: src/cut.c
break;
case 's':
@@ -802,15 +11 8 4,34 @@ main (int argc, char **argv)
@@ -802,15 +11 9 4,34 @@ main (int argc, char **argv)
}
if (!delim_specified)
@ -613,8 +623,8 @@ Index: src/cut.c
if (optind == argc)
Index: src/expand.c
===================================================================
--- src/expand.c.orig 2015-06-26 19:05:22.000000000 +0200
+++ src/expand.c 2015-07-09 17:15:19.619057660 +0200
--- src/expand.c.orig
+++ src/expand.c
@@ -37,12 +37,34 @@
#include <stdio.h>
#include <getopt.h>
@ -809,20 +819,43 @@ Index: src/expand.c
error (EXIT_FAILURE, errno, "-");
Index: src/fold.c
===================================================================
--- src/fold.c.orig 2015-06-26 19:05:22.000000000 +0200
+++ src/fold.c 2015-07-09 17:17:59.750295812 +0200
@@ -18,6 +18,10 @@
#include <config.h>
+#if HAVE_WCHAR_H
+#include <wchar.h>
+#endif
+
#include <stdio.h>
--- src/fold.c.orig
+++ src/fold.c
@@ -22,11 +22,33 @@
#include <getopt.h>
#include <sys/types.h>
@@ -34,20 +38,41 @@
+/* Get mbstate_t, mbrtowc(), wcwidth(). */
+#if HAVE_WCHAR_H
+# include <wchar.h>
+#endif
+
+/* Get iswprint(), iswblank(), wcwidth(). */
+#if HAVE_WCTYPE_H
+# include <wctype.h>
+#endif
+
#include "system.h"
#include "error.h"
#include "fadvise.h"
#include "xdectoint.h"
+/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
+ installation; work around this configuration error. */
+#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
+# undef MB_LEN_MAX
+# define MB_LEN_MAX 16
+#endif
+
+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
+#if HAVE_MBRTOWC && defined mbstate_t
+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
+#endif
+
#define TAB_WIDTH 8
/* The official name of this program (e.g., no 'g' prefix). */
@@ -34,20 +56,41 @@
#define AUTHORS proper_name ("David MacKenzie")
@ -868,7 +901,7 @@ Index: src/fold.c
{"spaces", no_argument, NULL, 's'},
{"width", required_argument, NULL, 'w'},
{GETOPT_HELP_OPTION_DECL},
@@ -75,6 +1 00 ,7 @@ Wrap input lines in each FILE, writing t
@@ -75,6 +1 18 ,7 @@ Wrap input lines in each FILE, writing t
fputs (_("\
-b, --bytes count bytes rather than columns\n\
@ -876,7 +909,7 @@ Index: src/fold.c
-s, --spaces break at spaces\n\
-w, --width=WIDTH use WIDTH columns instead of 80\n\
"), stdout);
@@ -92,7 +1 18 ,7 @@ Wrap input lines in each FILE, writing t
@@ -92,7 +1 36 ,7 @@ Wrap input lines in each FILE, writing t
static size_t
adjust_column (size_t column, char c)
{
@ -885,7 +918,7 @@ Index: src/fold.c
{
if (c == '\b')
{
@@ -115,30 +1 41 ,14 @@ adjust_column (size_t column, char c)
@@ -115,30 +1 59 ,14 @@ adjust_column (size_t column, char c)
to stdout, with maximum line length WIDTH.
Return true if successful. */
@ -918,7 +951,7 @@ Index: src/fold.c
fadvise (istream, FADVISE_SEQUENTIAL);
@@ -168,6 +1 78 ,15 @@ fold_file (char const *filename, size_t
@@ -168,6 +1 96 ,15 @@ fold_file (char const *filename, size_t
bool found_blank = false;
size_t logical_end = offset_out;
@ -934,16 +967,16 @@ Index: src/fold.c
/* Look for the last blank. */
while (logical_end)
{
@@ -214,11 +2 33 ,221 @@ fold_file (char const *filename, size_t
@@ -214,11 +2 51 ,221 @@ fold_file (char const *filename, size_t
line_out[offset_out++] = c;
}
- saved_errno = errno;
+ *saved_errno = errno;
if (offset_out)
fwrite (line_out, sizeof (char), (size_t) offset_out, stdout);
+
+ if (offset_out)
+ fwrite (line_out, sizeof (char), (size_t) offset_out, stdout);
+
+}
+
+#if HAVE_MBRTOWC
@ -1115,10 +1148,10 @@ Index: src/fold.c
+ }
+
+ *saved_errno = errno;
+
+ if (offset_out)
+ fwrite (line_out, sizeof (char), (size_t) offset_out, stdout);
+
if (offset_out)
fwrite (line_out, sizeof (char), (size_t) offset_out, stdout);
+}
+#endif
+
@ -1157,7 +1190,7 @@ Index: src/fold.c
if (ferror (istream))
{
error (0, saved_errno, "%s", filename);
@@ -251,7 +4 80 ,8 @@ main (int argc, char **argv)
@@ -251,7 +4 9 8,8 @@ main (int argc, char **argv)
atexit (close_stdout);
@ -1167,7 +1200,7 @@ Index: src/fold.c
while ((optc = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1)
{
@@ -260,7 + 490 ,15 @@ main (int argc, char **argv)
@@ -260,7 + 508 ,15 @@ main (int argc, char **argv)
switch (optc)
{
case 'b': /* Count bytes rather than columns. */
@ -1186,8 +1219,8 @@ Index: src/fold.c
case 's': /* Break at word boundaries. */
Index: src/join.c
===================================================================
--- src/join.c.orig 2015-06-26 19:05:22.000000000 +0200
+++ src/join.c 2015-07-09 17:15:19.620057636 +0200
--- src/join.c.orig
+++ src/join.c
@@ -22,18 +22,32 @@
#include <sys/types.h>
#include <getopt.h>
@ -1682,8 +1715,8 @@ Index: src/join.c
case 'z':
Index: src/pr.c
===================================================================
--- src/pr.c.orig 2015-06-26 19:05:22.000000000 +0200
+++ src/pr.c 2015-07-09 17:36:00.714903141 +0200
--- src/pr.c.orig
+++ src/pr.c
@@ -312,6 +312,24 @@
#include <getopt.h>
@ -1709,7 +1742,26 @@ Index: src/pr.c
#include "system.h"
#include "error.h"
#include "fadvise.h"
@@ -416,7 +434,20 @@ struct COLUMN
@@ -324,6 +342,18 @@
#include "xstrtol.h"
#include "xdectoint.h"
+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
+#if HAVE_MBRTOWC && defined mbstate_t
+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
+#endif
+
+#ifndef HAVE_DECL_WCWIDTH
+"this configure-time declaration test was not run"
+#endif
+#if !HAVE_DECL_WCWIDTH
+extern int wcwidth ();
+#endif
+
/* The official name of this program (e.g., no 'g' prefix). */
#define PROGRAM_NAME "pr"
@@ -416,7 +446,20 @@ struct COLUMN
typedef struct COLUMN COLUMN;
@ -1731,18 +1783,15 @@ Index: src/pr.c
static bool read_line (COLUMN *p);
static bool print_page (void);
static bool print_stored (COLUMN *p);
@@ -427,8 +458,8 @@ static void pad_across_to (int position)
static void add_line_number (COLUMN *p);
@@ -428,6 +471,7 @@ static void add_line_number (COLUMN *p);
static void getoptnum (const char *n_str, int min, int *num,
const char *errfmt);
-static void getoptarg (char *arg, char switch_char, char *character,
- int *number);
+static void getoptarg (char *arg, char switch_char, char *character, int *character_length,
+ int *character_width, int *number);
static void getoptarg (char *arg, char switch_char, char *character,
+ int *character_length, int *character_width,
int *number);
static void print_files (int number_of_files, char **av);
static void init_parameters (int number_of_files);
static void init_header (char const *filename, int desc);
@@ -441,7 +472,6 @@ static void store_char (char c);
@@ -441,7 +485,6 @@ static void store_char (char c);
static void pad_down (unsigned int lines);
static void read_rest_of_line (COLUMN *p);
static void skip_read (COLUMN *p, int column_number);
@ -1750,7 +1799,7 @@ Index: src/pr.c
static void cleanup (void);
static void print_sep_string (void);
static void separator_string (const char *optarg_S);
@@ -453,7 +4 83 ,7 @@ static COLUMN *column_vector;
@@ -453,7 +4 96 ,7 @@ static COLUMN *column_vector;
we store the leftmost columns contiguously in buff.
To print a line from buff, get the index of the first character
from line_vector[i], and print up to line_vector[i + 1]. */
@ -1759,7 +1808,7 @@ Index: src/pr.c
/* Index of the position in buff where the next character
will be stored. */
@@ -557,7 + 587 ,7 @@ static int chars_per_column;
@@ -557,7 + 600 ,7 @@ static int chars_per_column;
static bool untabify_input = false;
/* (-e) The input tab character. */
@ -1768,7 +1817,7 @@ Index: src/pr.c
/* (-e) Tabstops are at chars_per_tab, 2*chars_per_tab, 3*chars_per_tab, ...
where the leftmost column is 1. */
@@ -567,7 + 597 ,10 @@ static int chars_per_input_tab = 8;
@@ -567,7 + 610 ,10 @@ static int chars_per_input_tab = 8;
static bool tabify_output = false;
/* (-i) The output tab character. */
@ -1780,7 +1829,7 @@ Index: src/pr.c
/* (-i) The width of the output tab. */
static int chars_per_output_tab = 8;
@@ -637,7 +6 70 ,13 @@ static int line_number;
@@ -637,7 +6 83 ,13 @@ static int line_number;
static bool numbered_lines = false;
/* (-n) Character which follows each line number. */
@ -1795,7 +1844,7 @@ Index: src/pr.c
/* (-n) line counting starts with 1st line of input file (not with 1st
line of 1st page printed). */
@@ -690,6 +7 29 ,7 @@ static bool use_col_separator = false;
@@ -690,6 +7 4 2,7 @@ static bool use_col_separator = false;
-a|COLUMN|-m is a 'space' and with the -J option a 'tab'. */
static char *col_sep_string = (char *) "";
static int col_sep_length = 0;
@ -1803,7 +1852,7 @@ Index: src/pr.c
static char *column_separator = (char *) " ";
static char *line_separator = (char *) "\t";
@@ -840,6 +8 80 ,13 @@ separator_string (const char *optarg_S)
@@ -840,6 +8 93 ,13 @@ separator_string (const char *optarg_S)
col_sep_length = (int) strlen (optarg_S);
col_sep_string = xmalloc (col_sep_length + 1);
strcpy (col_sep_string, optarg_S);
@ -1817,7 +1866,7 @@ Index: src/pr.c
}
int
@@ -864,6 +9 11 ,21 @@ main (int argc, char **argv)
@@ -864,6 +9 24 ,21 @@ main (int argc, char **argv)
atexit (close_stdout);
@ -1839,7 +1888,7 @@ Index: src/pr.c
n_files = 0;
file_names = (argc > 1
? xmalloc ((argc - 1) * sizeof (char *))
@@ -940,8 +10 02 ,12 @@ main (int argc, char **argv)
@@ -940,8 +10 15 ,12 @@ main (int argc, char **argv)
break;
case 'e':
if (optarg)
@ -1854,7 +1903,7 @@ Index: src/pr.c
/* Could check tab width > 0. */
untabify_input = true;
break;
@@ -954,8 +10 20 ,12 @@ main (int argc, char **argv)
@@ -954,8 +10 33 ,12 @@ main (int argc, char **argv)
break;
case 'i':
if (optarg)
@ -1869,7 +1918,7 @@ Index: src/pr.c
/* Could check tab width > 0. */
tabify_output = true;
break;
@@ -973,8 +10 43 ,8 @@ main (int argc, char **argv)
@@ -973,8 +10 56 ,8 @@ main (int argc, char **argv)
case 'n':
numbered_lines = true;
if (optarg)
@ -1880,7 +1929,7 @@ Index: src/pr.c
break;
case 'N':
skip_count = false;
@@ -998,7 +10 6 8,7 @@ main (int argc, char **argv)
@@ -998,7 +10 81 ,7 @@ main (int argc, char **argv)
old_s = false;
/* Reset an additional input of -s, -S dominates -s */
col_sep_string = bad_cast ("");
@ -1889,7 +1938,7 @@ Index: src/pr.c
use_col_separator = true;
if (optarg)
separator_string (optarg);
@@ -1152,10 +12 22 ,45 @@ getoptnum (const char *n_str, int min, i
@@ -1152,10 +12 35 ,45 @@ getoptnum (const char *n_str, int min, i
a number. */
static void
@ -1937,7 +1986,7 @@ Index: src/pr.c
if (*arg)
{
long int tmp_long;
@@ -1177,6 +12 82 ,11 @@ static void
@@ -1177,6 +12 95 ,11 @@ static void
init_parameters (int number_of_files)
{
int chars_used_by_number = 0;
@ -1949,7 +1998,7 @@ Index: src/pr.c
lines_per_body = lines_per_page - lines_per_header - lines_per_footer;
if (lines_per_body <= 0)
@@ -1214,7 +13 24 ,7 @@ init_parameters (int number_of_files)
@@ -1214,7 +13 37 ,7 @@ init_parameters (int number_of_files)
else
col_sep_string = column_separator;
@ -1958,7 +2007,7 @@ Index: src/pr.c
use_col_separator = true;
}
/* It's rather pointless to define a TAB separator with column
@@ -1244,11 +13 54 ,11 @@ init_parameters (int number_of_files)
@@ -1244,11 +13 67 ,11 @@ init_parameters (int number_of_files)
+ TAB_WIDTH (chars_per_input_tab, chars_per_number); */
/* Estimate chars_per_text without any margin and keep it constant. */
@ -1972,7 +2021,7 @@ Index: src/pr.c
/* The number is part of the column width unless we are
printing files in parallel. */
@@ -1257,7 +13 67 ,7 @@ init_parameters (int number_of_files)
@@ -1257,7 +13 80 ,7 @@ init_parameters (int number_of_files)
}
chars_per_column = (chars_per_line - chars_used_by_number
@ -1981,7 +2030,7 @@ Index: src/pr.c
if (chars_per_column < 1)
error (EXIT_FAILURE, 0, _("page width too narrow"));
@@ -1275,7 +13 85 ,7 @@ init_parameters (int number_of_files)
@@ -1275,7 +13 9 8,7 @@ init_parameters (int number_of_files)
We've to use 8 as the lower limit, if we use chars_per_default_tab = 8
to expand a tab which is not an input_tab-char. */
free (clump_buff);
@ -1990,7 +2039,7 @@ Index: src/pr.c
}
/* Open the necessary files,
@@ -1383,7 +1 493 ,7 @@ init_funcs (void)
@@ -1383,7 +1 506 ,7 @@ init_funcs (void)
/* Enlarge p->start_position of first column to use the same form of
padding_not_printed with all columns. */
@ -1999,7 +2048,7 @@ Index: src/pr.c
/* This loop takes care of all but the rightmost column. */
@@ -1417,7 +15 27 ,7 @@ init_funcs (void)
@@ -1417,7 +15 40 ,7 @@ init_funcs (void)
}
else
{
@ -2008,7 +2057,7 @@ Index: src/pr.c
h_next = h + chars_per_column;
}
}
@@ -1708,9 +18 18 ,9 @@ static void
@@ -1708,9 +18 3 1,9 @@ static void
align_column (COLUMN *p)
{
padding_not_printed = p->start_position;
@ -2020,7 +2069,7 @@ Index: src/pr.c
padding_not_printed = ANYWHERE;
}
@@ -1981,13 +2 09 1,13 @@ store_char (char c)
@@ -1981,13 +2 104 ,13 @@ store_char (char c)
/* May be too generous. */
buff = X2REALLOC (buff, &buff_allocated);
}
@ -2036,7 +2085,7 @@ Index: src/pr.c
char *s;
int num_width;
@@ -2004,22 +21 14 ,24 @@ add_line_number (COLUMN *p)
@@ -2004,22 +21 27 ,24 @@ add_line_number (COLUMN *p)
/* Tabification is assumed for multiple columns, also for n-separators,
but 'default n-separator = TAB' hasn't been given priority over
equal column_width also specified by POSIX. */
@ -2065,7 +2114,7 @@ Index: src/pr.c
output_position = POS_AFTER_TAB (chars_per_output_tab,
output_position);
}
@@ -2180,7 +2 292 ,7 @@ print_white_space (void)
@@ -2180,7 +2 305 ,7 @@ print_white_space (void)
while (goal - h_old > 1
&& (h_new = POS_AFTER_TAB (chars_per_output_tab, h_old)) <= goal)
{
@ -2074,7 +2123,7 @@ Index: src/pr.c
h_old = h_new;
}
while (++h_old <= goal)
@@ -2200,6 +23 1 2,7 @@ print_sep_string (void)
@@ -2200,6 +23 25 ,7 @@ print_sep_string (void)
{
char *s;
int l = col_sep_length;
@ -2082,7 +2131,7 @@ Index: src/pr.c
s = col_sep_string;
@@ -2213,6 +23 26 ,7 @@ print_sep_string (void)
@@ -2213,6 +23 39 ,7 @@ print_sep_string (void)
{
for (; separators_not_printed > 0; --separators_not_printed)
{
@ -2090,7 +2139,7 @@ Index: src/pr.c
while (l-- > 0)
{
/* 3 types of sep_strings: spaces only, spaces and chars,
@@ -2226,12 +23 40 ,15 @@ print_sep_string (void)
@@ -2226,12 +23 53 ,15 @@ print_sep_string (void)
}
else
{
@ -2107,7 +2156,7 @@ Index: src/pr.c
/* sep_string ends with some spaces */
if (spaces_not_printed > 0)
print_white_space ();
@@ -2259,7 +23 76 ,7 @@ print_clump (COLUMN *p, int n, char *clu
@@ -2259,7 +23 89 ,7 @@ print_clump (COLUMN *p, int n, char *clu
required number of tabs and spaces. */
static void
@ -2116,7 +2165,7 @@ Index: src/pr.c
{
if (tabify_output)
{
@@ -2283,6 +24 00 ,74 @@ print_char (char c)
@@ -2283,6 +24 13 ,74 @@ print_char (char c)
putchar (c);
}
@ -2191,7 +2240,7 @@ Index: src/pr.c
/* Skip to page PAGE before printing.
PAGE may be larger than total number of pages. */
@@ -2462,9 +26 47 ,9 @@ read_line (COLUMN *p)
@@ -2462,9 +26 60 ,9 @@ read_line (COLUMN *p)
align_empty_cols = false;
}
@ -2203,7 +2252,7 @@ Index: src/pr.c
padding_not_printed = ANYWHERE;
}
@@ -2534,7 +27 19 ,7 @@ print_stored (COLUMN *p)
@@ -2534,7 +27 32 ,7 @@ print_stored (COLUMN *p)
int i;
int line = p->current_line++;
@ -2212,7 +2261,7 @@ Index: src/pr.c
/* FIXME
UMR: Uninitialized memory read:
* This is occurring while in:
@@ -2546,7 +27 31 ,7 @@ print_stored (COLUMN *p)
@@ -2546,7 +27 44 ,7 @@ print_stored (COLUMN *p)
xmalloc [xmalloc.c:94]
init_store_cols [pr.c:1648]
*/
@ -2221,7 +2270,7 @@ Index: src/pr.c
pad_vertically = true;
@@ -2565,9 +27 50 ,9 @@ print_stored (COLUMN *p)
@@ -2565,9 +27 63 ,9 @@ print_stored (COLUMN *p)
}
}
@ -2233,7 +2282,7 @@ Index: src/pr.c
padding_not_printed = ANYWHERE;
}
@@ -2580,8 +27 65 ,8 @@ print_stored (COLUMN *p)
@@ -2580,8 +27 78 ,8 @@ print_stored (COLUMN *p)
if (spaces_not_printed == 0)
{
output_position = p->start_position + end_vector[line];
@ -2244,7 +2293,7 @@ Index: src/pr.c
}
return true;
@@ -2600,7 +27 85 ,7 @@ print_stored (COLUMN *p)
@@ -2600,7 +27 9 8,7 @@ print_stored (COLUMN *p)
number of characters is 1.) */
static int
@ -2253,7 +2302,7 @@ Index: src/pr.c
{
unsigned char uc = c;
char *s = clump_buff;
@@ -2610,10 +2 795 ,10 @@ char_to_clump (char c)
@@ -2610,10 +2 808 ,10 @@ char_to_clump (char c)
int chars;
int chars_per_c = 8;
@ -2266,7 +2315,7 @@ Index: src/pr.c
{
width = TAB_WIDTH (chars_per_c, input_position);
@@ -2694,6 +28 7 9,164 @@ char_to_clump (char c)
@@ -2694,6 +28 92 ,164 @@ char_to_clump (char c)
return chars;
}
@ -2433,8 +2482,8 @@ Index: src/pr.c
Index: src/sort.c
===================================================================
--- src/sort.c.orig 2015-06-26 19:05:22.000000000 +0200
+++ src/sort.c 2015-07-09 17:28:20.750707716 +0200
--- src/sort.c.orig
+++ src/sort.c
@@ -29,6 +29,14 @@
#include <sys/wait.h>
#include <signal.h>
@ -3112,7 +3161,7 @@ Index: src/sort.c
else if (key->random)
diff = compare_random (ta, tlena, tb, tlenb);
else if (key->version)
@@ -2694,6 +3134, 193 @@ keycompare (struct line const *a, struct
@@ -2694,6 +3134, 211 @@ keycompare (struct line const *a, struct
return key->reverse ? -diff : diff;
}
@ -3217,6 +3266,9 @@ Index: src/sort.c
+ size_t lena = lima <= texta ? 0 : lima - texta;
+ size_t lenb = limb <= textb ? 0 : limb - textb;
+
+ char enda IF_LINT (= 0);
+ char endb IF_LINT (= 0);
+
+ char const *translate = key->translate;
+ bool const *ignore = key->ignore;
+
@ -3236,6 +3288,12 @@ Index: src/sort.c
+ texta = copy_a; textb = copy_b;
+ lena = new_len_a; lenb = new_len_b;
+ }
+ else
+ {
+ /* Use the keys in-place, temporarily null-terminated. */
+ enda = texta[lena]; texta[lena] = '\0';
+ endb = textb[lenb]; textb[lenb] = '\0';
+ }
+
+ if (key->random)
+ diff = compare_random (texta, lena, textb, lenb);
@ -3259,13 +3317,22 @@ Index: src/sort.c
+ diff = 1;
+ else if (hard_LC_COLLATE && !folding)
+ {
+ diff = xmemcoll0 (texta, lena , textb, lenb);
+ diff = xmemcoll0 (texta, lena + 1 , textb, lenb + 1 );
+ }
+ else
+ diff = memcmp (texta, textb, MIN (lena + 1,lenb + 1));
+ {
+ diff = memcmp (texta, textb, MIN (lena, lenb));
+ if (diff == 0)
+ diff = lena < lenb ? -1 : lena != lenb;
+ }
+
+ if (ignore || translate)
+ free (texta);
+ else
+ {
+ texta[lena] = enda;
+ textb[lenb] = endb;
+ }
+
+ if (diff)
+ goto not_equal;
@ -3306,7 +3373,7 @@ Index: src/sort.c
/* Compare two lines A and B, returning negative, zero, or positive
depending on whether A compares less than, equal to, or greater than B. */
@@ -2721,7 +33 48 ,7 @@ compare (struct line const *a, struct li
@@ -2721,7 +33 66 ,7 @@ compare (struct line const *a, struct li
diff = - NONZERO (blen);
else if (blen == 0)
diff = 1;
@ -3315,7 +3382,7 @@ Index: src/sort.c
{
/* Note xmemcoll0 is a performance enhancement as
it will not unconditionally write '\0' after the
@@ -4120,6 +47 47 ,7 @@ set_ordering (char const *s, struct keyf
@@ -4120,6 +47 65 ,7 @@ set_ordering (char const *s, struct keyf
break;
case 'f':
key->translate = fold_toupper;
@ -3323,7 +3390,7 @@ Index: src/sort.c
break;
case 'g':
key->general_numeric = true;
@@ -4197,7 +48 25 ,7 @@ main (int argc, char **argv)
@@ -4197,7 +48 43 ,7 @@ main (int argc, char **argv)
initialize_exit_failure (SORT_FAILURE);
hard_LC_COLLATE = hard_locale (LC_COLLATE);
@ -3332,7 +3399,7 @@ Index: src/sort.c
hard_LC_TIME = hard_locale (LC_TIME);
#endif
@@ -4218,6 +48 4 6,29 @@ main (int argc, char **argv)
@@ -4218,6 +48 64 ,29 @@ main (int argc, char **argv)
thousands_sep = -1;
}
@ -3362,7 +3429,7 @@ Index: src/sort.c
have_read_stdin = false;
inittables ();
@@ -4492,13 +51 43 ,34 @@ main (int argc, char **argv)
@@ -4492,13 +51 61 ,34 @@ main (int argc, char **argv)
case 't':
{
@ -3401,7 +3468,7 @@ Index: src/sort.c
else
{
/* Provoke with 'sort -txx'. Complain about
@@ -4509,9 +51 81 ,12 @@ main (int argc, char **argv)
@@ -4509,9 +51 99 ,12 @@ main (int argc, char **argv)
quote (optarg));
}
}
@ -3418,8 +3485,8 @@ Index: src/sort.c
Index: src/unexpand.c
===================================================================
--- src/unexpand.c.orig 2015-06-26 19:05:22.000000000 +0200
+++ src/unexpand.c 2015-07-09 17:15:19.622057589 +0200
--- src/unexpand.c.orig
+++ src/unexpand.c
@@ -38,12 +38,29 @@
#include <stdio.h>
#include <getopt.h>
@ -3677,8 +3744,8 @@ Index: src/unexpand.c
error (EXIT_FAILURE, errno, "-");
Index: src/uniq.c
===================================================================
--- src/uniq.c.orig 2015-06-26 19:04:19.000000000 +0200
+++ src/uniq.c 2015-07-09 17:15:19.622057589 +0200
--- src/uniq.c.orig
+++ src/uniq.c
@@ -21,6 +21,17 @@
#include <getopt.h>
#include <sys/types.h>
@ -4064,8 +4131,8 @@ Index: src/uniq.c
check_chars = SIZE_MAX;
Index: tests/i18n/sort.sh
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ tests/i18n/sort.sh 2015-07-09 17:15:19.622057589 +0200
--- /dev/null
+++ tests/i18n/sort.sh
@@ -0,0 +1,29 @@
+#!/bin/sh
+# Verify sort's multi-byte support.
@ -4098,28 +4165,21 @@ Index: tests/i18n/sort.sh
+Exit $fail
Index: tests/local.mk
===================================================================
--- tests/local.mk.orig 2015-07-03 14:03:55.000000000 +0200
+++ tests/local.mk 2015-07-09 17:28:19.101746451 +0200
@@ -341,6 +341, 7 @@ all_tests = \
--- tests/local.mk.orig
+++ tests/local.mk
@@ -341,6 +341, 8 @@ all_tests = \
tests/misc/sort-discrim.sh \
tests/misc/sort-files0-from.pl \
tests/misc/sort-float.sh \
+ tests/misc/sort-mb-tests.sh \
+ tests/i18n/sort.sh \
tests/misc/sort-merge.pl \
tests/misc/sort-merge-fdlimit.sh \
tests/misc/sort-month.sh \
@@ -532,6 +533,7 @@ all_tests = \
tests/du/threshold.sh \
tests/du/trailing-slash.sh \
tests/du/two-args.sh \
+ tests/i18n/sort.sh \
tests/id/gnu-zero-uids.sh \
tests/id/no-context.sh \
tests/id/context.sh \
Index: tests/misc/cut.pl
===================================================================
--- tests/misc/cut.pl.orig 2015-06-26 19:04:19.000000000 +0200
+++ tests/misc/cut.pl 2015-07-09 17:15:19.622057589 +0200
--- tests/misc/cut.pl.orig
+++ tests/misc/cut.pl
@@ -23,9 +23,11 @@ use strict;
# Turn off localization of executable's output.
@ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3;
@ -4144,8 +4204,8 @@ Index: tests/misc/cut.pl
push @Tests, @new;
Index: tests/misc/expand.pl
===================================================================
--- tests/misc/expand.pl.orig 2015-06-26 19:04:19.000000000 +0200
+++ tests/misc/expand.pl 2015-07-09 17:15:19.622057589 +0200
--- tests/misc/expand.pl.orig
+++ tests/misc/expand.pl
@@ -23,6 +23,15 @@ use strict;
# Turn off localization of executable's output.
@ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3;
@ -4202,8 +4262,8 @@ Index: tests/misc/expand.pl
Index: tests/misc/fold.pl
===================================================================
--- tests/misc/fold.pl.orig 2015-06-26 19:04:19.000000000 +0200
+++ tests/misc/fold.pl 2015-07-09 17:15:19.623057566 +0200
--- tests/misc/fold.pl.orig
+++ tests/misc/fold.pl
@@ -20,9 +20,18 @@ use strict;
(my $program_name = $0) =~ s|.*/||;
@ -4275,8 +4335,8 @@ Index: tests/misc/fold.pl
exit $fail;
Index: tests/misc/join.pl
===================================================================
--- tests/misc/join.pl.orig 2015-06-26 19:04:19.000000000 +0200
+++ tests/misc/join.pl 2015-07-09 17:15:19.623057566 +0200
--- tests/misc/join.pl.orig
+++ tests/misc/join.pl
@@ -25,6 +25,15 @@ my $limits = getlimits ();
my $prog = 'join';
@ -4345,8 +4405,8 @@ Index: tests/misc/join.pl
Index: tests/misc/sort-mb-tests.sh
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ tests/misc/sort-mb-tests.sh 2015-07-09 17:15:19.623057566 +0200
--- /dev/null
+++ tests/misc/sort-mb-tests.sh
@@ -0,0 +1,45 @@
+#!/bin/sh
+# Verify sort's multi-byte support.
@ -4395,8 +4455,8 @@ Index: tests/misc/sort-mb-tests.sh
+Exit $fail
Index: tests/misc/sort-merge.pl
===================================================================
--- tests/misc/sort-merge.pl.orig 2015-06-26 19:04:19.000000000 +0200
+++ tests/misc/sort-merge.pl 2015-07-09 17:15:19.623057566 +0200
--- tests/misc/sort-merge.pl.orig
+++ tests/misc/sort-merge.pl
@@ -26,6 +26,15 @@ my $prog = 'sort';
# Turn off localization of executable's output.
@ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3;
@ -4455,8 +4515,8 @@ Index: tests/misc/sort-merge.pl
Index: tests/misc/sort.pl
===================================================================
--- tests/misc/sort.pl.orig 2015-06-26 19:04:19.000000000 +0200
+++ tests/misc/sort.pl 2015-07-09 17:28:20.750707716 +0200
--- tests/misc/sort.pl.orig
+++ tests/misc/sort.pl
@@ -24,10 +24,15 @@ my $prog = 'sort';
# Turn off localization of executable's output.
@ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3;
@ -4474,7 +4534,7 @@ Index: tests/misc/sort.pl
# Since each test is run with a file name and with redirected stdin,
# the name in the diagnostic is either the file name or "-".
# Normalize each diagnostic to use '-'.
@@ -419,6 +424,3 7 @@ foreach my $t (@Tests)
@@ -419,6 +424,3 8 @@ foreach my $t (@Tests)
}
}
@ -4504,6 +4564,7 @@ Index: tests/misc/sort.pl
+ #disable several failing tests until investigation, disable all tests with envvars set
+ next if (grep {ref $_ eq 'HASH' && exists $_->{ENV}} (@new_t));
+ next if ($test_name =~ "18g" or $test_name =~ "sort-numeric" or $test_name =~ "08[ab]" or $test_name =~ "03[def]" or $test_name =~ "h4" or $test_name =~ "n1" or $test_name =~ "2[01]a");
+ next if ($test_name =~ "11[ab]"); # avoid FP: expected result differs to MB result due to collation rules.
+ push @new, ["$test_name-mb", @new_t, {ENV => "LC_ALL=$mb_locale"}];
+ }
+ push @Tests, @new;
@ -4512,7 +4573,7 @@ Index: tests/misc/sort.pl
@Tests = triple_test \@Tests;
# Remember that triple_test creates from each test with exactly one "IN"
@@ -428,6 +46 4 ,7 @@ foreach my $t (@Tests)
@@ -428,6 +46 5 ,7 @@ foreach my $t (@Tests)
# Remove the IN_PIPE version of the "output-is-input" test above.
# The others aren't susceptible because they have three inputs each.
@Tests = grep {$_->[0] ne 'output-is-input.p'} @Tests;
@ -4522,8 +4583,8 @@ Index: tests/misc/sort.pl
my $verbose = $ENV{VERBOSE};
Index: tests/misc/unexpand.pl
===================================================================
--- tests/misc/unexpand.pl.orig 2015-06-26 19:04:19.000000000 +0200
+++ tests/misc/unexpand.pl 2015-07-09 17:15:19.623057566 +0200
--- tests/misc/unexpand.pl.orig
+++ tests/misc/unexpand.pl
@@ -27,6 +27,14 @@ my $limits = getlimits ();
my $prog = 'unexpand';
@ -4579,8 +4640,8 @@ Index: tests/misc/unexpand.pl
Index: tests/misc/uniq.pl
===================================================================
--- tests/misc/uniq.pl.orig 2015-06-26 19:04:19.000000000 +0200
+++ tests/misc/uniq.pl 2015-07-09 17:15:19.623057566 +0200
--- tests/misc/uniq.pl.orig
+++ tests/misc/uniq.pl
@@ -23,9 +23,17 @@ my $limits = getlimits ();
my $prog = 'uniq';
my $try = "Try '$prog --help' for more information.\n";
@ -4655,13 +4716,12 @@ Index: tests/misc/uniq.pl
Index: tests/pr/pr-tests.pl
===================================================================
--- tests/pr/pr-tests.pl.orig 2015-06-26 19:04:19.000000000 +0200
+++ tests/pr/pr-tests.pl 2015-07-09 17:15:19.624057542 +0200
@@ -22,6 +22,16 @@ use strict;
@ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3;
--- tests/pr/pr-tests.pl.orig
+++ tests/pr/pr-tests.pl
@@ -24,6 +24,15 @@ use strict;
my $prog = 'pr';
+
my $normalize_strerror = "s/': .*/'/";
+my $mb_locale;
+#Uncomment the following line to enable multibyte tests
+$mb_locale = $ENV{LOCALE_FR_UTF8};
@ -4671,10 +4731,10 @@ Index: tests/pr/pr-tests.pl
+my $try = "Try \`$prog --help' for more information.\n";
+my $inval = "$prog: invalid byte, character or field list\n$try";
+
my $normalize_strerror = "s/': .*/'/";
my @tv = (
@@ -467,8 +477,48 @@ push @Tests,
# -b option is no longer an official option. But it's still working to
@@ -467,8 +476,48 @@ push @Tests,
{IN=>{3=>"x\ty\tz\n"}},
{OUT=>join("\t", qw(a b c m n o x y z)) . "\n"} ];