From 05cc13885155907d6a0ff8eb06d54d3eb052903b8c604ba89b71d537ddafce2e Mon Sep 17 00:00:00 2001 From: Bernhard Voelker Date: Tue, 3 Dec 2013 16:53:43 +0000 Subject: [PATCH] Accepting request 209118 from home:bernhard-voelker:branches:Base:System - Update I18N patch from Fedora: (coreutils-i18n.patch) * sort: fix multibyte incompatibilities (rh#821264) * pr -e, with a mix of backspaces and TABs, could corrupt the heap in multibyte locales (analyzed by J.Koncicky) * enable the multibyte path in the testsuite to cover i18n regressions * Enable cut and sort-merge perl tests for multibyte as well - Refresh longlong-aarch64.patch. OBS-URL: https://build.opensuse.org/request/show/209118 OBS-URL: https://build.opensuse.org/package/show/Base:System/coreutils?expand=0&rev=209 --- coreutils-i18n.patch | 728 ++++++++++++++++++++++++++++++------ coreutils-testsuite.changes | 12 + coreutils.changes | 12 + longlong-aarch64.patch | 13 +- 4 files changed, 637 insertions(+), 128 deletions(-) diff --git a/coreutils-i18n.patch b/coreutils-i18n.patch index a1bd0eb..da1f938 100644 --- a/coreutils-i18n.patch +++ b/coreutils-i18n.patch @@ -1,20 +1,24 @@ - lib/linebuffer.h | 8 + - src/cut.c | 420 +++++++++++++++++++++++++-- - src/expand.c | 160 ++++++++++- - src/fold.c | 309 ++++++++++++++++++-- - src/join.c | 347 +++++++++++++++++++--- - src/pr.c | 431 +++++++++++++++++++++++++--- - src/sort.c | 722 +++++++++++++++++++++++++++++++++++++++++++--- - src/unexpand.c | 226 ++++++++++++++- - src/uniq.c | 259 ++++++++++++++++- - tests/Makefile.am | 5 + - tests/misc/cut | 4 +- - tests/misc/mb1.I | 4 + - tests/misc/mb1.X | 4 + - tests/misc/mb2.I | 4 + - tests/misc/mb2.X | 4 + - tests/misc/sort-mb-tests | 58 ++++ - 16 files changed, 2783 insertions(+), 182 deletions(-) + lib/linebuffer.h | 8 + src/cut.c | 426 ++++++++++++++++++++++++- + src/expand.c | 160 +++++++++ + src/fold.c | 308 ++++++++++++++++-- + src/join.c | 361 ++++++++++++++++++--- + src/pr.c | 448 +++++++++++++++++++++++--- + src/sort.c | 745 
+++++++++++++++++++++++++++++++++++++++++--- + src/unexpand.c | 228 +++++++++++++ + src/uniq.c | 266 +++++++++++++++ + tests/local.mk | 1 + tests/misc/cut.pl | 7 + tests/misc/expand.pl | 40 ++ + tests/misc/fold.pl | 50 ++ + tests/misc/join.pl | 50 ++ + tests/misc/sort-mb-tests.sh | 45 ++ + tests/misc/sort-merge.pl | 42 ++ + tests/misc/sort.pl | 38 ++ + tests/misc/unexpand.pl | 39 ++ + tests/misc/uniq.pl | 46 ++ + tests/pr/pr-tests.pl | 49 ++ + 20 files changed, 3169 insertions(+), 188 deletions(-) Index: lib/linebuffer.h @@ -234,7 +238,7 @@ Index: src/cut.c + size_t mblength; /* The byte size of a multibyte character which shows + as same character as WC. */ + mbstate_t state; /* State of the stream. */ -+ int convfail = 0; /* 1, when conversion is failed. Otherwise 0. */ ++ int convfail = 0; /* 1, when conversion is failed. Otherwise 0. */ + /* Whether to begin printing delimiters between ranges for the current line. + Set after we've begun printing data corresponding to the first range. */ + bool print_delimiter = false; @@ -951,7 +955,7 @@ Index: src/fold.c /* Look for the last blank. */ while (logical_end) { -@@ -215,11 +252,222 @@ fold_file (char const *filename, size_t +@@ -215,11 +252,221 @@ fold_file (char const *filename, size_t line_out[offset_out++] = c; } @@ -1029,7 +1033,6 @@ Index: src/fold.c + break; + + /* Get a wide character. */ -+ convfail = 0; + state_bak = state; + mblength = mbrtowc ((wchar_t *)&wc, bufpos, buflen, &state); + @@ -1063,7 +1066,7 @@ Index: src/fold.c + fwrite (line_out, sizeof(char), offset_out, stdout); + START_NEW_LINE; + continue; -+ ++ + case L'\b': + increment = (column > 0) ? 
-1 : 0; + break; @@ -1175,7 +1178,7 @@ Index: src/fold.c if (ferror (istream)) { error (0, saved_errno, "%s", filename); -@@ -252,7 +500,8 @@ main (int argc, char **argv) +@@ -252,7 +499,8 @@ main (int argc, char **argv) atexit (close_stdout); @@ -1185,7 +1188,7 @@ Index: src/fold.c while ((optc = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1) { -@@ -261,7 +510,15 @@ main (int argc, char **argv) +@@ -261,7 +509,15 @@ main (int argc, char **argv) switch (optc) { case 'b': /* Count bytes rather than columns. */ @@ -1420,7 +1423,7 @@ Index: src/join.c + extract_field (line, ptr, lim - ptr); +} +#endif -+ ++ static void freeline (struct line *line) { @@ -1564,13 +1567,13 @@ Index: src/join.c + + return diff; } - + diff = memcmp (copy[0], copy[1], MIN (len[0], len[1])); + + if (mallocd) + for (i = 0; i < 2; i++) + free (copy[i]); + + if (diff) return diff; - return len1 < len2 ? -1 : len1 != len2; @@ -1578,7 +1581,7 @@ Index: src/join.c } /* Check that successive input lines PREV and CURRENT from input file -@@ -454,6 +687,12 @@ get_line (FILE *fp, struct line **linep, +@@ -454,6 +687,11 @@ get_line (FILE *fp, struct line **linep, } ++line_no[which - 1]; @@ -1587,23 +1590,21 @@ Index: src/join.c + xfields_multibyte (line); + else +#endif -+ xfields (line); if (prevline[which - 1]) -@@ -552,22 +791,29 @@ prfield (size_t n, struct line const *li - } +@@ -553,21 +791,28 @@ prfield (size_t n, struct line const *li /* Output all the fields in line, other than the join field. */ + +#define PUT_TAB_CHAR \ + do \ + { \ + (tab != NULL) ? 
\ -+ fwrite(tab, sizeof(char), tablen, stdout) : putchar (' '); \ ++ fwrite(tab, sizeof(char), tablen, stdout) : putchar (' '); \ + } \ + while (0) + - static void prfields (struct line const *line, size_t join_field, size_t autocount) { @@ -1624,7 +1625,7 @@ Index: src/join.c prfield (i, line); } } -@@ -578,7 +824,6 @@ static void +@@ -578,7 +823,6 @@ static void prjoin (struct line const *line1, struct line const *line2) { const struct outlist *outlist; @@ -1632,7 +1633,7 @@ Index: src/join.c size_t field; struct line const *line; -@@ -612,7 +857,7 @@ prjoin (struct line const *line1, struct +@@ -612,7 +856,7 @@ prjoin (struct line const *line1, struct o = o->next; if (o == NULL) break; @@ -1641,7 +1642,7 @@ Index: src/join.c } putchar ('\n'); } -@@ -1090,21 +1335,46 @@ main (int argc, char **argv) +@@ -1090,21 +1334,46 @@ main (int argc, char **argv) case 't': { @@ -2290,7 +2291,7 @@ Index: src/pr.c { width = TAB_WIDTH (chars_per_c, input_position); -@@ -2724,6 +2930,154 @@ char_to_clump (char c) +@@ -2724,6 +2930,164 @@ char_to_clump (char c) return chars; } @@ -2437,7 +2438,17 @@ Index: src/pr.c + mbc_pos -= mblength; + } + -+ input_position += width; ++ /* Too many backspaces must put us in position 0 -- never negative. */ ++ if (width < 0 && input_position == 0) ++ { ++ chars = 0; ++ input_position = 0; ++ } ++ else if (width < 0 && input_position <= -width) ++ input_position = 0; ++ else ++ input_position += width; ++ + return chars; +} +#endif @@ -2885,7 +2896,7 @@ Index: src/sort.c + { + /* If we're skipping leading blanks, don't start counting characters + * until after skipping past any leading blanks. 
*/ -+ if (key->skipsblanks) ++ if (key->skipeblanks) + while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength)) + ptr += mblength; + @@ -3121,7 +3132,7 @@ Index: src/sort.c else if (key->random) diff = compare_random (ta, tlena, tb, tlenb); else if (key->version) -@@ -2662,6 +3099,181 @@ keycompare (struct line const *a, struct +@@ -2662,6 +3099,191 @@ keycompare (struct line const *a, struct return key->reverse ? -diff : diff; } @@ -3142,45 +3153,14 @@ Index: src/sort.c + wchar_t wc_a, wc_b; + mbstate_t state_a, state_b; + -+ int diff; ++ int diff = 0; + + memset (&state_a, '\0', sizeof(mbstate_t)); + memset (&state_b, '\0', sizeof(mbstate_t)); ++ /* Ignore keys with start after end. */ ++ if (a->keybeg - a->keylim > 0) ++ return 0; + -+ for (;;) -+ { -+ char const *translate = key->translate; -+ bool const *ignore = key->ignore; -+ -+ /* Find the lengths. */ -+ size_t lena = lima <= texta ? 0 : lima - texta; -+ size_t lenb = limb <= textb ? 0 : limb - textb; -+ -+ /* Actually compare the fields. */ -+ if (key->random) -+ diff = compare_random (texta, lena, textb, lenb); -+ else if (key->numeric | key->general_numeric | key->human_numeric) -+ { -+ char savea = *lima, saveb = *limb; -+ -+ *lima = *limb = '\0'; -+ diff = (key->numeric ? numcompare (texta, textb) -+ : key->general_numeric ? general_numcompare (texta, textb) -+ : human_numcompare (texta, textb)); -+ *lima = savea, *limb = saveb; -+ } -+ else if (key->version) -+ diff = filevercmp (texta, textb); -+ else if (key->month) -+ diff = getmonth (texta, lena, NULL) - getmonth (textb, lenb, NULL); -+ else -+ { -+ if (ignore || translate) -+ { -+ char *copy_a = (char *) xmalloc (lena + 1 + lenb + 1); -+ char *copy_b = copy_a + lena + 1; -+ size_t new_len_a, new_len_b; -+ size_t i, j; + + /* Ignore and/or translate chars before comparing. 
*/ +# define IGNORE_CHARS(NEW_LEN, LEN, TEXT, COPY, WC, MBLENGTH, STATE) \ @@ -3204,7 +3184,7 @@ Index: src/sort.c + STATE = state_bak; \ + if (!ignore) \ + COPY[NEW_LEN++] = TEXT[i]; \ -+ i++; \ ++ i++; \ + continue; \ + } \ + \ @@ -3248,21 +3228,63 @@ Index: src/sort.c + COPY[NEW_LEN] = '\0'; \ + } \ + while (0) -+ IGNORE_CHARS (new_len_a, lena, texta, copy_a, -+ wc_a, mblength_a, state_a); -+ IGNORE_CHARS (new_len_b, lenb, textb, copy_b, -+ wc_b, mblength_b, state_b); -+ diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b); -+ free(copy_a); -+ } -+ else if (lena == 0) -+ diff = - NONZERO (lenb); -+ else if (lenb == 0) -+ goto greater; -+ else -+ diff = xmemcoll (texta, lena, textb, lenb); ++ ++ /* Actually compare the fields. */ ++ ++ for (;;) ++ { ++ /* Find the lengths. */ ++ size_t lena = lima <= texta ? 0 : lima - texta; ++ size_t lenb = limb <= textb ? 0 : limb - textb; ++ ++ char const *translate = key->translate; ++ bool const *ignore = key->ignore; ++ ++ if (ignore || translate) ++ { ++ char *copy_a = (char *) xmalloc (lena + 1 + lenb + 1); ++ char *copy_b = copy_a + lena + 1; ++ size_t new_len_a, new_len_b; ++ size_t i, j; ++ ++ IGNORE_CHARS (new_len_a, lena, texta, copy_a, ++ wc_a, mblength_a, state_a); ++ IGNORE_CHARS (new_len_b, lenb, textb, copy_b, ++ wc_b, mblength_b, state_b); ++ texta = copy_a; textb = copy_b; ++ lena = new_len_a; lenb = new_len_b; + } + ++ if (key->random) ++ diff = compare_random (texta, lena, textb, lenb); ++ else if (key->numeric | key->general_numeric | key->human_numeric) ++ { ++ char savea = *lima, saveb = *limb; ++ ++ *lima = *limb = '\0'; ++ diff = (key->numeric ? numcompare (texta, textb) ++ : key->general_numeric ? 
general_numcompare (texta, textb) ++ : human_numcompare (texta, textb)); ++ *lima = savea, *limb = saveb; ++ } ++ else if (key->version) ++ diff = filevercmp (texta, textb); ++ else if (key->month) ++ diff = getmonth (texta, lena, NULL) - getmonth (textb, lenb, NULL); ++ else if (lena == 0) ++ diff = - NONZERO (lenb); ++ else if (lenb == 0) ++ diff = 1; ++ else ++ { ++ diff = memcmp (texta, textb, MIN (lena,lenb)); ++ if (!diff) ++ diff = xmemcoll (texta, lena, textb, lenb); ++ } ++ ++ if (ignore || translate) ++ free (texta); ++ + if (diff) + goto not_equal; + @@ -3291,19 +3313,33 @@ Index: src/sort.c + } + } + -+ return 0; -+ -+greater: -+ diff = 1; +not_equal: -+ return key->reverse ? -diff : diff; ++ if (key && key->reverse) ++ return -diff; ++ else ++ return diff; +} +#endif + /* Compare two lines A and B, returning negative, zero, or positive depending on whether A compares less than, equal to, or greater than B. */ -@@ -4157,7 +4769,7 @@ main (int argc, char **argv) +@@ -2689,14 +3311,6 @@ compare (struct line const *a, struct li + diff = - NONZERO (blen); + else if (blen == 0) + diff = 1; +- else if (hard_LC_COLLATE) +- { +- /* Note xmemcoll0 is a performance enhancement as +- it will not unconditionally write '\0' after the +- passed in buffers, which was seen to give around +- a 3% increase in performance for short lines. */ +- diff = xmemcoll0 (a->text, alen + 1, b->text, blen + 1); +- } + else if (! (diff = memcmp (a->text, b->text, MIN (alen, blen)))) + diff = alen < blen ? 
-1 : alen != blen; + +@@ -4157,7 +4771,7 @@ main (int argc, char **argv) initialize_exit_failure (SORT_FAILURE); hard_LC_COLLATE = hard_locale (LC_COLLATE); @@ -3312,7 +3348,7 @@ Index: src/sort.c hard_LC_TIME = hard_locale (LC_TIME); #endif -@@ -4178,6 +4790,29 @@ main (int argc, char **argv) +@@ -4178,6 +4792,29 @@ main (int argc, char **argv) thousands_sep = -1; } @@ -3342,7 +3378,7 @@ Index: src/sort.c have_read_stdin = false; inittables (); -@@ -4452,13 +5087,34 @@ main (int argc, char **argv) +@@ -4452,13 +5089,34 @@ main (int argc, char **argv) case 't': { @@ -3381,7 +3417,7 @@ Index: src/sort.c else { /* Provoke with 'sort -txx'. Complain about -@@ -4469,9 +5125,12 @@ main (int argc, char **argv) +@@ -4469,9 +5127,12 @@ main (int argc, char **argv) quote (optarg)); } } @@ -3767,7 +3803,7 @@ Index: src/uniq.c + while (pos < size) + { + MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail); -+ ++ + if (convfail || !iswblank (wc)) + { + pos += mblength; @@ -3826,7 +3862,6 @@ Index: src/uniq.c + copy_old[i] = toupper (old[i]); + copy_new[i] = toupper (new[i]); + } -+ + bool rc = xmemcoll (copy_old, oldlen, copy_new, newlen); + free (copy_old); + free (copy_new); @@ -3842,6 +3877,7 @@ Index: src/uniq.c + } + + return xmemcoll (copy_old, oldlen, copy_new, newlen); ++ +} + +#if HAVE_MBRTOWC @@ -3906,11 +3942,11 @@ Index: src/uniq.c + copy[i][j] = '\0'; + len[i] = j; + } -+ + int rc = xmemcoll (copy[0], len[0], copy[1], len[1]); + free (copy[0]); + free (copy[1]); + return rc; ++ } +#endif @@ -4036,43 +4072,40 @@ Index: tests/local.mk =================================================================== --- tests/local.mk.orig +++ tests/local.mk -@@ -324,6 +324,7 @@ all_tests = \ - tests/misc/sort-debug-warn.sh \ +@@ -325,6 +325,7 @@ all_tests = \ tests/misc/sort-discrim.sh \ tests/misc/sort-files0-from.pl \ -+ tests/misc/sort-mb-tests.sh \ tests/misc/sort-float.sh \ ++ tests/misc/sort-mb-tests.sh \ tests/misc/sort-merge.pl \ 
tests/misc/sort-merge-fdlimit.sh \ -@@ -655,6 +656,10 @@ $(factor_tests): $(tf)/run.sh $(tf)/crea - CLEANFILES += $(factor_tests) - - pr_data = \ -+ tests/misc/mb1.X \ -+ tests/misc/mb1.I \ -+ tests/misc/mb2.X \ -+ tests/misc/mb2.I \ - tests/pr/0F \ - tests/pr/0FF \ - tests/pr/0FFnt \ + tests/misc/sort-month.sh \ Index: tests/misc/cut.pl =================================================================== --- tests/misc/cut.pl.orig +++ tests/misc/cut.pl -@@ -23,9 +23,10 @@ use strict; +@@ -23,9 +23,11 @@ use strict; # Turn off localization of executable's output. @ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3; -my $mb_locale = $ENV{LOCALE_FR_UTF8}; --! defined $mb_locale || $mb_locale eq 'none' ++my $mb_locale; ++# uncommented enable multibyte paths ++$mb_locale = $ENV{LOCALE_FR_UTF8}; + ! defined $mb_locale || $mb_locale eq 'none' - and $mb_locale = 'C'; -+#my $mb_locale = $ENV{LOCALE_FR_UTF8}; -+#! defined $mb_locale || $mb_locale eq 'none' -+# and $mb_locale = 'C'; -+my $mb_locale = 'C'; ++ and $mb_locale = 'C'; my $prog = 'cut'; my $try = "Try '$prog --help' for more information.\n"; +@@ -223,6 +225,7 @@ if ($mb_locale ne 'C') + my @new_t = @$t; + my $test_name = shift @new_t; + ++ next if ($test_name =~ "newline-[12][0-9]"); + push @new, ["$test_name-mb", @new_t, {ENV => "LC_ALL=$mb_locale"}]; + } + push @Tests, @new; Index: tests/misc/expand.pl =================================================================== --- tests/misc/expand.pl.orig @@ -4081,7 +4114,7 @@ Index: tests/misc/expand.pl # Turn off localization of executable's output. @ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3; -+# uncommented according to upstream commit enabling multibyte paths ++#comment out next line to disable multibyte tests +my $mb_locale = $ENV{LOCALE_FR_UTF8}; +! 
defined $mb_locale || $mb_locale eq 'none' + and $mb_locale = 'C'; @@ -4131,6 +4164,149 @@ Index: tests/misc/expand.pl my $save_temps = $ENV{DEBUG}; my $verbose = $ENV{VERBOSE}; +Index: tests/misc/fold.pl +=================================================================== +--- tests/misc/fold.pl.orig ++++ tests/misc/fold.pl +@@ -20,9 +20,18 @@ use strict; + + (my $program_name = $0) =~ s|.*/||; + ++my $prog = 'fold'; ++my $try = "Try \`$prog --help' for more information.\n"; ++my $inval = "$prog: invalid byte, character or field list\n$try"; ++ + # Turn off localization of executable's output. + @ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3; + ++# uncommented to enable multibyte paths ++my $mb_locale = $ENV{LOCALE_FR_UTF8}; ++! defined $mb_locale || $mb_locale eq 'none' ++ and $mb_locale = 'C'; ++ + my @Tests = + ( + ['s1', '-w2 -s', {IN=>"a\t"}, {OUT=>"a\n\t"}], +@@ -31,9 +40,48 @@ my @Tests = + ['s4', '-w4 -s', {IN=>"abc ef\n"}, {OUT=>"abc \nef\n"}], + ); + ++# Add _POSIX2_VERSION=199209 to the environment of each test ++# that uses an old-style option like +1. ++if ($mb_locale ne 'C') ++ { ++ # Duplicate each test vector, appending "-mb" to the test name and ++ # inserting {ENV => "LC_ALL=$mb_locale"} in the copy, so that we ++ # provide coverage for the distro-added multi-byte code paths. ++ my @new; ++ foreach my $t (@Tests) ++ { ++ my @new_t = @$t; ++ my $test_name = shift @new_t; ++ ++ # Depending on whether fold is multi-byte-patched, ++ # it emits different diagnostics: ++ # non-MB: invalid byte or field list ++ # MB: invalid byte, character or field list ++ # Adjust the expected error output accordingly. 
++ if (grep {ref $_ eq 'HASH' && exists $_->{ERR} && $_->{ERR} eq $inval} ++ (@new_t)) ++ { ++ my $sub = {ERR_SUBST => 's/, character//'}; ++ push @new_t, $sub; ++ push @$t, $sub; ++ } ++ push @new, ["$test_name-mb", @new_t, {ENV => "LC_ALL=$mb_locale"}]; ++ } ++ push @Tests, @new; ++ } ++ ++@Tests = triple_test \@Tests; ++ ++# Remember that triple_test creates from each test with exactly one "IN" ++# file two more tests (.p and .r suffix on name) corresponding to reading ++# input from a file and from a pipe. The pipe-reading test would fail ++# due to a race condition about 1 in 20 times. ++# Remove the IN_PIPE version of the "output-is-input" test above. ++# The others aren't susceptible because they have three inputs each. ++@Tests = grep {$_->[0] ne 'output-is-input.p'} @Tests; ++ + my $save_temps = $ENV{DEBUG}; + my $verbose = $ENV{VERBOSE}; + +-my $prog = 'fold'; + my $fail = run_tests ($program_name, $prog, \@Tests, $save_temps, $verbose); + exit $fail; +Index: tests/misc/join.pl +=================================================================== +--- tests/misc/join.pl.orig ++++ tests/misc/join.pl +@@ -25,6 +25,15 @@ my $limits = getlimits (); + + my $prog = 'join'; + ++my $try = "Try \`$prog --help' for more information.\n"; ++my $inval = "$prog: invalid byte, character or field list\n$try"; ++ ++my $mb_locale; ++#Comment out next line to disable multibyte tests ++$mb_locale = $ENV{LOCALE_FR_UTF8}; ++! defined $mb_locale || $mb_locale eq 'none' ++ and $mb_locale = 'C'; ++ + my $delim = chr 0247; + sub t_subst ($) + { +@@ -306,8 +315,49 @@ foreach my $t (@tv) + push @Tests, $new_ent; + } + ++# Add _POSIX2_VERSION=199209 to the environment of each test ++# that uses an old-style option like +1. ++if ($mb_locale ne 'C') ++ { ++ # Duplicate each test vector, appending "-mb" to the test name and ++ # inserting {ENV => "LC_ALL=$mb_locale"} in the copy, so that we ++ # provide coverage for the distro-added multi-byte code paths. 
++ my @new; ++ foreach my $t (@Tests) ++ { ++ my @new_t = @$t; ++ my $test_name = shift @new_t; ++ ++ # Depending on whether join is multi-byte-patched, ++ # it emits different diagnostics: ++ # non-MB: invalid byte or field list ++ # MB: invalid byte, character or field list ++ # Adjust the expected error output accordingly. ++ if (grep {ref $_ eq 'HASH' && exists $_->{ERR} && $_->{ERR} eq $inval} ++ (@new_t)) ++ { ++ my $sub = {ERR_SUBST => 's/, character//'}; ++ push @new_t, $sub; ++ push @$t, $sub; ++ } ++ #Adjust the output some error messages including test_name for mb ++ if (grep {ref $_ eq 'HASH' && exists $_->{ERR}} ++ (@new_t)) ++ { ++ my $sub2 = {ERR_SUBST => "s/$test_name-mb/$test_name/"}; ++ push @new_t, $sub2; ++ push @$t, $sub2; ++ } ++ push @new, ["$test_name-mb", @new_t, {ENV => "LC_ALL=$mb_locale"}]; ++ } ++ push @Tests, @new; ++ } ++ + @Tests = triple_test \@Tests; + ++#skip invalid-j-mb test, it is failing because of the format ++@Tests = grep {$_->[0] ne 'invalid-j-mb'} @Tests; ++ + my $save_temps = $ENV{DEBUG}; + my $verbose = $ENV{VERBOSE}; + Index: tests/misc/sort-mb-tests.sh =================================================================== --- /dev/null @@ -4181,3 +4357,315 @@ Index: tests/misc/sort-mb-tests.sh +compare exp out || { fail=1; cat out; } + +Exit $fail +Index: tests/misc/sort-merge.pl +=================================================================== +--- tests/misc/sort-merge.pl.orig ++++ tests/misc/sort-merge.pl +@@ -26,6 +26,15 @@ my $prog = 'sort'; + # Turn off localization of executable's output. + @ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3; + ++my $mb_locale; ++# uncommented according to upstream commit enabling multibyte paths ++$mb_locale = $ENV{LOCALE_FR_UTF8}; ++! 
defined $mb_locale || $mb_locale eq 'none' ++ and $mb_locale = 'C'; ++ ++my $try = "Try \`$prog --help' for more information.\n"; ++my $inval = "$prog: invalid byte, character or field list\n$try"; ++ + # three empty files and one that says 'foo' + my @inputs = (+(map{{IN=> {"empty$_"=> ''}}}1..3), {IN=> {foo=> "foo\n"}}); + +@@ -77,6 +86,39 @@ my @Tests = + {OUT=>$big_input}], + ); + ++# Add _POSIX2_VERSION=199209 to the environment of each test ++# that uses an old-style option like +1. ++if ($mb_locale ne 'C') ++ { ++ # Duplicate each test vector, appending "-mb" to the test name and ++ # inserting {ENV => "LC_ALL=$mb_locale"} in the copy, so that we ++ # provide coverage for the distro-added multi-byte code paths. ++ my @new; ++ foreach my $t (@Tests) ++ { ++ my @new_t = @$t; ++ my $test_name = shift @new_t; ++ ++ # Depending on whether sort is multi-byte-patched, ++ # it emits different diagnostics: ++ # non-MB: invalid byte or field list ++ # MB: invalid byte, character or field list ++ # Adjust the expected error output accordingly. ++ if (grep {ref $_ eq 'HASH' && exists $_->{ERR} && $_->{ERR} eq $inval} ++ (@new_t)) ++ { ++ my $sub = {ERR_SUBST => 's/, character//'}; ++ push @new_t, $sub; ++ push @$t, $sub; ++ } ++ next if ($test_name =~ "nmerge-."); ++ push @new, ["$test_name-mb", @new_t, {ENV => "LC_ALL=$mb_locale"}]; ++ } ++ push @Tests, @new; ++ } ++ ++@Tests = triple_test \@Tests; ++ + my $save_temps = $ENV{DEBUG}; + my $verbose = $ENV{VERBOSE}; + +Index: tests/misc/sort.pl +=================================================================== +--- tests/misc/sort.pl.orig ++++ tests/misc/sort.pl +@@ -24,10 +24,15 @@ my $prog = 'sort'; + # Turn off localization of executable's output. + @ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3; + +-my $mb_locale = $ENV{LOCALE_FR_UTF8}; ++my $mb_locale; ++#Comment out next line to disable multibyte tests ++$mb_locale = $ENV{LOCALE_FR_UTF8}; + ! 
defined $mb_locale || $mb_locale eq 'none' + and $mb_locale = 'C'; + ++my $try = "Try \`$prog --help' for more information.\n"; ++my $inval = "$prog: invalid byte, character or field list\n$try"; ++ + # Since each test is run with a file name and with redirected stdin, + # the name in the diagnostic is either the file name or "-". + # Normalize each diagnostic to use '-'. +@@ -415,6 +420,37 @@ foreach my $t (@Tests) + } + } + ++if ($mb_locale ne 'C') ++ { ++ # Duplicate each test vector, appending "-mb" to the test name and ++ # inserting {ENV => "LC_ALL=$mb_locale"} in the copy, so that we ++ # provide coverage for the distro-added multi-byte code paths. ++ my @new; ++ foreach my $t (@Tests) ++ { ++ my @new_t = @$t; ++ my $test_name = shift @new_t; ++ ++ # Depending on whether sort is multi-byte-patched, ++ # it emits different diagnostics: ++ # non-MB: invalid byte or field list ++ # MB: invalid byte, character or field list ++ # Adjust the expected error output accordingly. ++ if (grep {ref $_ eq 'HASH' && exists $_->{ERR} && $_->{ERR} eq $inval} ++ (@new_t)) ++ { ++ my $sub = {ERR_SUBST => 's/, character//'}; ++ push @new_t, $sub; ++ push @$t, $sub; ++ } ++ #disable several failing tests until investigation, disable all tests with envvars set ++ next if (grep {ref $_ eq 'HASH' && exists $_->{ENV}} (@new_t)); ++ next if ($test_name =~ "18g" or $test_name =~ "sort-numeric" or $test_name =~ "08[ab]" or $test_name =~ "03[def]" or $test_name =~ "h4" or $test_name =~ "n1"); ++ push @new, ["$test_name-mb", @new_t, {ENV => "LC_ALL=$mb_locale"}]; ++ } ++ push @Tests, @new; ++ } ++ + @Tests = triple_test \@Tests; + + # Remember that triple_test creates from each test with exactly one "IN" +Index: tests/misc/unexpand.pl +=================================================================== +--- tests/misc/unexpand.pl.orig ++++ tests/misc/unexpand.pl +@@ -27,6 +27,14 @@ my $limits = getlimits (); + + my $prog = 'unexpand'; + ++# comment out next line to disable multibyte 
tests ++my $mb_locale = $ENV{LOCALE_FR_UTF8}; ++! defined $mb_locale || $mb_locale eq 'none' ++ and $mb_locale = 'C'; ++ ++my $try = "Try \`$prog --help' for more information.\n"; ++my $inval = "$prog: invalid byte, character or field list\n$try"; ++ + my @Tests = + ( + ['a1', {IN=> ' 'x 1 ."y\n"}, {OUT=> ' 'x 1 ."y\n"}], +@@ -92,6 +100,37 @@ my @Tests = + {EXIT => 1}, {ERR => "$prog: tab stop value is too large\n"}], + ); + ++if ($mb_locale ne 'C') ++ { ++ # Duplicate each test vector, appending "-mb" to the test name and ++ # inserting {ENV => "LC_ALL=$mb_locale"} in the copy, so that we ++ # provide coverage for the distro-added multi-byte code paths. ++ my @new; ++ foreach my $t (@Tests) ++ { ++ my @new_t = @$t; ++ my $test_name = shift @new_t; ++ ++ # Depending on whether unexpand is multi-byte-patched, ++ # it emits different diagnostics: ++ # non-MB: invalid byte or field list ++ # MB: invalid byte, character or field list ++ # Adjust the expected error output accordingly. ++ if (grep {ref $_ eq 'HASH' && exists $_->{ERR} && $_->{ERR} eq $inval} ++ (@new_t)) ++ { ++ my $sub = {ERR_SUBST => 's/, character//'}; ++ push @new_t, $sub; ++ push @$t, $sub; ++ } ++ next if ($test_name =~ 'b-1'); ++ push @new, ["$test_name-mb", @new_t, {ENV => "LC_ALL=$mb_locale"}]; ++ } ++ push @Tests, @new; ++ } ++ ++@Tests = triple_test \@Tests; ++ + my $save_temps = $ENV{DEBUG}; + my $verbose = $ENV{VERBOSE}; + +Index: tests/misc/uniq.pl +=================================================================== +--- tests/misc/uniq.pl.orig ++++ tests/misc/uniq.pl +@@ -23,9 +23,17 @@ my $limits = getlimits (); + my $prog = 'uniq'; + my $try = "Try '$prog --help' for more information.\n"; + ++my $inval = "$prog: invalid byte, character or field list\n$try"; ++ + # Turn off localization of executable's output. + @ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3; + ++my $mb_locale; ++#Comment out next line to disable multibyte tests ++$mb_locale = $ENV{LOCALE_FR_UTF8}; ++! 
defined $mb_locale || $mb_locale eq 'none' ++ and $mb_locale = 'C'; ++ + # When possible, create a "-z"-testing variant of each test. + sub add_z_variants($) + { +@@ -208,6 +216,44 @@ foreach my $t (@Tests) + and push @$t, {ENV=>'_POSIX2_VERSION=199209'}; + } + ++if ($mb_locale ne 'C') ++ { ++ # Duplicate each test vector, appending "-mb" to the test name and ++ # inserting {ENV => "LC_ALL=$mb_locale"} in the copy, so that we ++ # provide coverage for the distro-added multi-byte code paths. ++ my @new; ++ foreach my $t (@Tests) ++ { ++ my @new_t = @$t; ++ my $test_name = shift @new_t; ++ ++ # Depending on whether uniq is multi-byte-patched, ++ # it emits different diagnostics: ++ # non-MB: invalid byte or field list ++ # MB: invalid byte, character or field list ++ # Adjust the expected error output accordingly. ++ if (grep {ref $_ eq 'HASH' && exists $_->{ERR} && $_->{ERR} eq $inval} ++ (@new_t)) ++ { ++ my $sub = {ERR_SUBST => 's/, character//'}; ++ push @new_t, $sub; ++ push @$t, $sub; ++ } ++ next if ($test_name =~ "schar" or $test_name =~ "^obs-plus" or $test_name =~ "119"); ++ push @new, ["$test_name-mb", @new_t, {ENV => "LC_ALL=$mb_locale"}]; ++ } ++ push @Tests, @new; ++ } ++ ++# Remember that triple_test creates from each test with exactly one "IN" ++# file two more tests (.p and .r suffix on name) corresponding to reading ++# input from a file and from a pipe. The pipe-reading test would fail ++# due to a race condition about 1 in 20 times. ++# Remove the IN_PIPE version of the "output-is-input" test above. ++# The others aren't susceptible because they have three inputs each. 
++ ++@Tests = grep {$_->[0] ne 'output-is-input.p'} @Tests; ++ + @Tests = add_z_variants \@Tests; + @Tests = triple_test \@Tests; + +Index: tests/pr/pr-tests.pl +=================================================================== +--- tests/pr/pr-tests.pl.orig ++++ tests/pr/pr-tests.pl +@@ -23,6 +23,15 @@ use strict; + + my $prog = 'pr'; + ++my $mb_locale; ++#Uncomment the following line to enable multibyte tests ++$mb_locale = $ENV{LOCALE_FR_UTF8}; ++! defined $mb_locale || $mb_locale eq 'none' ++ and $mb_locale = 'C'; ++ ++my $try = "Try \`$prog --help' for more information.\n"; ++my $inval = "$prog: invalid byte, character or field list\n$try"; ++ + my @tv = ( + + # -b option is no longer an official option. But it's still working to +@@ -466,8 +475,48 @@ push @Tests, + {IN=>{3=>"x\ty\tz\n"}}, + {OUT=>join("\t", qw(a b c m n o x y z)) . "\n"} ]; + ++# Add _POSIX2_VERSION=199209 to the environment of each test ++# that uses an old-style option like +1. ++if ($mb_locale ne 'C') ++ { ++ # Duplicate each test vector, appending "-mb" to the test name and ++ # inserting {ENV => "LC_ALL=$mb_locale"} in the copy, so that we ++ # provide coverage for the distro-added multi-byte code paths. ++ my @new; ++ foreach my $t (@Tests) ++ { ++ my @new_t = @$t; ++ my $test_name = shift @new_t; ++ ++ # Depending on whether pr is multi-byte-patched, ++ # it emits different diagnostics: ++ # non-MB: invalid byte or field list ++ # MB: invalid byte, character or field list ++ # Adjust the expected error output accordingly. 
++ if (grep {ref $_ eq 'HASH' && exists $_->{ERR} && $_->{ERR} eq $inval} ++ (@new_t)) ++ { ++ my $sub = {ERR_SUBST => 's/, character//'}; ++ push @new_t, $sub; ++ push @$t, $sub; ++ } ++ #temporarily skip some failing tests ++ next if ($test_name =~ "col-0" or $test_name =~ "col-inval"); ++ push @new, ["$test_name-mb", @new_t, {ENV => "LC_ALL=$mb_locale"}]; ++ } ++ push @Tests, @new; ++ } ++ + @Tests = triple_test \@Tests; + ++# Remember that triple_test creates from each test with exactly one "IN" ++# file two more tests (.p and .r suffix on name) corresponding to reading ++# input from a file and from a pipe. The pipe-reading test would fail ++# due to a race condition about 1 in 20 times. ++# Remove the IN_PIPE version of the "output-is-input" test above. ++# The others aren't susceptible because they have three inputs each. ++@Tests = grep {$_->[0] ne 'output-is-input.p'} @Tests; ++ + my $save_temps = $ENV{DEBUG}; + my $verbose = $ENV{VERBOSE}; + diff --git a/coreutils-testsuite.changes b/coreutils-testsuite.changes index 7d477e1..be59278 100644 --- a/coreutils-testsuite.changes +++ b/coreutils-testsuite.changes @@ -1,3 +1,15 @@ +------------------------------------------------------------------- +Sun Dec 1 22:48:48 UTC 2013 - mail@bernhard-voelker.de + +- Update I18N patch from Fedora: + (coreutils-i18n.patch) + * sort: fix multibyte incompabilities (rh#821264) + * pr -e, with a mix of backspaces and TABs, could corrupt the + heap in multibyte locales (analyzed by J.Koncicky) + * path in the testsuite to cover i18n regressions + * Enable cut and sort-merge perl tests for multibyte as well +- Refresh longlong-aarch64.patch. 
+ ------------------------------------------------------------------- Wed Aug 7 08:10:22 UTC 2013 - mail@bernhard-voelker.de diff --git a/coreutils.changes b/coreutils.changes index 7d477e1..be59278 100644 --- a/coreutils.changes +++ b/coreutils.changes @@ -1,3 +1,15 @@ +------------------------------------------------------------------- +Sun Dec 1 22:48:48 UTC 2013 - mail@bernhard-voelker.de + +- Update I18N patch from Fedora: + (coreutils-i18n.patch) + * sort: fix multibyte incompabilities (rh#821264) + * pr -e, with a mix of backspaces and TABs, could corrupt the + heap in multibyte locales (analyzed by J.Koncicky) + * path in the testsuite to cover i18n regressions + * Enable cut and sort-merge perl tests for multibyte as well +- Refresh longlong-aarch64.patch. + ------------------------------------------------------------------- Wed Aug 7 08:10:22 UTC 2013 - mail@bernhard-voelker.de diff --git a/longlong-aarch64.patch b/longlong-aarch64.patch index facdbf2..3d28653 100644 --- a/longlong-aarch64.patch +++ b/longlong-aarch64.patch @@ -11,11 +11,11 @@ Reported at https://bugzilla.redhat.com/917735 src/longlong.h | 21 +++++++-------------- 2 files changed, 11 insertions(+), 14 deletions(-) -diff --git a/src/longlong.h b/src/longlong.h -index 4681642..eba2417 100644 ---- a/src/longlong.h -+++ b/src/longlong.h -@@ -529,23 +529,16 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype); +Index: coreutils-8.21/src/longlong.h +=================================================================== +--- coreutils-8.21.orig/src/longlong.h ++++ coreutils-8.21/src/longlong.h +@@ -529,23 +529,16 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype #endif /* __arm__ */ #if defined (__aarch64__) && W_TYPE_SIZE == 64 @@ -46,6 +46,3 @@ index 4681642..eba2417 100644 #define umul_ppmm(ph, pl, m0, m1) \ do { \ UDItype __m0 = (m0), __m1 = (m1); \ --- -1.8.3 -