Subject: sort i18n: make sure to NUL-terminate the sort keys Fixes http://bugs.gnu.org/18540 * src/sort.c (keycompare_mb): Compare only the keys themselves, by temporarily NUL-terminating them in place. Patched analogously to the non-multibyte upstream patch: http://git.sv.gnu.org/cgit/coreutils.git/commit/?id=b877ea4b3e * tests/misc/sort.pl (23): Add test, taken from upstream patch: http://git.sv.gnu.org/cgit/coreutils.git/commit/?id=4d8c4dfc21 (11a): Exempt from MB run, as the collation order in the French locale would provoke a false positive, because the strcoll() comparison between the trailing blank and tabs leads to a different result there. (11b): Likewise. --- src/sort.c | 22 ++++++++++++++++++++-- tests/misc/sort.pl | 5 +++++ 2 files changed, 25 insertions(+), 2 deletions(-) Index: src/sort.c =================================================================== --- src/sort.c.orig +++ src/sort.c @@ -3236,6 +3236,9 @@ keycompare_mb (const struct line *a, con size_t lena = lima <= texta ? 0 : lima - texta; size_t lenb = limb <= textb ? 0 : limb - textb; + char enda IF_LINT (= 0); + char endb IF_LINT (= 0); + char const *translate = key->translate; bool const *ignore = key->ignore; @@ -3253,6 +3256,12 @@ keycompare_mb (const struct line *a, con texta = copy_a; textb = copy_b; lena = new_len_a; lenb = new_len_b; } + else + { + /* Use the keys in-place, temporarily null-terminated. */ + enda = texta[lena]; texta[lena] = '\0'; + endb = textb[lenb]; textb[lenb] = '\0'; + } if (key->random) diff = compare_random (texta, lena, textb, lenb); @@ -3276,13 +3285,22 @@ keycompare_mb (const struct line *a, con diff = 1; else if (hard_LC_COLLATE && !folding) { - diff = xmemcoll0 (texta, lena, textb, lenb); + diff = xmemcoll0 (texta, lena + 1, textb, lenb + 1); } else - diff = memcmp (texta, textb, MIN (lena + 1,lenb + 1)); + { + diff = memcmp (texta, textb, MIN (lena, lenb)); + if (diff == 0) + diff = lena < lenb ? 
-1 : lena != lenb; + } if (ignore || translate) free (texta); + else + { + texta[lena] = enda; + textb[lenb] = endb; + } if (diff) goto not_equal; Index: tests/misc/sort.pl =================================================================== --- tests/misc/sort.pl.orig +++ tests/misc/sort.pl @@ -322,6 +322,10 @@ my @Tests = ["22a", '-k 2,2fd -k 1,1r', {IN=>"3 b\n4 B\n"}, {OUT=>"4 B\n3 b\n"}], ["22b", '-k 2,2d -k 1,1r', {IN=>"3 b\n4 b\n"}, {OUT=>"4 b\n3 b\n"}], +# This fails in Fedora 20, per Göran Uddeborg in: http://bugs.gnu.org/18540 +["23", '-s -k1,1 -t/', {IN=>"a b/x\na-b-c/x\n"}, {OUT=>"a b/x\na-b-c/x\n"}, + {ENV => "LC_ALL=$mb_locale"}], + ["no-file1", 'no-file', {EXIT=>2}, {ERR=>$no_file}], # This test failed until 1.22f. Sort didn't give an error. # From Will Edgington. @@ -446,6 +450,7 @@ if ($mb_locale ne 'C') #disable several failing tests until investigation, disable all tests with envvars set next if (grep {ref $_ eq 'HASH' && exists $_->{ENV}} (@new_t)); next if ($test_name =~ "18g" or $test_name =~ "sort-numeric" or $test_name =~ "08[ab]" or $test_name =~ "03[def]" or $test_name =~ "h4" or $test_name =~ "n1" or $test_name =~ "2[01]a"); + next if ($test_name =~ "11[ab]"); # avoid false positive: expected result differs from the MB result due to collation rules. push @new, ["$test_name-mb", @new_t, {ENV => "LC_ALL=$mb_locale"}]; } push @Tests, @new;