Subject: sort i18n: make sure to NUL-terminate the sort keys

Fixes http://bugs.gnu.org/18540

* src/sort.c (keycompare_mb): Use the keys alone, i.e. null-terminated.
Patched analogously to the non-multibyte upstream patch:
http://git.sv.gnu.org/cgit/coreutils.git/commit/?id=b877ea4b3e

* tests/misc/sort.pl (23): Add test, taken from upstream patch:
http://git.sv.gnu.org/cgit/coreutils.git/commit/?id=4d8c4dfc21
(11a): Exempt from MB run, as the collation order in the French locale
would provoke a false positive, because the strcoll() comparison between
the trailing blank and tabs leads to a different result there.
(11b): Likewise.

---
 src/sort.c         |   22 ++++++++++++++++++++--
 tests/misc/sort.pl |    1 +
 2 files changed, 21 insertions(+), 2 deletions(-)

Index: src/sort.c
===================================================================
--- src/sort.c.orig	2015-07-09 14:50:08.426211492 +0200
+++ src/sort.c	2015-07-09 14:50:08.439211187 +0200
@@ -3235,6 +3235,9 @@ keycompare_mb (const struct line *a, con
       size_t lena = lima <= texta ? 0 : lima - texta;
       size_t lenb = limb <= textb ? 0 : limb - textb;
 
+      char enda IF_LINT (= 0);
+      char endb IF_LINT (= 0);
+
       char const *translate = key->translate;
       bool const *ignore = key->ignore;
 
@@ -3254,6 +3257,12 @@ keycompare_mb (const struct line *a, con
           texta = copy_a; textb = copy_b;
           lena = new_len_a; lenb = new_len_b;
         }
+      else
+        {
+          /* Use the keys in-place, temporarily null-terminated. */
+          enda = texta[lena]; texta[lena] = '\0';
+          endb = textb[lenb]; textb[lenb] = '\0';
+        }
 
       if (key->random)
         diff = compare_random (texta, lena, textb, lenb);
@@ -3277,13 +3286,22 @@ keycompare_mb (const struct line *a, con
         diff = 1;
       else if (hard_LC_COLLATE && !folding)
         {
-          diff = xmemcoll0 (texta, lena, textb, lenb);
+          diff = xmemcoll0 (texta, lena + 1, textb, lenb + 1);
         }
       else
-        diff = memcmp (texta, textb, MIN (lena + 1,lenb + 1));
+        {
+          diff = memcmp (texta, textb, MIN (lena, lenb));
+          if (diff == 0)
+            diff = lena < lenb ? -1 : lena != lenb;
+        }
 
       if (ignore || translate)
         free (texta);
+      else
+        {
+          texta[lena] = enda;
+          textb[lenb] = endb;
+        }
 
       if (diff)
         goto not_equal;
Index: tests/misc/sort.pl
===================================================================
--- tests/misc/sort.pl.orig	2015-07-09 14:50:08.429211422 +0200
+++ tests/misc/sort.pl	2015-07-09 14:50:08.440211164 +0200
@@ -450,6 +450,7 @@ if ($mb_locale ne 'C')
         #disable several failing tests until investigation, disable all tests with envvars set
         next if (grep {ref $_ eq 'HASH' && exists $_->{ENV}} (@new_t));
         next if ($test_name =~ "18g" or $test_name =~ "sort-numeric" or $test_name =~ "08[ab]" or $test_name =~ "03[def]" or $test_name =~ "h4" or $test_name =~ "n1" or $test_name =~ "2[01]a");
+        next if ($test_name =~ "11[ab]"); # avoid FP: expected result differs to MB result due to collation rules.
         push @new, ["$test_name-mb", @new_t, {ENV => "LC_ALL=$mb_locale"}];
       }
     push @Tests, @new;