guniprop: Fix g_utf8_strdown() for Turkish locale

In the Turkish locale, the lowercase equivalent of LATIN CAPITAL LETTER I WITH
DOT ABOVE (U+0130) is the ordinary dotted lowercase i (U+0069).

Fixes part of issue #390
This commit is contained in:
Kjell Ahlstedt 2021-02-08 16:32:41 +01:00 committed by Sebastian Dröge
parent 3c5339266d
commit b9a4897900
2 changed files with 52 additions and 6 deletions

View File

@ -987,14 +987,18 @@ real_tolower (const gchar *str,
last = p;
p = g_utf8_next_char (p);
if (locale_type == LOCALE_TURKIC && (c == 'I' ||
if (locale_type == LOCALE_TURKIC && (c == 'I' || c == 0x130 ||
c == G_UNICHAR_FULLWIDTH_I))
{
if (g_utf8_get_char (p) == 0x0307)
{
gboolean combining_dot = (c == 'I' || c == G_UNICHAR_FULLWIDTH_I) &&
g_utf8_get_char (p) == 0x0307;
if (combining_dot || c == 0x130)
{
/* I + COMBINING DOT ABOVE => i (U+0069) */
/* I + COMBINING DOT ABOVE => i (U+0069)
* LATIN CAPITAL LETTER I WITH DOT ABOVE => i (U+0069) */
len += g_unichar_to_utf8 (0x0069, out_buffer ? out_buffer + len : NULL);
p = g_utf8_next_char (p);
if (combining_dot)
p = g_utf8_next_char (p);
}
else
{

View File

@ -464,6 +464,47 @@ test_strdown (void)
g_free (str_down);
}
/* Test that g_utf8_strup() and g_utf8_strdown() return the correct
 * value for Turkish 'i' with and without dot above.
 *
 * The test switches LC_ALL to tr_TR for its duration and is skipped when
 * that locale cannot be selected. The locale that was active on entry is
 * restored before returning. */
static void
test_turkish_strupdown (void)
{
  char *str_up = NULL;
  char *str_down = NULL;
  const char *str = "iII"
                    "\xcc\x87"  /* COMBINING DOT ABOVE (U+307) */
                    "\xc4\xb1"  /* LATIN SMALL LETTER DOTLESS I (U+131) */
                    "\xc4\xb0"; /* LATIN CAPITAL LETTER I WITH DOT ABOVE (U+130) */
  /* Query (NULL argument) and copy the current locale *before* changing it:
   * a successful setlocale (LC_ALL, "tr_TR") returns the name of the new
   * locale, not the previous one, so its return value cannot be used to
   * restore the original state afterwards. */
  char *oldlocale = g_strdup (setlocale (LC_ALL, NULL));

  if (setlocale (LC_ALL, "tr_TR") == NULL)
    {
      /* The locale was not changed, so there is nothing to restore. */
      g_free (oldlocale);
      g_test_skip ("locale tr_TR not available");
      return;
    }

  str_up = g_utf8_strup (str, strlen (str));
  str_down = g_utf8_strdown (str, strlen (str));

  /* i => LATIN CAPITAL LETTER I WITH DOT ABOVE,
   * I => I,
   * I + COMBINING DOT ABOVE => I + COMBINING DOT ABOVE,
   * LATIN SMALL LETTER DOTLESS I => I,
   * LATIN CAPITAL LETTER I WITH DOT ABOVE => LATIN CAPITAL LETTER I WITH DOT ABOVE */
  g_assert_cmpstr (str_up, ==, "\xc4\xb0II\xcc\x87I\xc4\xb0");

  /* i => i,
   * I => LATIN SMALL LETTER DOTLESS I,
   * I + COMBINING DOT ABOVE => i,
   * LATIN SMALL LETTER DOTLESS I => LATIN SMALL LETTER DOTLESS I,
   * LATIN CAPITAL LETTER I WITH DOT ABOVE => i */
  g_assert_cmpstr (str_down, ==, "i\xc4\xb1i\xc4\xb1i");

  g_free (str_up);
  g_free (str_down);

  /* Put back the locale that was active on entry so later tests are not
   * affected by the Turkish case-mapping rules. */
  setlocale (LC_ALL, oldlocale);
  g_free (oldlocale);
}
/* Test that g_utf8_casefold() returns the correct value for various
* ASCII and Unicode alphabetic, numeric, and other, codepoints. */
static void
@ -1644,6 +1685,7 @@ main (int argc,
g_test_add_func ("/unicode/space", test_space);
g_test_add_func ("/unicode/strdown", test_strdown);
g_test_add_func ("/unicode/strup", test_strup);
g_test_add_func ("/unicode/turkish-strupdown", test_turkish_strupdown);
g_test_add_func ("/unicode/title", test_title);
g_test_add_func ("/unicode/upper", test_upper);
g_test_add_func ("/unicode/validate", test_unichar_validate);