g_str_tokenize_and_fold: do proper transliteration

g_str_tokenize_and_fold() can now do proper locale-sensitive
transliteration for ASCII alternatives.

https://bugzilla.gnome.org/show_bug.cgi?id=710142
This commit is contained in:
Ryan Lortie 2014-02-17 13:15:55 -05:00
parent d7291760df
commit a8ea3dc03b

View File

@ -2961,7 +2961,6 @@ g_str_tokenize_and_fold (const gchar *string,
result = split_words (string); result = split_words (string);
/* TODO: proper iconv transliteration (locale-dependent) */
if (ascii_alternates) if (ascii_alternates)
{ {
gint i, j, n; gint i, j, n;
@ -2974,21 +2973,26 @@ g_str_tokenize_and_fold (const gchar *string,
{ {
if (!g_str_is_ascii (result[i])) if (!g_str_is_ascii (result[i]))
{ {
gchar *decomposed; gchar *composed;
gchar *ascii; gchar *ascii;
gint k = 0; gint k;
gint l = 0;
decomposed = g_utf8_normalize (result[i], -1, G_NORMALIZE_ALL); composed = g_utf8_normalize (result[i], -1, G_NORMALIZE_ALL_COMPOSE);
ascii = g_malloc (strlen (decomposed) + 1);
for (k = 0; decomposed[k]; k++) ascii = g_str_to_ascii (composed, translit_locale);
if (~decomposed[k] & 0x80)
ascii[l++] = decomposed[k];
ascii[l] = '\0';
(*ascii_alternates)[j++] = ascii; /* Only accept strings that are now entirely alnums */
g_free (decomposed); for (k = 0; ascii[k]; k++)
if (!g_ascii_isalnum (ascii[k]))
break;
if (ascii[k] == '\0')
/* Made it to the end... */
(*ascii_alternates)[j++] = ascii;
else
g_free (ascii);
g_free (composed);
} }
} }