From a8ea3dc03b203e18825aa4c8473c9ab832fbeddf Mon Sep 17 00:00:00 2001
From: Ryan Lortie
Date: Mon, 17 Feb 2014 13:15:55 -0500
Subject: [PATCH] g_str_tokenize_and_fold: do proper transliteration

g_str_tokenize_and_fold() can now do proper locale-sensitive
transliteration for ascii alternatives.

https://bugzilla.gnome.org/show_bug.cgi?id=710142
---
 glib/gstrfuncs.c | 28 ++++++++++++++++------------
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/glib/gstrfuncs.c b/glib/gstrfuncs.c
index 58f8f9a40..e9b13c569 100644
--- a/glib/gstrfuncs.c
+++ b/glib/gstrfuncs.c
@@ -2961,7 +2961,6 @@ g_str_tokenize_and_fold (const gchar *string,
 
   result = split_words (string);
 
-  /* TODO: proper iconv transliteration (locale-dependent) */
   if (ascii_alternates)
     {
       gint i, j, n;
@@ -2974,21 +2973,26 @@ g_str_tokenize_and_fold (const gchar *string,
         {
           if (!g_str_is_ascii (result[i]))
             {
-              gchar *decomposed;
+              gchar *composed;
               gchar *ascii;
-              gint k = 0;
-              gint l = 0;
+              gint k;
 
-              decomposed = g_utf8_normalize (result[i], -1, G_NORMALIZE_ALL);
-              ascii = g_malloc (strlen (decomposed) + 1);
+              composed = g_utf8_normalize (result[i], -1, G_NORMALIZE_ALL_COMPOSE);
 
-              for (k = 0; decomposed[k]; k++)
-                if (~decomposed[k] & 0x80)
-                  ascii[l++] = decomposed[k];
-              ascii[l] = '\0';
+              ascii = g_str_to_ascii (composed, translit_locale);
 
-              (*ascii_alternates)[j++] = ascii;
-              g_free (decomposed);
+              /* Only accept strings that are now entirely alnums */
+              for (k = 0; ascii[k]; k++)
+                if (!g_ascii_isalnum (ascii[k]))
+                  break;
+
+              if (ascii[k] == '\0')
+                /* Made it to the end... */
+                (*ascii_alternates)[j++] = ascii;
+              else
+                g_free (ascii);
+
+              g_free (composed);
             }
         }
 