From 53b48dfd3bce21fc6b52128859b01329efa10d52 Mon Sep 17 00:00:00 2001 From: Christian Persch Date: Sun, 17 Jun 2012 22:51:44 +0200 Subject: [PATCH] regex: Fix unicode othercasing The old _pcre_ucp_othercase() function was wrong in returning NOTACHAR (0xffffffff) for characters that aren't changed by upper- and lower-casing. This led to PCRE internally using incorrect (or at least inefficient) character classes when using G_REGEX_CASELESS. E.g. [Z-\x{100}] turned into: [Z\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{39c}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{178}z-\x{101}] instead of the expected and efficient [Z\x{39c}\x{178}z-\x{101}] https://bugzilla.gnome.org/show_bug.cgi?id=678273 --- glib/pcre/pcre_tables.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/glib/pcre/pcre_tables.c b/glib/pcre/pcre_tables.c index 034779654..5bac85504 100644 --- a/glib/pcre/pcre_tables.c +++ b/glib/pcre/pcre_tables.c @@ -584,20 +584,17 @@ const ucp_type_table PRIV(utt)[] = { const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table); -unsigned int +unsigned int _pcre_ucp_othercase(const unsigned int c) { - int other_case = NOTACHAR; + unsigned int oc; - if (g_unichar_islower(c)) - other_case = g_unichar_toupper(c); - else if (g_unichar_isupper(c)) - other_case = g_unichar_tolower(c); + if ((oc = g_unichar_tolower(c)) != c) + return oc; + if ((oc = g_unichar_toupper(c)) != c) + return oc; - if (other_case == c) - other_case = NOTACHAR; - - return other_case; + return c; } #endif /* SUPPORT_UTF */