diff --git a/glib/gregex.c b/glib/gregex.c index 06580412c..ad75bd80f 100644 --- a/glib/gregex.c +++ b/glib/gregex.c @@ -125,7 +125,8 @@ G_REGEX_NEWLINE_CRLF | \ G_REGEX_NEWLINE_ANYCRLF | \ G_REGEX_BSR_ANYCRLF | \ - G_REGEX_JAVASCRIPT_COMPAT) + G_REGEX_JAVASCRIPT_COMPAT | \ + G_REGEX_RAW_LOCK) /* Mask of all GRegexCompileFlags values that are (not) passed trough to PCRE */ #define G_REGEX_COMPILE_PCRE_MASK (G_REGEX_COMPILE_MASK & ~G_REGEX_COMPILE_NONPCRE_MASK) @@ -166,6 +167,7 @@ G_STATIC_ASSERT (G_REGEX_NEWLINE_CRLF == PCRE_NEWLINE_CRLF); G_STATIC_ASSERT (G_REGEX_NEWLINE_ANYCRLF == PCRE_NEWLINE_ANYCRLF); G_STATIC_ASSERT (G_REGEX_BSR_ANYCRLF == PCRE_BSR_ANYCRLF); G_STATIC_ASSERT (G_REGEX_JAVASCRIPT_COMPAT == PCRE_JAVASCRIPT_COMPAT); +G_STATIC_ASSERT (G_REGEX_RAW_LOCK == PCRE_NEVER_UTF); G_STATIC_ASSERT (G_REGEX_MATCH_ANCHORED == PCRE_ANCHORED); G_STATIC_ASSERT (G_REGEX_MATCH_NOTBOL == PCRE_NOTBOL); @@ -518,6 +520,9 @@ translate_compile_error (gint *errcode, const gchar **errmsg) case G_REGEX_ERROR_CHARACTER_VALUE_TOO_LARGE: *errmsg = _("character value in \\u.... sequence is too large"); break; + case G_REGEX_ERROR_RAW_LOCK: + *errmsg = _("switching to UTF-8 mode is disallowed"); + break; case 116: /* erroffset passed as NULL */ /* This should not happen as we never pass a NULL erroffset */ @@ -1302,6 +1307,7 @@ g_regex_new (const gchar *pattern, g_return_val_if_fail (pattern != NULL, NULL); g_return_val_if_fail (error == NULL || *error == NULL, NULL); g_return_val_if_fail ((compile_options & ~G_REGEX_COMPILE_MASK) == 0, NULL); + g_return_val_if_fail ((compile_options & G_REGEX_RAW) || (compile_options & G_REGEX_RAW_LOCK) == 0, NULL); g_return_val_if_fail ((match_options & ~G_REGEX_MATCH_MASK) == 0, NULL); if (g_once_init_enter (&initialised)) diff --git a/glib/gregex.h b/glib/gregex.h index eff6d608e..b22fcf5ba 100644 --- a/glib/gregex.h +++ b/glib/gregex.h @@ -133,6 +133,9 @@ G_BEGIN_DECLS * "(*SKIP)", or "(*THEN)". 
Since: 2.34 * @G_REGEX_ERROR_CHARACTER_VALUE_TOO_LARGE: the character value in the \\u sequence is * too large. Since: 2.34 + * @G_REGEX_ERROR_RAW_LOCK: switching to UTF-8 mode from the pattern is disallowed. + * This happens when using %G_REGEX_RAW_LOCK and the pattern contains "(*UTF)" + * or "(*UTF8)". Since: 2.44 * * Error codes returned by regular expressions functions. * @@ -198,7 +201,8 @@ typedef enum G_REGEX_ERROR_NOT_SUPPORTED_IN_CLASS = 171, G_REGEX_ERROR_TOO_MANY_FORWARD_REFERENCES = 172, G_REGEX_ERROR_NAME_TOO_LONG = 175, - G_REGEX_ERROR_CHARACTER_VALUE_TOO_LARGE = 176 + G_REGEX_ERROR_CHARACTER_VALUE_TOO_LARGE = 176, + G_REGEX_ERROR_RAW_LOCK = 178 } GRegexError; /** @@ -286,6 +290,8 @@ GQuark g_regex_error_quark (void); * characters '\r', '\n' and '\r\n'. Since: 2.34 * @G_REGEX_JAVASCRIPT_COMPAT: Changes behaviour so that it is compatible with * JavaScript rather than PCRE. Since: 2.34 + * @G_REGEX_RAW_LOCK: Disallow switching to UTF-8 mode via "(*UTF)" or "(*UTF8)" in the + * pattern being compiled. %G_REGEX_RAW must also be set when using this flag. Since: 2.44 * * Flags specifying compile-time options. 
* @@ -313,7 +319,8 @@ typedef enum G_REGEX_NEWLINE_CRLF = G_REGEX_NEWLINE_CR | G_REGEX_NEWLINE_LF, G_REGEX_NEWLINE_ANYCRLF = G_REGEX_NEWLINE_CR | 1 << 22, G_REGEX_BSR_ANYCRLF = 1 << 23, - G_REGEX_JAVASCRIPT_COMPAT = 1 << 25 + G_REGEX_JAVASCRIPT_COMPAT = 1 << 25, + G_REGEX_RAW_LOCK = 1 << 16 } GRegexCompileFlags; /** diff --git a/glib/tests/regex.c b/glib/tests/regex.c index 92679b959..b3f2ecd2f 100644 --- a/glib/tests/regex.c +++ b/glib/tests/regex.c @@ -2279,6 +2279,7 @@ main (int argc, char *argv[]) TEST_NEW_FAIL ("a[\\NB]c", 0, G_REGEX_ERROR_NOT_SUPPORTED_IN_CLASS); TEST_NEW_FAIL ("(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEFG)XX", 0, G_REGEX_ERROR_NAME_TOO_LONG); TEST_NEW_FAIL ("\\u0100", G_REGEX_RAW | G_REGEX_JAVASCRIPT_COMPAT, G_REGEX_ERROR_CHARACTER_VALUE_TOO_LARGE); + TEST_NEW_FAIL ("(*UTF)", G_REGEX_RAW | G_REGEX_RAW_LOCK, G_REGEX_ERROR_RAW_LOCK); /* These errors can't really be tested sanely: * G_REGEX_ERROR_EXPRESSION_TOO_LARGE