mirror of
https://gitlab.gnome.org/GNOME/glib.git
synced 2025-10-02 03:46:39 +02:00
regex: Add G_REGEX_RAW_LOCK
When using G_REGEX_RAW, the pattern could still switch to UTF mode by using "(*UTF)", which the application would not expect. PCRE 8.33 adds a new flag to prevent this.
This commit is contained in:
@@ -125,7 +125,8 @@
|
|||||||
G_REGEX_NEWLINE_CRLF | \
|
G_REGEX_NEWLINE_CRLF | \
|
||||||
G_REGEX_NEWLINE_ANYCRLF | \
|
G_REGEX_NEWLINE_ANYCRLF | \
|
||||||
G_REGEX_BSR_ANYCRLF | \
|
G_REGEX_BSR_ANYCRLF | \
|
||||||
G_REGEX_JAVASCRIPT_COMPAT)
|
G_REGEX_JAVASCRIPT_COMPAT | \
|
||||||
|
G_REGEX_RAW_LOCK)
|
||||||
|
|
||||||
/* Mask of all GRegexCompileFlags values that are (not) passed through to PCRE */
|
/* Mask of all GRegexCompileFlags values that are (not) passed through to PCRE */
|
||||||
#define G_REGEX_COMPILE_PCRE_MASK (G_REGEX_COMPILE_MASK & ~G_REGEX_COMPILE_NONPCRE_MASK)
|
#define G_REGEX_COMPILE_PCRE_MASK (G_REGEX_COMPILE_MASK & ~G_REGEX_COMPILE_NONPCRE_MASK)
|
||||||
@@ -166,6 +167,7 @@ G_STATIC_ASSERT (G_REGEX_NEWLINE_CRLF == PCRE_NEWLINE_CRLF);
|
|||||||
G_STATIC_ASSERT (G_REGEX_NEWLINE_ANYCRLF == PCRE_NEWLINE_ANYCRLF);
|
G_STATIC_ASSERT (G_REGEX_NEWLINE_ANYCRLF == PCRE_NEWLINE_ANYCRLF);
|
||||||
G_STATIC_ASSERT (G_REGEX_BSR_ANYCRLF == PCRE_BSR_ANYCRLF);
|
G_STATIC_ASSERT (G_REGEX_BSR_ANYCRLF == PCRE_BSR_ANYCRLF);
|
||||||
G_STATIC_ASSERT (G_REGEX_JAVASCRIPT_COMPAT == PCRE_JAVASCRIPT_COMPAT);
|
G_STATIC_ASSERT (G_REGEX_JAVASCRIPT_COMPAT == PCRE_JAVASCRIPT_COMPAT);
|
||||||
|
G_STATIC_ASSERT (G_REGEX_RAW_LOCK == PCRE_NEVER_UTF);
|
||||||
|
|
||||||
G_STATIC_ASSERT (G_REGEX_MATCH_ANCHORED == PCRE_ANCHORED);
|
G_STATIC_ASSERT (G_REGEX_MATCH_ANCHORED == PCRE_ANCHORED);
|
||||||
G_STATIC_ASSERT (G_REGEX_MATCH_NOTBOL == PCRE_NOTBOL);
|
G_STATIC_ASSERT (G_REGEX_MATCH_NOTBOL == PCRE_NOTBOL);
|
||||||
@@ -518,6 +520,9 @@ translate_compile_error (gint *errcode, const gchar **errmsg)
|
|||||||
case G_REGEX_ERROR_CHARACTER_VALUE_TOO_LARGE:
|
case G_REGEX_ERROR_CHARACTER_VALUE_TOO_LARGE:
|
||||||
*errmsg = _("character value in \\u.... sequence is too large");
|
*errmsg = _("character value in \\u.... sequence is too large");
|
||||||
break;
|
break;
|
||||||
|
case G_REGEX_ERROR_RAW_LOCK:
|
||||||
|
*errmsg = _("switching to UTF-8 mode is disallowed");
|
||||||
|
break;
|
||||||
|
|
||||||
case 116: /* erroffset passed as NULL */
|
case 116: /* erroffset passed as NULL */
|
||||||
/* This should not happen as we never pass a NULL erroffset */
|
/* This should not happen as we never pass a NULL erroffset */
|
||||||
@@ -1302,6 +1307,7 @@ g_regex_new (const gchar *pattern,
|
|||||||
g_return_val_if_fail (pattern != NULL, NULL);
|
g_return_val_if_fail (pattern != NULL, NULL);
|
||||||
g_return_val_if_fail (error == NULL || *error == NULL, NULL);
|
g_return_val_if_fail (error == NULL || *error == NULL, NULL);
|
||||||
g_return_val_if_fail ((compile_options & ~G_REGEX_COMPILE_MASK) == 0, NULL);
|
g_return_val_if_fail ((compile_options & ~G_REGEX_COMPILE_MASK) == 0, NULL);
|
||||||
|
g_return_val_if_fail ((compile_options & G_REGEX_RAW) || (compile_options & G_REGEX_RAW_LOCK) == 0, NULL);
|
||||||
g_return_val_if_fail ((match_options & ~G_REGEX_MATCH_MASK) == 0, NULL);
|
g_return_val_if_fail ((match_options & ~G_REGEX_MATCH_MASK) == 0, NULL);
|
||||||
|
|
||||||
if (g_once_init_enter (&initialised))
|
if (g_once_init_enter (&initialised))
|
||||||
|
@@ -133,6 +133,9 @@ G_BEGIN_DECLS
|
|||||||
* "(*SKIP)", or "(*THEN)". Since: 2.34
|
* "(*SKIP)", or "(*THEN)". Since: 2.34
|
||||||
* @G_REGEX_ERROR_CHARACTER_VALUE_TOO_LARGE: the character value in the \\u sequence is
|
* @G_REGEX_ERROR_CHARACTER_VALUE_TOO_LARGE: the character value in the \\u sequence is
|
||||||
* too large. Since: 2.34
|
* too large. Since: 2.34
|
||||||
|
* @G_REGEX_ERROR_RAW_LOCK: switching to UTF-8 mode from the pattern is disallowed.
|
||||||
|
* This happens when using %G_REGEX_RAW_LOCK and the pattern contains "(*UTF)"
|
||||||
|
* or "(*UTF8)". Since: 2.44
|
||||||
*
|
*
|
||||||
* Error codes returned by regular expressions functions.
|
* Error codes returned by regular expressions functions.
|
||||||
*
|
*
|
||||||
@@ -198,7 +201,8 @@ typedef enum
|
|||||||
G_REGEX_ERROR_NOT_SUPPORTED_IN_CLASS = 171,
|
G_REGEX_ERROR_NOT_SUPPORTED_IN_CLASS = 171,
|
||||||
G_REGEX_ERROR_TOO_MANY_FORWARD_REFERENCES = 172,
|
G_REGEX_ERROR_TOO_MANY_FORWARD_REFERENCES = 172,
|
||||||
G_REGEX_ERROR_NAME_TOO_LONG = 175,
|
G_REGEX_ERROR_NAME_TOO_LONG = 175,
|
||||||
G_REGEX_ERROR_CHARACTER_VALUE_TOO_LARGE = 176
|
G_REGEX_ERROR_CHARACTER_VALUE_TOO_LARGE = 176,
|
||||||
|
G_REGEX_ERROR_RAW_LOCK = 178
|
||||||
} GRegexError;
|
} GRegexError;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -286,6 +290,8 @@ GQuark g_regex_error_quark (void);
|
|||||||
* characters '\r', '\n' and '\r\n'. Since: 2.34
|
* characters '\r', '\n' and '\r\n'. Since: 2.34
|
||||||
* @G_REGEX_JAVASCRIPT_COMPAT: Changes behaviour so that it is compatible with
|
* @G_REGEX_JAVASCRIPT_COMPAT: Changes behaviour so that it is compatible with
|
||||||
* JavaScript rather than PCRE. Since: 2.34
|
* JavaScript rather than PCRE. Since: 2.34
|
||||||
|
* @G_REGEX_RAW_LOCK: Disallow switching to UTF-8 mode via "(*UTF)" in the pattern
|
||||||
|
* being compiled. %G_REGEX_RAW must also be set when using this flag. Since: 2.44
|
||||||
*
|
*
|
||||||
* Flags specifying compile-time options.
|
* Flags specifying compile-time options.
|
||||||
*
|
*
|
||||||
@@ -313,7 +319,8 @@ typedef enum
|
|||||||
G_REGEX_NEWLINE_CRLF = G_REGEX_NEWLINE_CR | G_REGEX_NEWLINE_LF,
|
G_REGEX_NEWLINE_CRLF = G_REGEX_NEWLINE_CR | G_REGEX_NEWLINE_LF,
|
||||||
G_REGEX_NEWLINE_ANYCRLF = G_REGEX_NEWLINE_CR | 1 << 22,
|
G_REGEX_NEWLINE_ANYCRLF = G_REGEX_NEWLINE_CR | 1 << 22,
|
||||||
G_REGEX_BSR_ANYCRLF = 1 << 23,
|
G_REGEX_BSR_ANYCRLF = 1 << 23,
|
||||||
G_REGEX_JAVASCRIPT_COMPAT = 1 << 25
|
G_REGEX_JAVASCRIPT_COMPAT = 1 << 25,
|
||||||
|
G_REGEX_RAW_LOCK = 1 << 16
|
||||||
} GRegexCompileFlags;
|
} GRegexCompileFlags;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@@ -2279,6 +2279,7 @@ main (int argc, char *argv[])
|
|||||||
TEST_NEW_FAIL ("a[\\NB]c", 0, G_REGEX_ERROR_NOT_SUPPORTED_IN_CLASS);
|
TEST_NEW_FAIL ("a[\\NB]c", 0, G_REGEX_ERROR_NOT_SUPPORTED_IN_CLASS);
|
||||||
TEST_NEW_FAIL ("(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEFG)XX", 0, G_REGEX_ERROR_NAME_TOO_LONG);
|
TEST_NEW_FAIL ("(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEFG)XX", 0, G_REGEX_ERROR_NAME_TOO_LONG);
|
||||||
TEST_NEW_FAIL ("\\u0100", G_REGEX_RAW | G_REGEX_JAVASCRIPT_COMPAT, G_REGEX_ERROR_CHARACTER_VALUE_TOO_LARGE);
|
TEST_NEW_FAIL ("\\u0100", G_REGEX_RAW | G_REGEX_JAVASCRIPT_COMPAT, G_REGEX_ERROR_CHARACTER_VALUE_TOO_LARGE);
|
||||||
|
TEST_NEW_FAIL ("(*UTF)", G_REGEX_RAW | G_REGEX_RAW_LOCK, G_REGEX_ERROR_RAW_LOCK);
|
||||||
|
|
||||||
/* These errors can't really be tested sanely:
|
/* These errors can't really be tested sanely:
|
||||||
* G_REGEX_ERROR_EXPRESSION_TOO_LARGE
|
* G_REGEX_ERROR_EXPRESSION_TOO_LARGE
|
||||||
|
Reference in New Issue
Block a user