mirror of
https://gitlab.gnome.org/GNOME/glib.git
synced 2025-02-24 03:02:10 +01:00
regex: Add JavaScript compat mode
Since PCRE 7.7, there's a flag that changes the behaviour to be more JavaScript compatible. Since it's no effort to expose it, just do so.
This commit is contained in:
parent
5ebea3c467
commit
c28a9ed498
@ -114,7 +114,8 @@
|
|||||||
G_REGEX_NEWLINE_LF | \
|
G_REGEX_NEWLINE_LF | \
|
||||||
G_REGEX_NEWLINE_CRLF | \
|
G_REGEX_NEWLINE_CRLF | \
|
||||||
G_REGEX_NEWLINE_ANYCRLF | \
|
G_REGEX_NEWLINE_ANYCRLF | \
|
||||||
G_REGEX_BSR_ANYCRLF)
|
G_REGEX_BSR_ANYCRLF | \
|
||||||
|
G_REGEX_JAVASCRIPT_COMPAT)
|
||||||
|
|
||||||
/* Mask of all GRegexCompileFlags values that are (not) passed through to PCRE */
|
/* Mask of all GRegexCompileFlags values that are (not) passed through to PCRE */
|
||||||
#define G_REGEX_COMPILE_PCRE_MASK (G_REGEX_COMPILE_MASK & ~G_REGEX_COMPILE_NONPCRE_MASK)
|
#define G_REGEX_COMPILE_PCRE_MASK (G_REGEX_COMPILE_MASK & ~G_REGEX_COMPILE_NONPCRE_MASK)
|
||||||
@ -136,20 +137,21 @@
|
|||||||
G_REGEX_MATCH_BSR_ANY)
|
G_REGEX_MATCH_BSR_ANY)
|
||||||
|
|
||||||
/* we rely on these flags having the same values */
|
/* we rely on these flags having the same values */
|
||||||
G_STATIC_ASSERT (G_REGEX_CASELESS == PCRE_CASELESS);
|
G_STATIC_ASSERT (G_REGEX_CASELESS == PCRE_CASELESS);
|
||||||
G_STATIC_ASSERT (G_REGEX_MULTILINE == PCRE_MULTILINE);
|
G_STATIC_ASSERT (G_REGEX_MULTILINE == PCRE_MULTILINE);
|
||||||
G_STATIC_ASSERT (G_REGEX_DOTALL == PCRE_DOTALL);
|
G_STATIC_ASSERT (G_REGEX_DOTALL == PCRE_DOTALL);
|
||||||
G_STATIC_ASSERT (G_REGEX_EXTENDED == PCRE_EXTENDED);
|
G_STATIC_ASSERT (G_REGEX_EXTENDED == PCRE_EXTENDED);
|
||||||
G_STATIC_ASSERT (G_REGEX_ANCHORED == PCRE_ANCHORED);
|
G_STATIC_ASSERT (G_REGEX_ANCHORED == PCRE_ANCHORED);
|
||||||
G_STATIC_ASSERT (G_REGEX_DOLLAR_ENDONLY == PCRE_DOLLAR_ENDONLY);
|
G_STATIC_ASSERT (G_REGEX_DOLLAR_ENDONLY == PCRE_DOLLAR_ENDONLY);
|
||||||
G_STATIC_ASSERT (G_REGEX_UNGREEDY == PCRE_UNGREEDY);
|
G_STATIC_ASSERT (G_REGEX_UNGREEDY == PCRE_UNGREEDY);
|
||||||
G_STATIC_ASSERT (G_REGEX_NO_AUTO_CAPTURE == PCRE_NO_AUTO_CAPTURE);
|
G_STATIC_ASSERT (G_REGEX_NO_AUTO_CAPTURE == PCRE_NO_AUTO_CAPTURE);
|
||||||
G_STATIC_ASSERT (G_REGEX_DUPNAMES == PCRE_DUPNAMES);
|
G_STATIC_ASSERT (G_REGEX_DUPNAMES == PCRE_DUPNAMES);
|
||||||
G_STATIC_ASSERT (G_REGEX_NEWLINE_CR == PCRE_NEWLINE_CR);
|
G_STATIC_ASSERT (G_REGEX_NEWLINE_CR == PCRE_NEWLINE_CR);
|
||||||
G_STATIC_ASSERT (G_REGEX_NEWLINE_LF == PCRE_NEWLINE_LF);
|
G_STATIC_ASSERT (G_REGEX_NEWLINE_LF == PCRE_NEWLINE_LF);
|
||||||
G_STATIC_ASSERT (G_REGEX_NEWLINE_CRLF == PCRE_NEWLINE_CRLF);
|
G_STATIC_ASSERT (G_REGEX_NEWLINE_CRLF == PCRE_NEWLINE_CRLF);
|
||||||
G_STATIC_ASSERT (G_REGEX_NEWLINE_ANYCRLF == PCRE_NEWLINE_ANYCRLF);
|
G_STATIC_ASSERT (G_REGEX_NEWLINE_ANYCRLF == PCRE_NEWLINE_ANYCRLF);
|
||||||
G_STATIC_ASSERT (G_REGEX_BSR_ANYCRLF == PCRE_BSR_ANYCRLF);
|
G_STATIC_ASSERT (G_REGEX_BSR_ANYCRLF == PCRE_BSR_ANYCRLF);
|
||||||
|
G_STATIC_ASSERT (G_REGEX_JAVASCRIPT_COMPAT == PCRE_JAVASCRIPT_COMPAT);
|
||||||
|
|
||||||
G_STATIC_ASSERT (G_REGEX_MATCH_ANCHORED == PCRE_ANCHORED);
|
G_STATIC_ASSERT (G_REGEX_MATCH_ANCHORED == PCRE_ANCHORED);
|
||||||
G_STATIC_ASSERT (G_REGEX_MATCH_NOTBOL == PCRE_NOTBOL);
|
G_STATIC_ASSERT (G_REGEX_MATCH_NOTBOL == PCRE_NOTBOL);
|
||||||
@ -472,6 +474,9 @@ translate_compile_error (gint *errcode, const gchar **errmsg)
|
|||||||
case G_REGEX_ERROR_MISSING_DIGIT:
|
case G_REGEX_ERROR_MISSING_DIGIT:
|
||||||
*errmsg = _("digit expected after (?+");
|
*errmsg = _("digit expected after (?+");
|
||||||
break;
|
break;
|
||||||
|
case G_REGEX_ERROR_INVALID_DATA_CHARACTER:
|
||||||
|
*errmsg = _("] is an invalid data character in JavaScript compatibility mode");
|
||||||
|
break;
|
||||||
case G_REGEX_ERROR_EXTRA_SUBPATTERN_NAME:
|
case G_REGEX_ERROR_EXTRA_SUBPATTERN_NAME:
|
||||||
*errmsg = _("different names for subpatterns of the same number are not allowed");
|
*errmsg = _("different names for subpatterns of the same number are not allowed");
|
||||||
break;
|
break;
|
||||||
@ -513,11 +518,6 @@ translate_compile_error (gint *errcode, const gchar **errmsg)
|
|||||||
case 174: /* invalid UTF-16 string */
|
case 174: /* invalid UTF-16 string */
|
||||||
/* These errors should not happen as we are using an UTF-8 and UCP-enabled PCRE
|
/* These errors should not happen as we are using an UTF-8 and UCP-enabled PCRE
|
||||||
* and we do not check if strings are valid */
|
* and we do not check if strings are valid */
|
||||||
case 164: /* ] is an invalid data character in JavaScript compatibility mode */
|
|
||||||
/* This should not happen as we don't use PCRE_JAVASCRIPT_COMPAT */
|
|
||||||
g_warning ("%s", *errmsg);
|
|
||||||
*errcode = G_REGEX_ERROR_COMPILE;
|
|
||||||
break;
|
|
||||||
case 170: /* internal error: unknown opcode in find_fixedlength() */
|
case 170: /* internal error: unknown opcode in find_fixedlength() */
|
||||||
*errcode = G_REGEX_ERROR_INTERNAL;
|
*errcode = G_REGEX_ERROR_INTERNAL;
|
||||||
break;
|
break;
|
||||||
|
@ -116,6 +116,8 @@ G_BEGIN_DECLS
|
|||||||
* control verb. Since: 2.34
|
* control verb. Since: 2.34
|
||||||
* @G_REGEX_ERROR_NUMBER_TOO_BIG: number is too big in escape sequence. Since: 2.34
|
* @G_REGEX_ERROR_NUMBER_TOO_BIG: number is too big in escape sequence. Since: 2.34
|
||||||
* @G_REGEX_ERROR_MISSING_SUBPATTERN_NAME: Missing subpattern name. Since: 2.34
|
* @G_REGEX_ERROR_MISSING_SUBPATTERN_NAME: Missing subpattern name. Since: 2.34
|
||||||
|
* @G_REGEX_ERROR_INVALID_DATA_CHARACTER: In JavaScript compatibility mode,
|
||||||
|
* "]" is an invalid data character. Since: 2.34
|
||||||
* @G_REGEX_ERROR_EXTRA_SUBPATTERN_NAME: different names for subpatterns of the
|
* @G_REGEX_ERROR_EXTRA_SUBPATTERN_NAME: different names for subpatterns of the
|
||||||
* same number are not allowed. Since: 2.34
|
* same number are not allowed. Since: 2.34
|
||||||
* @G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_REQUIRED: the backtracking control
|
* @G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_REQUIRED: the backtracking control
|
||||||
@ -185,6 +187,7 @@ typedef enum
|
|||||||
G_REGEX_ERROR_NUMBER_TOO_BIG = 161,
|
G_REGEX_ERROR_NUMBER_TOO_BIG = 161,
|
||||||
G_REGEX_ERROR_MISSING_SUBPATTERN_NAME = 162,
|
G_REGEX_ERROR_MISSING_SUBPATTERN_NAME = 162,
|
||||||
G_REGEX_ERROR_MISSING_DIGIT = 163,
|
G_REGEX_ERROR_MISSING_DIGIT = 163,
|
||||||
|
G_REGEX_ERROR_INVALID_DATA_CHARACTER = 164,
|
||||||
G_REGEX_ERROR_EXTRA_SUBPATTERN_NAME = 165,
|
G_REGEX_ERROR_EXTRA_SUBPATTERN_NAME = 165,
|
||||||
G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_REQUIRED = 166,
|
G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_REQUIRED = 166,
|
||||||
G_REGEX_ERROR_INVALID_CONTROL_CHAR = 168,
|
G_REGEX_ERROR_INVALID_CONTROL_CHAR = 168,
|
||||||
@ -299,7 +302,8 @@ typedef enum
|
|||||||
G_REGEX_NEWLINE_LF = 1 << 21,
|
G_REGEX_NEWLINE_LF = 1 << 21,
|
||||||
G_REGEX_NEWLINE_CRLF = G_REGEX_NEWLINE_CR | G_REGEX_NEWLINE_LF,
|
G_REGEX_NEWLINE_CRLF = G_REGEX_NEWLINE_CR | G_REGEX_NEWLINE_LF,
|
||||||
G_REGEX_NEWLINE_ANYCRLF = G_REGEX_NEWLINE_CR | 1 << 22,
|
G_REGEX_NEWLINE_ANYCRLF = G_REGEX_NEWLINE_CR | 1 << 22,
|
||||||
G_REGEX_BSR_ANYCRLF = 1 << 23
|
G_REGEX_BSR_ANYCRLF = 1 << 23,
|
||||||
|
G_REGEX_JAVASCRIPT_COMPAT = 1 << 25
|
||||||
} GRegexCompileFlags;
|
} GRegexCompileFlags;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -355,6 +359,8 @@ typedef enum
|
|||||||
* single characters U+000B LINE TABULATION, U+000C FORM FEED (FF),
|
* single characters U+000B LINE TABULATION, U+000C FORM FEED (FF),
|
||||||
* U+0085 NEXT LINE (NEL), U+2028 LINE SEPARATOR and
|
* U+0085 NEXT LINE (NEL), U+2028 LINE SEPARATOR and
|
||||||
* U+2029 PARAGRAPH SEPARATOR. Since: 2.34
|
* U+2029 PARAGRAPH SEPARATOR. Since: 2.34
|
||||||
|
* @G_REGEX_JAVASCRIPT_COMPAT: Changes behaviour so that it is compatible with
|
||||||
|
* JavaScript rather than PCRE. Since: 2.34
|
||||||
*
|
*
|
||||||
* Flags specifying match-time options.
|
* Flags specifying match-time options.
|
||||||
*
|
*
|
||||||
|
@ -2134,6 +2134,7 @@ main (int argc, char *argv[])
|
|||||||
TEST_NEW_FAIL ("(?i:A{1,}\\6666666666)", 0, G_REGEX_ERROR_NUMBER_TOO_BIG);
|
TEST_NEW_FAIL ("(?i:A{1,}\\6666666666)", 0, G_REGEX_ERROR_NUMBER_TOO_BIG);
|
||||||
TEST_NEW_FAIL ("(?<a>)(?&)", 0, G_REGEX_ERROR_MISSING_SUBPATTERN_NAME);
|
TEST_NEW_FAIL ("(?<a>)(?&)", 0, G_REGEX_ERROR_MISSING_SUBPATTERN_NAME);
|
||||||
TEST_NEW_FAIL ("(?+-a)", 0, G_REGEX_ERROR_MISSING_DIGIT);
|
TEST_NEW_FAIL ("(?+-a)", 0, G_REGEX_ERROR_MISSING_DIGIT);
|
||||||
|
TEST_NEW_FAIL ("TA]", G_REGEX_JAVASCRIPT_COMPAT, G_REGEX_ERROR_INVALID_DATA_CHARACTER);
|
||||||
TEST_NEW_FAIL ("(?|(?<a>A)|(?<b>B))", 0, G_REGEX_ERROR_EXTRA_SUBPATTERN_NAME);
|
TEST_NEW_FAIL ("(?|(?<a>A)|(?<b>B))", 0, G_REGEX_ERROR_EXTRA_SUBPATTERN_NAME);
|
||||||
TEST_NEW_FAIL ("a(*MARK)b", 0, G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_REQUIRED);
|
TEST_NEW_FAIL ("a(*MARK)b", 0, G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_REQUIRED);
|
||||||
TEST_NEW_FAIL ("^\\c€", 0, G_REGEX_ERROR_INVALID_CONTROL_CHAR);
|
TEST_NEW_FAIL ("^\\c€", 0, G_REGEX_ERROR_INVALID_CONTROL_CHAR);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user