diff --git a/glib/gregex.c b/glib/gregex.c index fd8a9f597..a1383a459 100644 --- a/glib/gregex.c +++ b/glib/gregex.c @@ -267,13 +267,6 @@ translate_compile_error (gint *errcode, const gchar **errmsg) case G_REGEX_ERROR_UNRECOGNIZED_ESCAPE: *errmsg = _("unrecognized character follows \\"); break; - case 137: - /* A number of Perl escapes are not handled by PCRE. - * Therefore it explicitly raises ERR37. - */ - *errcode = G_REGEX_ERROR_UNRECOGNIZED_ESCAPE; - *errmsg = _("case-changing escapes (\\l, \\L, \\u, \\U) are not allowed here"); - break; case G_REGEX_ERROR_QUANTIFIERS_OUT_OF_ORDER: *errmsg = _("numbers out of order in {} quantifier"); break; @@ -292,16 +285,12 @@ translate_compile_error (gint *errcode, const gchar **errmsg) case G_REGEX_ERROR_NOTHING_TO_REPEAT: *errmsg = _("nothing to repeat"); break; + case 111: /* internal error: unexpected repeat */ + *errcode = G_REGEX_ERROR_INTERNAL; + *errmsg = _("unexpected repeat"); + break; case G_REGEX_ERROR_UNRECOGNIZED_CHARACTER: - *errmsg = _("unrecognized character after (?"); - break; - case 124: - *errcode = G_REGEX_ERROR_UNRECOGNIZED_CHARACTER; - *errmsg = _("unrecognized character after (?<"); - break; - case 141: - *errcode = G_REGEX_ERROR_UNRECOGNIZED_CHARACTER; - *errmsg = _("unrecognized character after (?P"); + *errmsg = _("unrecognized character after (? or (?-"); break; case G_REGEX_ERROR_POSIX_NAMED_CLASS_OUTSIDE_CLASS: *errmsg = _("POSIX named classes are supported only within a class"); @@ -309,17 +298,6 @@ translate_compile_error (gint *errcode, const gchar **errmsg) case G_REGEX_ERROR_UNMATCHED_PARENTHESIS: *errmsg = _("missing terminating )"); break; - case 122: - *errcode = G_REGEX_ERROR_UNMATCHED_PARENTHESIS; - *errmsg = _(") without opening ("); - break; - case 129: - *errcode = G_REGEX_ERROR_UNMATCHED_PARENTHESIS; - /* translators: '(?R' and '(?[+-]digits' are both meant as (groups of) - * sequences here, '(?-54' would be an example for the second group. 
- */ - *errmsg = _("(?R or (?[+-]digits must be followed by )"); - break; case G_REGEX_ERROR_INEXISTENT_SUBPATTERN_REFERENCE: *errmsg = _("reference to non-existent subpattern"); break; @@ -327,11 +305,23 @@ translate_compile_error (gint *errcode, const gchar **errmsg) *errmsg = _("missing ) after comment"); break; case G_REGEX_ERROR_EXPRESSION_TOO_LARGE: - *errmsg = _("regular expression too large"); + *errmsg = _("regular expression is too large"); break; case G_REGEX_ERROR_MEMORY_ERROR: *errmsg = _("failed to get memory"); break; + case 122: /* unmatched parentheses */ + *errcode = G_REGEX_ERROR_UNMATCHED_PARENTHESIS; + *errmsg = _(") without opening ("); + break; + case 123: /* internal error: code overflow */ + *errcode = G_REGEX_ERROR_INTERNAL; + *errmsg = _("code overflow"); + break; + case 124: /* "unrecognized character after (?<\0 */ + *errcode = G_REGEX_ERROR_UNRECOGNIZED_CHARACTER; + *errmsg = _("unrecognized character after (?<"); + break; case G_REGEX_ERROR_VARIABLE_LENGTH_LOOKBEHIND: *errmsg = _("lookbehind assertion is not fixed length"); break; @@ -344,6 +334,13 @@ translate_compile_error (gint *errcode, const gchar **errmsg) case G_REGEX_ERROR_ASSERTION_EXPECTED: *errmsg = _("assertion expected after (?("); break; + case 129: + *errcode = G_REGEX_ERROR_UNMATCHED_PARENTHESIS; + /* translators: '(?R' and '(?[+-]digits' are both meant as (groups of) + * sequences here, '(?-54' would be an example for the second group. + */ + *errmsg = _("(?R or (?[+-]digits must be followed by )"); + break; case G_REGEX_ERROR_UNKNOWN_POSIX_CLASS_NAME: *errmsg = _("unknown POSIX class name"); break; @@ -359,9 +356,20 @@ translate_compile_error (gint *errcode, const gchar **errmsg) case G_REGEX_ERROR_SINGLE_BYTE_MATCH_IN_LOOKBEHIND: *errmsg = _("\\C not allowed in lookbehind assertion"); break; + case 137: /* PCRE does not support \\L, \\l, \\N{name}, \\U, or \\u\0 */ + /* A number of Perl escapes are not handled by PCRE. + * Therefore it explicitly raises ERR37. 
+ */ + *errcode = G_REGEX_ERROR_UNRECOGNIZED_ESCAPE; + *errmsg = _("escapes \\L, \\l, \\N{name}, \\U, and \\u are not supported"); + break; case G_REGEX_ERROR_INFINITE_LOOP: *errmsg = _("recursive call could loop indefinitely"); break; + case 141: /* unrecognized character after (?P\0 */ + *errcode = G_REGEX_ERROR_UNRECOGNIZED_CHARACTER; + *errmsg = _("unrecognized character after (?P"); + break; case G_REGEX_ERROR_MISSING_SUBPATTERN_NAME_TERMINATOR: *errmsg = _("missing terminator in subpattern name"); break; @@ -383,6 +391,14 @@ translate_compile_error (gint *errcode, const gchar **errmsg) case G_REGEX_ERROR_INVALID_OCTAL_VALUE: *errmsg = _("octal value is greater than \\377"); break; + case 152: /* internal error: overran compiling workspace */ + *errcode = G_REGEX_ERROR_INTERNAL; + *errmsg = _("overran compiling workspace"); + break; + case 153: /* internal error: previously-checked referenced subpattern not found */ + *errcode = G_REGEX_ERROR_INTERNAL; + *errmsg = _("previously-checked referenced subpattern not found"); + break; case G_REGEX_ERROR_TOO_MANY_BRANCHES_IN_DEFINE: *errmsg = _("DEFINE group contains more than one branch"); break; @@ -390,44 +406,77 @@ translate_compile_error (gint *errcode, const gchar **errmsg) *errmsg = _("inconsistent NEWLINE options"); break; case G_REGEX_ERROR_MISSING_BACK_REFERENCE: - *errmsg = _("\\g is not followed by a braced name or an optionally " - "braced non-zero number"); + *errmsg = _("\\g is not followed by a braced, angle-bracketed, or quoted name or " + "number, or by a plain number"); break; - case 111: - *errcode = G_REGEX_ERROR_INTERNAL; - *errmsg = _("unexpected repeat"); + case G_REGEX_ERROR_INVALID_RELATIVE_REFERENCE: + *errmsg = _("a numbered reference must not be zero"); break; - case 123: - *errcode = G_REGEX_ERROR_INTERNAL; - *errmsg = _("code overflow"); + case G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_FORBIDDEN: + *errmsg = _("an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)"); 
break; - case 152: - *errcode = G_REGEX_ERROR_INTERNAL; - *errmsg = _("overran compiling workspace"); + case G_REGEX_ERROR_UNKNOWN_BACKTRACKING_CONTROL_VERB: + *errmsg = _("(*VERB) not recognized"); break; - case 153: - *errcode = G_REGEX_ERROR_INTERNAL; - *errmsg = _("previously-checked referenced subpattern not found"); + case G_REGEX_ERROR_NUMBER_TOO_BIG: + *errmsg = _("number is too big"); break; - case 116: + case G_REGEX_ERROR_MISSING_SUBPATTERN_NAME: + *errmsg = _("missing subpattern name after (?&"); + break; + case G_REGEX_ERROR_MISSING_DIGIT: + *errmsg = _("digit expected after (?+"); + break; + case G_REGEX_ERROR_EXTRA_SUBPATTERN_NAME: + *errmsg = _("different names for subpatterns of the same number are not allowed"); + break; + case G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_REQUIRED: + *errmsg = _("(*MARK) must have an argument"); + break; + case G_REGEX_ERROR_INVALID_CONTROL_CHAR: + *errmsg = _( "\\c must be followed by an ASCII character"); + break; + case G_REGEX_ERROR_MISSING_NAME: + *errmsg = _("\\k is not followed by a braced, angle-bracketed, or quoted name"); + break; + case G_REGEX_ERROR_NOT_SUPPORTED_IN_CLASS: + *errmsg = _("\\N is not supported in a class"); + break; + case G_REGEX_ERROR_TOO_MANY_FORWARD_REFERENCES: + *errmsg = _("too many forward references"); + break; + case G_REGEX_ERROR_NAME_TOO_LONG: + *errmsg = _("name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)"); + break; + + case 116: /* erroffset passed as NULL */ /* This should not happen as we never pass a NULL erroffset */ g_warning ("erroffset passed as NULL"); *errcode = G_REGEX_ERROR_COMPILE; break; - case 117: + case 117: /* unknown option bit(s) set */ /* This should not happen as we check options before passing them * to pcre_compile2() */ g_warning ("unknown option bit(s) set"); *errcode = G_REGEX_ERROR_COMPILE; break; - case 132: - case 144: - case 145: - /* These errors should not happen as we are using an UTF8-enabled PCRE + case 132: /* this version 
of PCRE is compiled without UTF support */ + case 144: /* invalid UTF-8 string */ + case 145: /* support for \\P, \\p, and \\X has not been compiled */ + case 167: /* this version of PCRE is not compiled with Unicode property support */ + case 173: /* disallowed Unicode code point (>= 0xd800 && <= 0xdfff) */ + case 174: /* invalid UTF-16 string */ + /* These errors should not happen as we are using an UTF-8 and UCP-enabled PCRE * and we do not check if strings are valid */ + case 164: /* ] is an invalid data character in JavaScript compatibility mode */ + /* This should not happen as we don't use PCRE_JAVASCRIPT_COMPAT */ g_warning ("%s", *errmsg); *errcode = G_REGEX_ERROR_COMPILE; break; + case 170: /* internal error: unknown opcode in find_fixedlength() */ + *errcode = G_REGEX_ERROR_INTERNAL; + break; + default: *errcode = G_REGEX_ERROR_COMPILE; } diff --git a/glib/gregex.h b/glib/gregex.h index ae6a5a138..4964c662e 100644 --- a/glib/gregex.h +++ b/glib/gregex.h @@ -107,9 +107,28 @@ G_BEGIN_DECLS * This error is never raised. Since: 2.16 Deprecated: 2.34 * @G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS: Inconsistent newline options. * Since 2.16 - * @G_REGEX_ERROR_MISSING_BACK_REFERENCE: "\\g" is not followed by a braced - * name or an optionally braced non-zero number. Since 2.16 - * + * @G_REGEX_ERROR_MISSING_BACK_REFERENCE: "\\g" is not followed by a braced, + * angle-bracketed, or quoted name or number, or by a plain number. Since: 2.16 + * @G_REGEX_ERROR_INVALID_RELATIVE_REFERENCE: relative reference must not be zero. Since: 2.34 + * @G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_FORBIDDEN: the backtracking + * control verb used does not allow an argument. Since: 2.34 + * @G_REGEX_ERROR_UNKNOWN_BACKTRACKING_CONTROL_VERB: unknown backtracking + * control verb. Since: 2.34 + * @G_REGEX_ERROR_NUMBER_TOO_BIG: number is too big in escape sequence. Since: 2.34 + * @G_REGEX_ERROR_MISSING_SUBPATTERN_NAME: Missing subpattern name. 
Since: 2.34 + * @G_REGEX_ERROR_EXTRA_SUBPATTERN_NAME: different names for subpatterns of the + * same number are not allowed. Since: 2.34 + * @G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_REQUIRED: the backtracking control + * verb requires an argument. Since: 2.34 + * @G_REGEX_ERROR_INVALID_CONTROL_CHAR: "\\c" must be followed by an ASCII + * character. Since: 2.34 + * @G_REGEX_ERROR_MISSING_NAME: "\\k" is not followed by a braced, angle-bracketed, or + * quoted name. Since: 2.34 + * @G_REGEX_ERROR_NOT_SUPPORTED_IN_CLASS: "\\N" is not supported in a class. Since: 2.34 + * @G_REGEX_ERROR_TOO_MANY_FORWARD_REFERENCES: too many forward references. Since: 2.34 + * @G_REGEX_ERROR_NAME_TOO_LONG: the name is too long in "(*MARK)", "(*PRUNE)", + * "(*SKIP)", or "(*THEN)". Since: 2.34 + * * Error codes returned by regular expressions functions. * * Since: 2.14 @@ -159,7 +178,20 @@ typedef enum G_REGEX_ERROR_TOO_MANY_BRANCHES_IN_DEFINE = 154, G_REGEX_ERROR_DEFINE_REPETION = 155, G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS = 156, - G_REGEX_ERROR_MISSING_BACK_REFERENCE = 157 + G_REGEX_ERROR_MISSING_BACK_REFERENCE = 157, + G_REGEX_ERROR_INVALID_RELATIVE_REFERENCE = 158, + G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_FORBIDDEN = 159, + G_REGEX_ERROR_UNKNOWN_BACKTRACKING_CONTROL_VERB = 160, + G_REGEX_ERROR_NUMBER_TOO_BIG = 161, + G_REGEX_ERROR_MISSING_SUBPATTERN_NAME = 162, + G_REGEX_ERROR_MISSING_DIGIT = 163, + G_REGEX_ERROR_EXTRA_SUBPATTERN_NAME = 165, + G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_REQUIRED = 166, + G_REGEX_ERROR_INVALID_CONTROL_CHAR = 168, + G_REGEX_ERROR_MISSING_NAME = 169, + G_REGEX_ERROR_NOT_SUPPORTED_IN_CLASS = 171, + G_REGEX_ERROR_TOO_MANY_FORWARD_REFERENCES = 172, + G_REGEX_ERROR_NAME_TOO_LONG = 175 } GRegexError; /** diff --git a/glib/tests/regex.c b/glib/tests/regex.c index af34fc45d..6deb50f40 100644 --- a/glib/tests/regex.c +++ b/glib/tests/regex.c @@ -2072,9 +2072,7 @@ main (int argc, char *argv[]) TEST_NEW_FAIL ("a{4,2}", 0, 
G_REGEX_ERROR_QUANTIFIERS_OUT_OF_ORDER); TEST_NEW_FAIL ("a{999999,}", 0, G_REGEX_ERROR_QUANTIFIER_TOO_BIG); TEST_NEW_FAIL ("[a-z", 0, G_REGEX_ERROR_UNTERMINATED_CHARACTER_CLASS); -#if 0 - TEST_NEW_FAIL ("[\\b]", 0, G_REGEX_ERROR_INVALID_ESCAPE_IN_CHARACTER_CLASS); -#endif + TEST_NEW_FAIL ("(?X)[\\B]", 0, G_REGEX_ERROR_INVALID_ESCAPE_IN_CHARACTER_CLASS); TEST_NEW_FAIL ("[z-a]", 0, G_REGEX_ERROR_RANGE_OUT_OF_ORDER); TEST_NEW_FAIL ("{2,4}", 0, G_REGEX_ERROR_NOTHING_TO_REPEAT); TEST_NEW_FAIL ("a(?u)", 0, G_REGEX_ERROR_UNRECOGNIZED_CHARACTER); @@ -2084,8 +2082,55 @@ main (int argc, char *argv[]) TEST_NEW_FAIL ("a)b", 0, G_REGEX_ERROR_UNMATCHED_PARENTHESIS); TEST_NEW_FAIL ("a(?R", 0, G_REGEX_ERROR_UNMATCHED_PARENTHESIS); TEST_NEW_FAIL ("a(?-54", 0, G_REGEX_ERROR_UNMATCHED_PARENTHESIS); + TEST_NEW_FAIL ("(ab\\2)", 0, G_REGEX_ERROR_INEXISTENT_SUBPATTERN_REFERENCE); TEST_NEW_FAIL ("a(?#abc", 0, G_REGEX_ERROR_UNTERMINATED_COMMENT); + TEST_NEW_FAIL ("(?<=a+)b", 0, G_REGEX_ERROR_VARIABLE_LENGTH_LOOKBEHIND); + TEST_NEW_FAIL ("(?(1?)a|b)", 0, G_REGEX_ERROR_MALFORMED_CONDITION); + TEST_NEW_FAIL ("(a)(?(1)a|b|c)", 0, G_REGEX_ERROR_TOO_MANY_CONDITIONAL_BRANCHES); + TEST_NEW_FAIL ("(?(?i))", 0, G_REGEX_ERROR_ASSERTION_EXPECTED); TEST_NEW_FAIL ("a[[:fubar:]]b", 0, G_REGEX_ERROR_UNKNOWN_POSIX_CLASS_NAME); + TEST_NEW_FAIL ("[[.ch.]]", 0, G_REGEX_ERROR_POSIX_COLLATING_ELEMENTS_NOT_SUPPORTED); + TEST_NEW_FAIL ("\\x{110000}", 0, G_REGEX_ERROR_HEX_CODE_TOO_LARGE); + TEST_NEW_FAIL ("^(?(0)f|b)oo", 0, G_REGEX_ERROR_INVALID_CONDITION); + TEST_NEW_FAIL ("(?<=\\C)X", 0, G_REGEX_ERROR_SINGLE_BYTE_MATCH_IN_LOOKBEHIND); + TEST_NEW_FAIL ("(?!\\w)(?R)", 0, G_REGEX_ERROR_INFINITE_LOOP); + TEST_NEW_FAIL ("(?(?eks)(?Peccs)", 0, G_REGEX_ERROR_DUPLICATE_SUBPATTERN_NAME); +#if 0 + TEST_NEW_FAIL (?, 0, G_REGEX_ERROR_MALFORMED_PROPERTY); + TEST_NEW_FAIL (?, 0, G_REGEX_ERROR_UNKNOWN_PROPERTY); +#endif + TEST_NEW_FAIL ("\\666", G_REGEX_RAW, G_REGEX_ERROR_INVALID_OCTAL_VALUE); + TEST_NEW_FAIL ("^(?(DEFINE) abc 
| xyz ) ", 0, G_REGEX_ERROR_TOO_MANY_BRANCHES_IN_DEFINE); +#if 0 + TEST_NEW_FAIL ("a", G_REGEX_NEWLINE_CRLF | G_REGEX_NEWLINE_ANYCRLF, G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS); +#endif + TEST_NEW_FAIL ("^(a)\\g{3", 0, G_REGEX_ERROR_MISSING_BACK_REFERENCE); + TEST_NEW_FAIL ("^(a)\\g{0}", 0, G_REGEX_ERROR_INVALID_RELATIVE_REFERENCE); + TEST_NEW_FAIL ("abc(*FAIL:123)xyz", 0, G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_FORBIDDEN); + TEST_NEW_FAIL ("a(*FOOBAR)b", 0, G_REGEX_ERROR_UNKNOWN_BACKTRACKING_CONTROL_VERB); + TEST_NEW_FAIL ("(?i:A{1,}\\6666666666)", 0, G_REGEX_ERROR_NUMBER_TOO_BIG); + TEST_NEW_FAIL ("(?)(?&)", 0, G_REGEX_ERROR_MISSING_SUBPATTERN_NAME); + TEST_NEW_FAIL ("(?+-a)", 0, G_REGEX_ERROR_MISSING_DIGIT); + TEST_NEW_FAIL ("(?|(?A)|(?B))", 0, G_REGEX_ERROR_EXTRA_SUBPATTERN_NAME); + TEST_NEW_FAIL ("a(*MARK)b", 0, G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_REQUIRED); + TEST_NEW_FAIL ("^\\c€", 0, G_REGEX_ERROR_INVALID_CONTROL_CHAR); + TEST_NEW_FAIL ("\\k", 0, G_REGEX_ERROR_MISSING_NAME); + TEST_NEW_FAIL ("a[\\NB]c", 0, G_REGEX_ERROR_NOT_SUPPORTED_IN_CLASS); +#if 0 + /* This one should fail but doesn't. Why? */ + TEST_NEW_FAIL ("(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEFG)XX", 0, G_REGEX_ERROR_NAME_TOO_LONG); +#endif + /* These errors can't really be tested sanely: + * G_REGEX_ERROR_EXPRESSION_TOO_LARGE + * G_REGEX_ERROR_MEMORY_ERROR + * G_REGEX_ERROR_SUBPATTERN_NAME_TOO_LONG + * G_REGEX_ERROR_TOO_MANY_SUBPATTERNS + * G_REGEX_ERROR_TOO_MANY_FORWARD_REFERENCES + * + * These errors are obsolete and never raised by PCRE: + * G_REGEX_ERROR_DEFINE_REPETION + */ /* TEST_MATCH_SIMPLE(pattern, string, compile_opts, match_opts, expected) */ TEST_MATCH_SIMPLE("a", "", 0, 0, FALSE);