Add new error codes for when compilation fails and make compilation error translatable

2007-12-03  Marco Barisione <marco@barisione.org>

	* glib/gregex.c:
	* glib/gregex.h: Add new error codes for when compilation fails and
	make compilation error translatable. (#482313, Morten Welinder)

svn path=/trunk/; revision=6021
This commit is contained in:
Marco Barisione 2007-12-03 18:36:26 +00:00 committed by Marco Barisione
parent e6588b7a23
commit 458f3eca76
5 changed files with 306 additions and 18 deletions

View File

@ -1,3 +1,9 @@
2007-12-03 Marco Barisione <marco@barisione.org>
* glib/gregex.c:
* glib/gregex.h: Add new error codes for when compilation fails and
make compilation error translatable. (#482313, Morten Welinder)
2007-12-03 Matthias Clasen <mclasen@redhat.com>
* glib/gkeyfile.c: Add a hash table to speed up group lookups,

View File

@ -80,10 +80,50 @@ written by Philip Hazel.
Error codes returned by regular expressions functions.
</para>
@G_REGEX_ERROR_COMPILE: Compilation of the regular expression in g_regex_new() failed.
@G_REGEX_ERROR_COMPILE: Compilation of the regular expression failed.
@G_REGEX_ERROR_OPTIMIZE: Optimization of the regular expression failed.
@G_REGEX_ERROR_REPLACE: Replacement failed due to an ill-formed replacement string.
@G_REGEX_ERROR_MATCH: The match process failed.
@G_REGEX_ERROR_INTERNAL: Internal error of the regular expression engine. Since 2.16
@G_REGEX_ERROR_STRAY_BACKSLASH: "\\" at end of pattern. Since 2.16
@G_REGEX_ERROR_MISSING_CONTROL_CHAR: "\\c" at end of pattern. Since 2.16
@G_REGEX_ERROR_UNRECOGNIZED_ESCAPE: Unrecognized character follows "\\". Since 2.16
@G_REGEX_ERROR_QUANTIFIERS_OUT_OF_ORDER: Numbers out of order in "{}" quantifier. Since 2.16
@G_REGEX_ERROR_QUANTIFIER_TOO_BIG: Number too big in "{}" quantifier. Since 2.16
@G_REGEX_ERROR_UNTERMINATED_CHARACTER_CLASS: Missing terminating "]" for character class. Since 2.16
@G_REGEX_ERROR_INVALID_ESCAPE_IN_CHARACTER_CLASS: Invalid escape sequence in character class. Since 2.16
@G_REGEX_ERROR_RANGE_OUT_OF_ORDER: Range out of order in character class. Since 2.16
@G_REGEX_ERROR_NOTHING_TO_REPEAT: Nothing to repeat. Since 2.16
@G_REGEX_ERROR_UNRECOGNIZED_CHARACTER: Unrecognized character after "(?", "(?&lt;" or "(?P". Since 2.16
@G_REGEX_ERROR_POSIX_NAMED_CLASS_OUTSIDE_CLASS: POSIX named classes are supported only within a class. Since 2.16
@G_REGEX_ERROR_UNMATCHED_PARENTHESIS: Missing terminating ")" or ")" without opening "(". Since 2.16
@G_REGEX_ERROR_INEXISTENT_SUBPATTERN_REFERENCE: Reference to non-existent subpattern. Since 2.16
@G_REGEX_ERROR_UNTERMINATED_COMMENT: Missing terminating ")" after comment. Since 2.16
@G_REGEX_ERROR_EXPRESSION_TOO_LARGE: Regular expression too large. Since 2.16
@G_REGEX_ERROR_MEMORY_ERROR: Failed to get memory. Since 2.16
@G_REGEX_ERROR_VARIABLE_LENGTH_LOOKBEHIND: Lookbehind assertion is not fixed length. Since 2.16
@G_REGEX_ERROR_MALFORMED_CONDITION: Malformed number or name after "(?(". Since 2.16
@G_REGEX_ERROR_TOO_MANY_CONDITIONAL_BRANCHES: Conditional group contains more than two branches. Since 2.16
@G_REGEX_ERROR_ASSERTION_EXPECTED: Assertion expected after "(?(". Since 2.16
@G_REGEX_ERROR_UNKNOWN_POSIX_CLASS_NAME: Unknown POSIX class name. Since 2.16
@G_REGEX_ERROR_POSIX_COLLATING_ELEMENTS_NOT_SUPPORTED: POSIX collating elements are not supported. Since 2.16
@G_REGEX_ERROR_HEX_CODE_TOO_LARGE: Character value in "\\x{...}" sequence is too large. Since 2.16
@G_REGEX_ERROR_INVALID_CONDITION: Invalid condition "(?(0)". Since 2.16
@G_REGEX_ERROR_SINGLE_BYTE_MATCH_IN_LOOKBEHIND: "\\C" not allowed in lookbehind assertion. Since 2.16
@G_REGEX_ERROR_INFINITE_LOOP: Recursive call could loop indefinitely. Since 2.16
@G_REGEX_ERROR_MISSING_SUBPATTERN_NAME_TERMINATOR: Missing terminator in subpattern name. Since 2.16
@G_REGEX_ERROR_DUPLICATE_SUBPATTERN_NAME: Two named subpatterns have the same name. Since 2.16
@G_REGEX_ERROR_MALFORMED_PROPERTY: Malformed "\\P" or "\\p" sequence. Since 2.16
@G_REGEX_ERROR_UNKNOWN_PROPERTY: Unknown property name after "\\P" or "\\p". Since 2.16
@G_REGEX_ERROR_SUBPATTERN_NAME_TOO_LONG: Subpattern name is too long (maximum 32 characters). Since 2.16
@G_REGEX_ERROR_TOO_MANY_SUBPATTERNS: Too many named subpatterns (maximum 10,000). Since 2.16
@G_REGEX_ERROR_INVALID_OCTAL_VALUE: Octal value is greater than "\\377". Since 2.16
@G_REGEX_ERROR_TOO_MANY_BRANCHES_IN_DEFINE: "DEFINE" group contains more than one branch. Since 2.16
@G_REGEX_ERROR_DEFINE_REPETION: Repeating a "DEFINE" group is not allowed. Since 2.16
@G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS: Inconsistent newline options. Since 2.16
@G_REGEX_ERROR_MISSING_BACK_REFERENCE: "\\g" is not followed by a braced name or an
optionally braced non-zero number. Since 2.16
@Since: 2.14
<!-- ##### MACRO G_REGEX_ERROR ##### -->

View File

@ -179,6 +179,192 @@ match_error (gint errcode)
return _("unknown error");
}
/*
 * translate_compile_error:
 * @errcode: on input, the error code returned by PCRE; on output, the
 *   corresponding GRegexError value
 * @errmsg: on input, the untranslated message returned by PCRE; on
 *   output, a translated message when the error is a known one,
 *   otherwise the pointer is left untouched
 *
 * Converts a PCRE compilation error into a GRegexError code and, where
 * possible, a translatable error message.
 */
static void
translate_compile_error (gint *errcode, gchar **errmsg)
{
  /* Compile errors are created adding 100 to the error code returned
   * by PCRE.
   * If errcode is known we put the translatable error message in
   * errmsg. If errcode is unknown we put the generic
   * G_REGEX_ERROR_COMPILE error code in errcode and keep the
   * untranslated error message returned by PCRE.
   * Note that there can be more PCRE errors with the same GRegexError
   * and that some PCRE errors are useless for us.
   *
   * Every numeric case label below is therefore "PCRE code + 100",
   * because the switch is evaluated after the offset has been added. */
  *errcode += 100;

  switch (*errcode)
    {
    case G_REGEX_ERROR_STRAY_BACKSLASH:
      *errmsg = _("\\ at end of pattern");
      break;
    case G_REGEX_ERROR_MISSING_CONTROL_CHAR:
      *errmsg = _("\\c at end of pattern");
      break;
    case G_REGEX_ERROR_UNRECOGNIZED_ESCAPE:
      *errmsg = _("unrecognized character follows \\");
      break;
    case 137:
      *errcode = G_REGEX_ERROR_UNRECOGNIZED_ESCAPE;
      *errmsg = _("case changing escapes are not allowed here");
      break;
    case G_REGEX_ERROR_QUANTIFIERS_OUT_OF_ORDER:
      *errmsg = _("numbers out of order in {} quantifier");
      break;
    case G_REGEX_ERROR_QUANTIFIER_TOO_BIG:
      *errmsg = _("number too big in {} quantifier");
      break;
    case G_REGEX_ERROR_UNTERMINATED_CHARACTER_CLASS:
      *errmsg = _("missing terminating ] for character class");
      break;
    case G_REGEX_ERROR_INVALID_ESCAPE_IN_CHARACTER_CLASS:
      *errmsg = _("invalid escape sequence in character class");
      break;
    case G_REGEX_ERROR_RANGE_OUT_OF_ORDER:
      *errmsg = _("range out of order in character class");
      break;
    case G_REGEX_ERROR_NOTHING_TO_REPEAT:
      *errmsg = _("nothing to repeat");
      break;
    case G_REGEX_ERROR_UNRECOGNIZED_CHARACTER:
      *errmsg = _("unrecognized character after (?");
      break;
    case 124:
      *errcode = G_REGEX_ERROR_UNRECOGNIZED_CHARACTER;
      *errmsg = _("unrecognized character after (?<");
      break;
    case 141:
      *errcode = G_REGEX_ERROR_UNRECOGNIZED_CHARACTER;
      *errmsg = _("unrecognized character after (?P");
      break;
    case G_REGEX_ERROR_POSIX_NAMED_CLASS_OUTSIDE_CLASS:
      *errmsg = _("POSIX named classes are supported only within a class");
      break;
    case G_REGEX_ERROR_UNMATCHED_PARENTHESIS:
      *errmsg = _("missing terminating )");
      break;
    case 122:
      *errcode = G_REGEX_ERROR_UNMATCHED_PARENTHESIS;
      *errmsg = _(") without opening (");
      break;
    case 129:
      *errcode = G_REGEX_ERROR_UNMATCHED_PARENTHESIS;
      *errmsg = _("(?R or (?[+-]digits must be followed by )");
      break;
    case G_REGEX_ERROR_INEXISTENT_SUBPATTERN_REFERENCE:
      *errmsg = _("reference to non-existent subpattern");
      break;
    case G_REGEX_ERROR_UNTERMINATED_COMMENT:
      *errmsg = _("missing ) after comment");
      break;
    case G_REGEX_ERROR_EXPRESSION_TOO_LARGE:
      *errmsg = _("regular expression too large");
      break;
    case G_REGEX_ERROR_MEMORY_ERROR:
      *errmsg = _("failed to get memory");
      break;
    case G_REGEX_ERROR_VARIABLE_LENGTH_LOOKBEHIND:
      *errmsg = _("lookbehind assertion is not fixed length");
      break;
    case G_REGEX_ERROR_MALFORMED_CONDITION:
      *errmsg = _("malformed number or name after (?(");
      break;
    case G_REGEX_ERROR_TOO_MANY_CONDITIONAL_BRANCHES:
      *errmsg = _("conditional group contains more than two branches");
      break;
    case G_REGEX_ERROR_ASSERTION_EXPECTED:
      *errmsg = _("assertion expected after (?(");
      break;
    case G_REGEX_ERROR_UNKNOWN_POSIX_CLASS_NAME:
      *errmsg = _("unknown POSIX class name");
      break;
    case G_REGEX_ERROR_POSIX_COLLATING_ELEMENTS_NOT_SUPPORTED:
      *errmsg = _("POSIX collating elements are not supported");
      break;
    case G_REGEX_ERROR_HEX_CODE_TOO_LARGE:
      *errmsg = _("character value in \\x{...} sequence is too large");
      break;
    case G_REGEX_ERROR_INVALID_CONDITION:
      *errmsg = _("invalid condition (?(0)");
      break;
    case G_REGEX_ERROR_SINGLE_BYTE_MATCH_IN_LOOKBEHIND:
      *errmsg = _("\\C not allowed in lookbehind assertion");
      break;
    case G_REGEX_ERROR_INFINITE_LOOP:
      *errmsg = _("recursive call could loop indefinitely");
      break;
    case G_REGEX_ERROR_MISSING_SUBPATTERN_NAME_TERMINATOR:
      *errmsg = _("missing terminator in subpattern name");
      break;
    case G_REGEX_ERROR_DUPLICATE_SUBPATTERN_NAME:
      *errmsg = _("two named subpatterns have the same name");
      break;
    case G_REGEX_ERROR_MALFORMED_PROPERTY:
      *errmsg = _("malformed \\P or \\p sequence");
      break;
    case G_REGEX_ERROR_UNKNOWN_PROPERTY:
      *errmsg = _("unknown property name after \\P or \\p");
      break;
    case G_REGEX_ERROR_SUBPATTERN_NAME_TOO_LONG:
      *errmsg = _("subpattern name is too long (maximum 32 characters)");
      break;
    case G_REGEX_ERROR_TOO_MANY_SUBPATTERNS:
      *errmsg = _("too many named subpatterns (maximum 10,000)");
      break;
    case G_REGEX_ERROR_INVALID_OCTAL_VALUE:
      *errmsg = _("octal value is greater than \\377");
      break;
    case G_REGEX_ERROR_TOO_MANY_BRANCHES_IN_DEFINE:
      *errmsg = _("DEFINE group contains more than one branch");
      break;
    case G_REGEX_ERROR_DEFINE_REPETION:
      *errmsg = _("repeating a DEFINE group is not allowed");
      break;
    case G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS:
      *errmsg = _("inconsistent NEWLINE options");
      break;
    case G_REGEX_ERROR_MISSING_BACK_REFERENCE:
      *errmsg = _("\\g is not followed by a braced name or an optionally "
                  "braced non-zero number");
      break;

    /* Errors that are mapped to G_REGEX_ERROR_INTERNAL. */
    case 111:
      *errcode = G_REGEX_ERROR_INTERNAL;
      *errmsg = _("unexpected repeat");
      break;
    case 123:
      *errcode = G_REGEX_ERROR_INTERNAL;
      *errmsg = _("code overflow");
      break;
    case 152:
      *errcode = G_REGEX_ERROR_INTERNAL;
      *errmsg = _("overran compiling workspace");
      break;
    case 153:
      *errcode = G_REGEX_ERROR_INTERNAL;
      *errmsg = _("previously-checked referenced subpattern not found");
      break;

    /* Errors that indicate a misuse of PCRE on our side: warn and
     * report the generic compile error with PCRE's own message. */
    case 116:
      /* This should not happen as we never pass a NULL erroffset */
      g_warning ("erroffset passed as NULL");
      *errcode = G_REGEX_ERROR_COMPILE;
      break;
    case 117:
      /* This should not happen as we check options before passing them
       * to pcre_compile2() */
      g_warning ("unknown option bit(s) set");
      *errcode = G_REGEX_ERROR_COMPILE;
      break;
    case 132:
    case 144:
    case 145:
      /* These errors should not happen as we are using an UTF8-enabled PCRE
       * and we do not check if strings are valid */
      /* Never use the message itself as the format string. */
      g_warning ("%s", *errmsg);
      *errcode = G_REGEX_ERROR_COMPILE;
      break;

    default:
      *errcode = G_REGEX_ERROR_COMPILE;
      break;
    }
}
/* GMatchInfo */
@ -854,8 +1040,9 @@ g_regex_new (const gchar *pattern,
{
GRegex *regex;
pcre *re;
const gchar *errmsg;
gchar *errmsg;
gint erroffset;
gint errcode;
gboolean optimize = FALSE;
static gboolean initialized = FALSE;
unsigned long int pcre_compile_options;
@ -919,7 +1106,8 @@ g_regex_new (const gchar *pattern,
}
/* compile the pattern */
re = pcre_compile (pattern, compile_options, &errmsg, &erroffset, NULL);
re = pcre_compile2 (pattern, compile_options, &errcode,
(const gchar **)&errmsg, &erroffset, NULL);
/* if the compilation failed, set the error member and return
* immediately */
@ -927,11 +1115,14 @@ g_regex_new (const gchar *pattern,
{
GError *tmp_error;
/* Translate the PCRE error code to GRegexError and use a translated
* error message if possible */
translate_compile_error (&errcode, &errmsg);
/* PCRE uses byte offsets but we want to show character offsets */
erroffset = g_utf8_pointer_to_offset (pattern, &pattern[erroffset]);
tmp_error = g_error_new (G_REGEX_ERROR,
G_REGEX_ERROR_COMPILE,
tmp_error = g_error_new (G_REGEX_ERROR, errcode,
_("Error while compiling regular "
"expression %s at char %d: %s"),
pattern, erroffset, errmsg);
@ -963,7 +1154,7 @@ g_regex_new (const gchar *pattern,
if (optimize)
{
regex->extra = pcre_study (regex->pcre_re, 0, &errmsg);
regex->extra = pcre_study (regex->pcre_re, 0, (const gchar **)&errmsg);
if (errmsg != NULL)
{
GError *tmp_error = g_error_new (G_REGEX_ERROR,

View File

@ -32,7 +32,47 @@ typedef enum
G_REGEX_ERROR_COMPILE,
G_REGEX_ERROR_OPTIMIZE,
G_REGEX_ERROR_REPLACE,
G_REGEX_ERROR_MATCH
G_REGEX_ERROR_MATCH,
G_REGEX_ERROR_INTERNAL,
/* These are the error codes from PCRE + 100 */
G_REGEX_ERROR_STRAY_BACKSLASH = 101,
G_REGEX_ERROR_MISSING_CONTROL_CHAR = 102,
G_REGEX_ERROR_UNRECOGNIZED_ESCAPE = 103,
G_REGEX_ERROR_QUANTIFIERS_OUT_OF_ORDER = 104,
G_REGEX_ERROR_QUANTIFIER_TOO_BIG = 105,
G_REGEX_ERROR_UNTERMINATED_CHARACTER_CLASS = 106,
G_REGEX_ERROR_INVALID_ESCAPE_IN_CHARACTER_CLASS = 107,
G_REGEX_ERROR_RANGE_OUT_OF_ORDER = 108,
G_REGEX_ERROR_NOTHING_TO_REPEAT = 109,
G_REGEX_ERROR_UNRECOGNIZED_CHARACTER = 112,
G_REGEX_ERROR_POSIX_NAMED_CLASS_OUTSIDE_CLASS = 113,
G_REGEX_ERROR_UNMATCHED_PARENTHESIS = 114,
G_REGEX_ERROR_INEXISTENT_SUBPATTERN_REFERENCE = 115,
G_REGEX_ERROR_UNTERMINATED_COMMENT = 118,
G_REGEX_ERROR_EXPRESSION_TOO_LARGE = 120,
G_REGEX_ERROR_MEMORY_ERROR = 121,
G_REGEX_ERROR_VARIABLE_LENGTH_LOOKBEHIND = 125,
G_REGEX_ERROR_MALFORMED_CONDITION = 126,
G_REGEX_ERROR_TOO_MANY_CONDITIONAL_BRANCHES = 127,
G_REGEX_ERROR_ASSERTION_EXPECTED = 128,
G_REGEX_ERROR_UNKNOWN_POSIX_CLASS_NAME = 130,
G_REGEX_ERROR_POSIX_COLLATING_ELEMENTS_NOT_SUPPORTED = 131,
G_REGEX_ERROR_HEX_CODE_TOO_LARGE = 134,
G_REGEX_ERROR_INVALID_CONDITION = 135,
G_REGEX_ERROR_SINGLE_BYTE_MATCH_IN_LOOKBEHIND = 136,
G_REGEX_ERROR_INFINITE_LOOP = 140,
G_REGEX_ERROR_MISSING_SUBPATTERN_NAME_TERMINATOR = 142,
G_REGEX_ERROR_DUPLICATE_SUBPATTERN_NAME = 143,
G_REGEX_ERROR_MALFORMED_PROPERTY = 146,
G_REGEX_ERROR_UNKNOWN_PROPERTY = 147,
G_REGEX_ERROR_SUBPATTERN_NAME_TOO_LONG = 148,
G_REGEX_ERROR_TOO_MANY_SUBPATTERNS = 149,
G_REGEX_ERROR_INVALID_OCTAL_VALUE = 151,
G_REGEX_ERROR_TOO_MANY_BRANCHES_IN_DEFINE = 154,
G_REGEX_ERROR_DEFINE_REPETION = 155,
G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS = 156,
G_REGEX_ERROR_MISSING_BACK_REFERENCE = 157
} GRegexError;
#define G_REGEX_ERROR g_regex_error_quark ()

View File

@ -132,13 +132,15 @@ test_new (const gchar *pattern,
static gboolean
test_new_fail (const gchar *pattern,
GRegexCompileFlags compile_opts)
GRegexCompileFlags compile_opts,
GRegexError expected_error)
{
GRegex *regex;
GError *error = NULL;
verbose ("compiling \"%s\" (expected a failure) \t", pattern);
regex = g_regex_new (pattern, compile_opts, 0, NULL);
regex = g_regex_new (pattern, compile_opts, 0, &error);
if (regex != NULL)
{
@ -148,13 +150,22 @@ test_new_fail (const gchar *pattern,
return FALSE;
}
if (error->code != expected_error)
{
g_print ("failed \t(pattern: \"%s\", compile: %d, got error: %d, "
"expected error: %d)\n",
pattern, compile_opts, error->code, expected_error);
g_error_free (error);
return FALSE;
}
verbose ("passed\n");
return TRUE;
}
#define TEST_NEW_FAIL(pattern, compile_opts) { \
#define TEST_NEW_FAIL(pattern, compile_opts, expected_error) { \
total++; \
if (test_new_fail (pattern, compile_opts)) \
if (test_new_fail (pattern, compile_opts, expected_error)) \
PASS; \
else \
FAIL; \
@ -1600,13 +1611,13 @@ main (int argc, char *argv[])
/* This gives "internal error: code overflow" with pcre 6.0 */
TEST_NEW("(?i)(?-i)", 0, 0);
/* TEST_NEW_FAIL(pattern, compile_opts) */
TEST_NEW_FAIL("(", 0);
TEST_NEW_FAIL(")", 0);
TEST_NEW_FAIL("[", 0);
TEST_NEW_FAIL("*", 0);
TEST_NEW_FAIL("?", 0);
TEST_NEW_FAIL("(?P<A>x)|(?P<A>y)", 0);
/* TEST_NEW_FAIL(pattern, compile_opts, expected_error) */
TEST_NEW_FAIL("(", 0, G_REGEX_ERROR_UNMATCHED_PARENTHESIS);
TEST_NEW_FAIL(")", 0, G_REGEX_ERROR_UNMATCHED_PARENTHESIS);
TEST_NEW_FAIL("[", 0, G_REGEX_ERROR_UNTERMINATED_CHARACTER_CLASS);
TEST_NEW_FAIL("*", 0, G_REGEX_ERROR_NOTHING_TO_REPEAT);
TEST_NEW_FAIL("?", 0, G_REGEX_ERROR_NOTHING_TO_REPEAT);
TEST_NEW_FAIL("(?P<A>x)|(?P<A>y)", 0, G_REGEX_ERROR_DUPLICATE_SUBPATTERN_NAME);
/* TEST_MATCH_SIMPLE(pattern, string, compile_opts, match_opts, expected) */
TEST_MATCH_SIMPLE("a", "", 0, 0, FALSE);