regex: Add NO_START_OPTIMIZE compile and match flags

PCRE_NO_START_OPTIMIZE has existed since PCRE 7.9, but was not useful before,
since it only affects callouts (which GRegex doesn't support) and backtracking
control verbs, which the last commit makes use of.
This commit is contained in:
Christian Persch 2012-06-08 00:56:44 +02:00
parent d18c301015
commit a6e3eb6ece
3 changed files with 60 additions and 48 deletions

View File

@ -128,7 +128,8 @@
G_REGEX_NEWLINE_CRLF | \
G_REGEX_NEWLINE_ANYCRLF | \
G_REGEX_BSR_ANYCRLF | \
G_REGEX_JAVASCRIPT_COMPAT)
G_REGEX_JAVASCRIPT_COMPAT | \
G_REGEX_NO_START_OPTIMIZE)
/* Mask of all GRegexCompileFlags values that are (not) passed through to PCRE */
#define G_REGEX_COMPILE_PCRE_MASK (G_REGEX_COMPILE_MASK & ~G_REGEX_COMPILE_NONPCRE_MASK)
@ -136,20 +137,21 @@
G_REGEX_OPTIMIZE)
/* Mask of all the possible values for GRegexMatchFlags. */
#define G_REGEX_MATCH_MASK (G_REGEX_MATCH_ANCHORED | \
G_REGEX_MATCH_NOTBOL | \
G_REGEX_MATCH_NOTEOL | \
G_REGEX_MATCH_NOTEMPTY | \
G_REGEX_MATCH_PARTIAL | \
G_REGEX_MATCH_NEWLINE_CR | \
G_REGEX_MATCH_NEWLINE_LF | \
G_REGEX_MATCH_NEWLINE_CRLF | \
G_REGEX_MATCH_NEWLINE_ANY | \
G_REGEX_MATCH_NEWLINE_ANYCRLF | \
G_REGEX_MATCH_BSR_ANYCRLF | \
G_REGEX_MATCH_BSR_ANY | \
G_REGEX_MATCH_PARTIAL_SOFT | \
G_REGEX_MATCH_PARTIAL_HARD | \
#define G_REGEX_MATCH_MASK (G_REGEX_MATCH_ANCHORED | \
G_REGEX_MATCH_NOTBOL | \
G_REGEX_MATCH_NOTEOL | \
G_REGEX_MATCH_NOTEMPTY | \
G_REGEX_MATCH_PARTIAL | \
G_REGEX_MATCH_NEWLINE_CR | \
G_REGEX_MATCH_NEWLINE_LF | \
G_REGEX_MATCH_NEWLINE_CRLF | \
G_REGEX_MATCH_NEWLINE_ANY | \
G_REGEX_MATCH_NEWLINE_ANYCRLF | \
G_REGEX_MATCH_BSR_ANYCRLF | \
G_REGEX_MATCH_BSR_ANY | \
G_REGEX_MATCH_NO_START_OPTIMIZE | \
G_REGEX_MATCH_PARTIAL_SOFT | \
G_REGEX_MATCH_PARTIAL_HARD | \
G_REGEX_MATCH_NOTEMPTY_ATSTART)
/* we rely on these flags having the same values */
@ -169,22 +171,24 @@ G_STATIC_ASSERT (G_REGEX_NEWLINE_CRLF == PCRE_NEWLINE_CRLF);
G_STATIC_ASSERT (G_REGEX_NEWLINE_ANYCRLF == PCRE_NEWLINE_ANYCRLF);
G_STATIC_ASSERT (G_REGEX_BSR_ANYCRLF == PCRE_BSR_ANYCRLF);
G_STATIC_ASSERT (G_REGEX_JAVASCRIPT_COMPAT == PCRE_JAVASCRIPT_COMPAT);
G_STATIC_ASSERT (G_REGEX_NO_START_OPTIMIZE == PCRE_NO_START_OPTIMIZE);
G_STATIC_ASSERT (G_REGEX_MATCH_ANCHORED == PCRE_ANCHORED);
G_STATIC_ASSERT (G_REGEX_MATCH_NOTBOL == PCRE_NOTBOL);
G_STATIC_ASSERT (G_REGEX_MATCH_NOTEOL == PCRE_NOTEOL);
G_STATIC_ASSERT (G_REGEX_MATCH_NOTEMPTY == PCRE_NOTEMPTY);
G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL == PCRE_PARTIAL);
G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CR == PCRE_NEWLINE_CR);
G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_LF == PCRE_NEWLINE_LF);
G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CRLF == PCRE_NEWLINE_CRLF);
G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANY == PCRE_NEWLINE_ANY);
G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANYCRLF == PCRE_NEWLINE_ANYCRLF);
G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANYCRLF == PCRE_BSR_ANYCRLF);
G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANY == PCRE_BSR_UNICODE);
G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_SOFT == PCRE_PARTIAL_SOFT);
G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_HARD == PCRE_PARTIAL_HARD);
G_STATIC_ASSERT (G_REGEX_MATCH_NOTEMPTY_ATSTART == PCRE_NOTEMPTY_ATSTART);
G_STATIC_ASSERT (G_REGEX_MATCH_ANCHORED == PCRE_ANCHORED);
G_STATIC_ASSERT (G_REGEX_MATCH_NOTBOL == PCRE_NOTBOL);
G_STATIC_ASSERT (G_REGEX_MATCH_NOTEOL == PCRE_NOTEOL);
G_STATIC_ASSERT (G_REGEX_MATCH_NOTEMPTY == PCRE_NOTEMPTY);
G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL == PCRE_PARTIAL);
G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CR == PCRE_NEWLINE_CR);
G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_LF == PCRE_NEWLINE_LF);
G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CRLF == PCRE_NEWLINE_CRLF);
G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANY == PCRE_NEWLINE_ANY);
G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANYCRLF == PCRE_NEWLINE_ANYCRLF);
G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANYCRLF == PCRE_BSR_ANYCRLF);
G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANY == PCRE_BSR_UNICODE);
G_STATIC_ASSERT (G_REGEX_MATCH_NO_START_OPTIMIZE == PCRE_NO_START_OPTIMIZE);
G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_SOFT == PCRE_PARTIAL_SOFT);
G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_HARD == PCRE_PARTIAL_HARD);
G_STATIC_ASSERT (G_REGEX_MATCH_NOTEMPTY_ATSTART == PCRE_NOTEMPTY_ATSTART);
/* These PCRE flags are unused or not exposed publicly in GRegexFlags, so
* it should be ok to reuse them for different things.
@ -659,6 +663,9 @@ g_match_info_get_string (const GMatchInfo *match_info)
* the argument of the last verb encountered in the whole matching
* process. Otherwise, %NULL is returned.
*
* See <ulink>man:pcrepattern</ulink> for more information on
* backtracking control verbs.
*
* Returns: (transfer none): the mark, or %NULL
*
* Since: 2.34

View File

@ -279,7 +279,8 @@ GQuark g_regex_error_quark (void);
* @G_REGEX_BSR_ANYCRLF: Usually any newline character or character sequence
* is recognised. If this option is set, then "\R" only recognizes the newline
* characters '\r', '\n' and '\r\n'. Since: 2.34
*
* @G_REGEX_NO_START_OPTIMIZE: Disable some optimizations that would otherwise cause
* incorrect results for g_match_info_get_mark() when using backtracking control verbs. Since: 2.34
*
* Flags specifying compile-time options.
*
@ -306,7 +307,8 @@ typedef enum
G_REGEX_NEWLINE_CRLF = G_REGEX_NEWLINE_CR | G_REGEX_NEWLINE_LF,
G_REGEX_NEWLINE_ANYCRLF = G_REGEX_NEWLINE_CR | 1 << 22,
G_REGEX_BSR_ANYCRLF = 1 << 23,
G_REGEX_JAVASCRIPT_COMPAT = 1 << 25
G_REGEX_JAVASCRIPT_COMPAT = 1 << 25,
G_REGEX_NO_START_OPTIMIZE = 1 << 26
} GRegexCompileFlags;
/**
@ -372,6 +374,8 @@ typedef enum
* @G_REGEX_MATCH_NOTEMPTY_ATSTART: Like #G_REGEX_MATCH_NOTEMPTY, but only applied to
* the start of the matched string. For anchored
* patterns this can only happen for patterns containing "\K". Since: 2.34
* @G_REGEX_MATCH_NO_START_OPTIMIZE: Disable some optimizations that would otherwise cause
* incorrect results for g_match_info_get_mark() when using backtracking control verbs. Since: 2.34
*
* Flags specifying match-time options.
*
@ -381,21 +385,22 @@ typedef enum
* adding a new flag. */
typedef enum
{
G_REGEX_MATCH_ANCHORED = 1 << 4,
G_REGEX_MATCH_NOTBOL = 1 << 7,
G_REGEX_MATCH_NOTEOL = 1 << 8,
G_REGEX_MATCH_NOTEMPTY = 1 << 10,
G_REGEX_MATCH_PARTIAL = 1 << 15,
G_REGEX_MATCH_NEWLINE_CR = 1 << 20,
G_REGEX_MATCH_NEWLINE_LF = 1 << 21,
G_REGEX_MATCH_NEWLINE_CRLF = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_LF,
G_REGEX_MATCH_NEWLINE_ANY = 1 << 22,
G_REGEX_MATCH_NEWLINE_ANYCRLF = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_ANY,
G_REGEX_MATCH_BSR_ANYCRLF = 1 << 23,
G_REGEX_MATCH_BSR_ANY = 1 << 24,
G_REGEX_MATCH_PARTIAL_SOFT = G_REGEX_MATCH_PARTIAL,
G_REGEX_MATCH_PARTIAL_HARD = 1 << 27,
G_REGEX_MATCH_NOTEMPTY_ATSTART = 1 << 28
G_REGEX_MATCH_ANCHORED = 1 << 4,
G_REGEX_MATCH_NOTBOL = 1 << 7,
G_REGEX_MATCH_NOTEOL = 1 << 8,
G_REGEX_MATCH_NOTEMPTY = 1 << 10,
G_REGEX_MATCH_PARTIAL = 1 << 15,
G_REGEX_MATCH_NEWLINE_CR = 1 << 20,
G_REGEX_MATCH_NEWLINE_LF = 1 << 21,
G_REGEX_MATCH_NEWLINE_CRLF = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_LF,
G_REGEX_MATCH_NEWLINE_ANY = 1 << 22,
G_REGEX_MATCH_NEWLINE_ANYCRLF = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_ANY,
G_REGEX_MATCH_BSR_ANYCRLF = 1 << 23,
G_REGEX_MATCH_BSR_ANY = 1 << 24,
G_REGEX_MATCH_NO_START_OPTIMIZE = 1 << 26,
G_REGEX_MATCH_PARTIAL_SOFT = G_REGEX_MATCH_PARTIAL,
G_REGEX_MATCH_PARTIAL_HARD = 1 << 27,
G_REGEX_MATCH_NOTEMPTY_ATSTART = 1 << 28
} GRegexMatchFlags;
/**

View File

@ -2146,7 +2146,7 @@ main (int argc, char *argv[])
TEST_NEW_CHECK_FLAGS ("(*ANYCRLF)a", 0, 0, G_REGEX_NEWLINE_ANYCRLF, 0);
TEST_NEW_CHECK_FLAGS ("(*BSR_ANYCRLF)a", 0, 0, G_REGEX_BSR_ANYCRLF, 0);
TEST_NEW_CHECK_FLAGS ("(*BSR_UNICODE)a", 0, 0, 0 /* this is the default in GRegex */, 0);
TEST_NEW_CHECK_FLAGS ("(*NO_START_OPT)a", 0, 0, 0 /* not exposed in GRegex */, 0);
TEST_NEW_CHECK_FLAGS ("(*NO_START_OPT)", 0, 0, G_REGEX_NO_START_OPTIMIZE, 0);
/* TEST_NEW_FAIL(pattern, compile_opts, expected_error) */
TEST_NEW_FAIL("(", 0, G_REGEX_ERROR_UNMATCHED_PARENTHESIS);