mirror of
https://gitlab.gnome.org/GNOME/glib.git
synced 2025-02-22 18:22:11 +01:00
regex: Add NO_START_OPTIMIZE compile and match flags
PCRE_NO_START_OPTIMIZE has existed since PCRE 7.9, but was not useful before, since it only affects callouts (which GRegex doesn't support) and backtracking control verbs, which the previous commit makes use of.
This commit is contained in:
parent
d18c301015
commit
a6e3eb6ece
@ -128,7 +128,8 @@
|
||||
G_REGEX_NEWLINE_CRLF | \
|
||||
G_REGEX_NEWLINE_ANYCRLF | \
|
||||
G_REGEX_BSR_ANYCRLF | \
|
||||
G_REGEX_JAVASCRIPT_COMPAT)
|
||||
G_REGEX_JAVASCRIPT_COMPAT | \
|
||||
G_REGEX_NO_START_OPTIMIZE)
|
||||
|
||||
/* Mask of all GRegexCompileFlags values that are (not) passed through to PCRE */
|
||||
#define G_REGEX_COMPILE_PCRE_MASK (G_REGEX_COMPILE_MASK & ~G_REGEX_COMPILE_NONPCRE_MASK)
|
||||
@ -136,20 +137,21 @@
|
||||
G_REGEX_OPTIMIZE)
|
||||
|
||||
/* Mask of all the possible values for GRegexMatchFlags. */
|
||||
#define G_REGEX_MATCH_MASK (G_REGEX_MATCH_ANCHORED | \
|
||||
G_REGEX_MATCH_NOTBOL | \
|
||||
G_REGEX_MATCH_NOTEOL | \
|
||||
G_REGEX_MATCH_NOTEMPTY | \
|
||||
G_REGEX_MATCH_PARTIAL | \
|
||||
G_REGEX_MATCH_NEWLINE_CR | \
|
||||
G_REGEX_MATCH_NEWLINE_LF | \
|
||||
G_REGEX_MATCH_NEWLINE_CRLF | \
|
||||
G_REGEX_MATCH_NEWLINE_ANY | \
|
||||
G_REGEX_MATCH_NEWLINE_ANYCRLF | \
|
||||
G_REGEX_MATCH_BSR_ANYCRLF | \
|
||||
G_REGEX_MATCH_BSR_ANY | \
|
||||
G_REGEX_MATCH_PARTIAL_SOFT | \
|
||||
G_REGEX_MATCH_PARTIAL_HARD | \
|
||||
#define G_REGEX_MATCH_MASK (G_REGEX_MATCH_ANCHORED | \
|
||||
G_REGEX_MATCH_NOTBOL | \
|
||||
G_REGEX_MATCH_NOTEOL | \
|
||||
G_REGEX_MATCH_NOTEMPTY | \
|
||||
G_REGEX_MATCH_PARTIAL | \
|
||||
G_REGEX_MATCH_NEWLINE_CR | \
|
||||
G_REGEX_MATCH_NEWLINE_LF | \
|
||||
G_REGEX_MATCH_NEWLINE_CRLF | \
|
||||
G_REGEX_MATCH_NEWLINE_ANY | \
|
||||
G_REGEX_MATCH_NEWLINE_ANYCRLF | \
|
||||
G_REGEX_MATCH_BSR_ANYCRLF | \
|
||||
G_REGEX_MATCH_BSR_ANY | \
|
||||
G_REGEX_MATCH_NO_START_OPTIMIZE | \
|
||||
G_REGEX_MATCH_PARTIAL_SOFT | \
|
||||
G_REGEX_MATCH_PARTIAL_HARD | \
|
||||
G_REGEX_MATCH_NOTEMPTY_ATSTART)
|
||||
|
||||
/* we rely on these flags having the same values */
|
||||
@ -169,22 +171,24 @@ G_STATIC_ASSERT (G_REGEX_NEWLINE_CRLF == PCRE_NEWLINE_CRLF);
|
||||
G_STATIC_ASSERT (G_REGEX_NEWLINE_ANYCRLF == PCRE_NEWLINE_ANYCRLF);
|
||||
G_STATIC_ASSERT (G_REGEX_BSR_ANYCRLF == PCRE_BSR_ANYCRLF);
|
||||
G_STATIC_ASSERT (G_REGEX_JAVASCRIPT_COMPAT == PCRE_JAVASCRIPT_COMPAT);
|
||||
G_STATIC_ASSERT (G_REGEX_NO_START_OPTIMIZE == PCRE_NO_START_OPTIMIZE);
|
||||
|
||||
G_STATIC_ASSERT (G_REGEX_MATCH_ANCHORED == PCRE_ANCHORED);
|
||||
G_STATIC_ASSERT (G_REGEX_MATCH_NOTBOL == PCRE_NOTBOL);
|
||||
G_STATIC_ASSERT (G_REGEX_MATCH_NOTEOL == PCRE_NOTEOL);
|
||||
G_STATIC_ASSERT (G_REGEX_MATCH_NOTEMPTY == PCRE_NOTEMPTY);
|
||||
G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL == PCRE_PARTIAL);
|
||||
G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CR == PCRE_NEWLINE_CR);
|
||||
G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_LF == PCRE_NEWLINE_LF);
|
||||
G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CRLF == PCRE_NEWLINE_CRLF);
|
||||
G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANY == PCRE_NEWLINE_ANY);
|
||||
G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANYCRLF == PCRE_NEWLINE_ANYCRLF);
|
||||
G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANYCRLF == PCRE_BSR_ANYCRLF);
|
||||
G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANY == PCRE_BSR_UNICODE);
|
||||
G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_SOFT == PCRE_PARTIAL_SOFT);
|
||||
G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_HARD == PCRE_PARTIAL_HARD);
|
||||
G_STATIC_ASSERT (G_REGEX_MATCH_NOTEMPTY_ATSTART == PCRE_NOTEMPTY_ATSTART);
|
||||
G_STATIC_ASSERT (G_REGEX_MATCH_ANCHORED == PCRE_ANCHORED);
|
||||
G_STATIC_ASSERT (G_REGEX_MATCH_NOTBOL == PCRE_NOTBOL);
|
||||
G_STATIC_ASSERT (G_REGEX_MATCH_NOTEOL == PCRE_NOTEOL);
|
||||
G_STATIC_ASSERT (G_REGEX_MATCH_NOTEMPTY == PCRE_NOTEMPTY);
|
||||
G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL == PCRE_PARTIAL);
|
||||
G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CR == PCRE_NEWLINE_CR);
|
||||
G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_LF == PCRE_NEWLINE_LF);
|
||||
G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CRLF == PCRE_NEWLINE_CRLF);
|
||||
G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANY == PCRE_NEWLINE_ANY);
|
||||
G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANYCRLF == PCRE_NEWLINE_ANYCRLF);
|
||||
G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANYCRLF == PCRE_BSR_ANYCRLF);
|
||||
G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANY == PCRE_BSR_UNICODE);
|
||||
G_STATIC_ASSERT (G_REGEX_MATCH_NO_START_OPTIMIZE == PCRE_NO_START_OPTIMIZE);
|
||||
G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_SOFT == PCRE_PARTIAL_SOFT);
|
||||
G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_HARD == PCRE_PARTIAL_HARD);
|
||||
G_STATIC_ASSERT (G_REGEX_MATCH_NOTEMPTY_ATSTART == PCRE_NOTEMPTY_ATSTART);
|
||||
|
||||
/* These PCRE flags are unused or not exposed publicly in GRegexFlags, so
|
||||
* it should be ok to reuse them for different things.
|
||||
@ -659,6 +663,9 @@ g_match_info_get_string (const GMatchInfo *match_info)
|
||||
* the argument of the last verb encountered in the whole matching
|
||||
* process. Otherwise, %NULL is returned.
|
||||
*
|
||||
* See <ulink>man:pcrepattern</ulink> for more information on
|
||||
* backtracking control verbs.
|
||||
*
|
||||
* Returns: (transfer none): the mark, or %NULL
|
||||
*
|
||||
* Since: 2.34
|
||||
|
@ -279,7 +279,8 @@ GQuark g_regex_error_quark (void);
|
||||
* @G_REGEX_BSR_ANYCRLF: Usually any newline character or character sequence
|
||||
* is recognised. If this option is set, then "\R" only recognizes the newline
|
||||
* characters '\r', '\n' and '\r\n'. Since: 2.34
|
||||
*
|
||||
* @G_REGEX_NO_START_OPTIMIZE: Disable some optimizations that will cause incorrect
|
||||
* results for g_match_info_get_mark() when using backtracking control verbs. Since: 2.34
|
||||
*
|
||||
* Flags specifying compile-time options.
|
||||
*
|
||||
@ -306,7 +307,8 @@ typedef enum
|
||||
G_REGEX_NEWLINE_CRLF = G_REGEX_NEWLINE_CR | G_REGEX_NEWLINE_LF,
|
||||
G_REGEX_NEWLINE_ANYCRLF = G_REGEX_NEWLINE_CR | 1 << 22,
|
||||
G_REGEX_BSR_ANYCRLF = 1 << 23,
|
||||
G_REGEX_JAVASCRIPT_COMPAT = 1 << 25
|
||||
G_REGEX_JAVASCRIPT_COMPAT = 1 << 25,
|
||||
G_REGEX_NO_START_OPTIMIZE = 1 << 26
|
||||
} GRegexCompileFlags;
|
||||
|
||||
/**
|
||||
@ -372,6 +374,8 @@ typedef enum
|
||||
* @G_REGEX_MATCH_NOTEMPTY_ATSTART: Like #G_REGEX_MATCH_NOTEMPTY, but only applied to
|
||||
* the start of the matched string. For anchored
|
||||
* patterns this can only happen for patterns containing "\K". Since: 2.34
|
||||
* @G_REGEX_MATCH_NO_START_OPTIMIZE: Disable some optimizations that will cause incorrect
|
||||
* results for g_match_info_get_mark() when using backtracking control verbs. Since: 2.34
|
||||
*
|
||||
* Flags specifying match-time options.
|
||||
*
|
||||
@ -381,21 +385,22 @@ typedef enum
|
||||
* adding a new flag. */
|
||||
typedef enum
|
||||
{
|
||||
G_REGEX_MATCH_ANCHORED = 1 << 4,
|
||||
G_REGEX_MATCH_NOTBOL = 1 << 7,
|
||||
G_REGEX_MATCH_NOTEOL = 1 << 8,
|
||||
G_REGEX_MATCH_NOTEMPTY = 1 << 10,
|
||||
G_REGEX_MATCH_PARTIAL = 1 << 15,
|
||||
G_REGEX_MATCH_NEWLINE_CR = 1 << 20,
|
||||
G_REGEX_MATCH_NEWLINE_LF = 1 << 21,
|
||||
G_REGEX_MATCH_NEWLINE_CRLF = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_LF,
|
||||
G_REGEX_MATCH_NEWLINE_ANY = 1 << 22,
|
||||
G_REGEX_MATCH_NEWLINE_ANYCRLF = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_ANY,
|
||||
G_REGEX_MATCH_BSR_ANYCRLF = 1 << 23,
|
||||
G_REGEX_MATCH_BSR_ANY = 1 << 24,
|
||||
G_REGEX_MATCH_PARTIAL_SOFT = G_REGEX_MATCH_PARTIAL,
|
||||
G_REGEX_MATCH_PARTIAL_HARD = 1 << 27,
|
||||
G_REGEX_MATCH_NOTEMPTY_ATSTART = 1 << 28
|
||||
G_REGEX_MATCH_ANCHORED = 1 << 4,
|
||||
G_REGEX_MATCH_NOTBOL = 1 << 7,
|
||||
G_REGEX_MATCH_NOTEOL = 1 << 8,
|
||||
G_REGEX_MATCH_NOTEMPTY = 1 << 10,
|
||||
G_REGEX_MATCH_PARTIAL = 1 << 15,
|
||||
G_REGEX_MATCH_NEWLINE_CR = 1 << 20,
|
||||
G_REGEX_MATCH_NEWLINE_LF = 1 << 21,
|
||||
G_REGEX_MATCH_NEWLINE_CRLF = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_LF,
|
||||
G_REGEX_MATCH_NEWLINE_ANY = 1 << 22,
|
||||
G_REGEX_MATCH_NEWLINE_ANYCRLF = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_ANY,
|
||||
G_REGEX_MATCH_BSR_ANYCRLF = 1 << 23,
|
||||
G_REGEX_MATCH_BSR_ANY = 1 << 24,
|
||||
G_REGEX_MATCH_NO_START_OPTIMIZE = 1 << 26,
|
||||
G_REGEX_MATCH_PARTIAL_SOFT = G_REGEX_MATCH_PARTIAL,
|
||||
G_REGEX_MATCH_PARTIAL_HARD = 1 << 27,
|
||||
G_REGEX_MATCH_NOTEMPTY_ATSTART = 1 << 28
|
||||
} GRegexMatchFlags;
|
||||
|
||||
/**
|
||||
|
@ -2146,7 +2146,7 @@ main (int argc, char *argv[])
|
||||
TEST_NEW_CHECK_FLAGS ("(*ANYCRLF)a", 0, 0, G_REGEX_NEWLINE_ANYCRLF, 0);
|
||||
TEST_NEW_CHECK_FLAGS ("(*BSR_ANYCRLF)a", 0, 0, G_REGEX_BSR_ANYCRLF, 0);
|
||||
TEST_NEW_CHECK_FLAGS ("(*BSR_UNICODE)a", 0, 0, 0 /* this is the default in GRegex */, 0);
|
||||
TEST_NEW_CHECK_FLAGS ("(*NO_START_OPT)a", 0, 0, 0 /* not exposed in GRegex */, 0);
|
||||
TEST_NEW_CHECK_FLAGS ("(*NO_START_OPT)", 0, 0, G_REGEX_NO_START_OPTIMIZE, 0);
|
||||
|
||||
/* TEST_NEW_FAIL(pattern, compile_opts, expected_error) */
|
||||
TEST_NEW_FAIL("(", 0, G_REGEX_ERROR_UNMATCHED_PARENTHESIS);
|
||||
|
Loading…
x
Reference in New Issue
Block a user