regex: Add NOTEMPTY_ATSTART match option

Since PCRE 8.00 it supports a variant of PCRE_NOTEMPTY that works
similarly except that it only applies to the start of the matched string
but permits empty matches further in.
This commit is contained in:
Christian Persch 2012-06-07 23:24:07 +02:00
parent 1171215014
commit e99e34f65f
3 changed files with 48 additions and 31 deletions

View File

@ -148,7 +148,8 @@
G_REGEX_MATCH_BSR_ANYCRLF | \
G_REGEX_MATCH_BSR_ANY | \
G_REGEX_MATCH_PARTIAL_SOFT | \
G_REGEX_MATCH_PARTIAL_HARD)
G_REGEX_MATCH_PARTIAL_HARD | \
G_REGEX_MATCH_NOTEMPTY_ATSTART)
/* we rely on these flags having the same values */
G_STATIC_ASSERT (G_REGEX_CASELESS == PCRE_CASELESS);
@ -167,20 +168,21 @@ G_STATIC_ASSERT (G_REGEX_NEWLINE_ANYCRLF == PCRE_NEWLINE_ANYCRLF);
G_STATIC_ASSERT (G_REGEX_BSR_ANYCRLF == PCRE_BSR_ANYCRLF);
G_STATIC_ASSERT (G_REGEX_JAVASCRIPT_COMPAT == PCRE_JAVASCRIPT_COMPAT);
G_STATIC_ASSERT (G_REGEX_MATCH_ANCHORED == PCRE_ANCHORED);
G_STATIC_ASSERT (G_REGEX_MATCH_NOTBOL == PCRE_NOTBOL);
G_STATIC_ASSERT (G_REGEX_MATCH_NOTEOL == PCRE_NOTEOL);
G_STATIC_ASSERT (G_REGEX_MATCH_NOTEMPTY == PCRE_NOTEMPTY);
G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL == PCRE_PARTIAL);
G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CR == PCRE_NEWLINE_CR);
G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_LF == PCRE_NEWLINE_LF);
G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CRLF == PCRE_NEWLINE_CRLF);
G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANY == PCRE_NEWLINE_ANY);
G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANYCRLF == PCRE_NEWLINE_ANYCRLF);
G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANYCRLF == PCRE_BSR_ANYCRLF);
G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANY == PCRE_BSR_UNICODE);
G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_SOFT == PCRE_PARTIAL_SOFT);
G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_HARD == PCRE_PARTIAL_HARD);
G_STATIC_ASSERT (G_REGEX_MATCH_ANCHORED == PCRE_ANCHORED);
G_STATIC_ASSERT (G_REGEX_MATCH_NOTBOL == PCRE_NOTBOL);
G_STATIC_ASSERT (G_REGEX_MATCH_NOTEOL == PCRE_NOTEOL);
G_STATIC_ASSERT (G_REGEX_MATCH_NOTEMPTY == PCRE_NOTEMPTY);
G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL == PCRE_PARTIAL);
G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CR == PCRE_NEWLINE_CR);
G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_LF == PCRE_NEWLINE_LF);
G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CRLF == PCRE_NEWLINE_CRLF);
G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANY == PCRE_NEWLINE_ANY);
G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANYCRLF == PCRE_NEWLINE_ANYCRLF);
G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANYCRLF == PCRE_BSR_ANYCRLF);
G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANY == PCRE_BSR_UNICODE);
G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_SOFT == PCRE_PARTIAL_SOFT);
G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_HARD == PCRE_PARTIAL_HARD);
G_STATIC_ASSERT (G_REGEX_MATCH_NOTEMPTY_ATSTART == PCRE_NOTEMPTY_ATSTART);
/* These PCRE flags are unused or not exposed publically in GRegexFlags, so
* it should be ok to reuse them for different things.

View File

@ -366,6 +366,9 @@ typedef enum
* to #G_REGEX_MATCH_PARTIAL_SOFT, this stops matching as soon as a partial match
* is found, without continuing to search for a possible complete match. See
* see g_match_info_is_partial_match() for more information. Since: 2.34
* @G_REGEX_MATCH_NOTEMPTY_ATSTART: Like #G_REGEX_MATCH_NOTEMPTY, but only applied to
* the start of the matched string. For anchored
* patterns this can only happen for pattern containing "\K". Since: 2.34
*
* Flags specifying match-time options.
*
@ -375,20 +378,21 @@ typedef enum
* adding a new flag. */
typedef enum
{
G_REGEX_MATCH_ANCHORED = 1 << 4,
G_REGEX_MATCH_NOTBOL = 1 << 7,
G_REGEX_MATCH_NOTEOL = 1 << 8,
G_REGEX_MATCH_NOTEMPTY = 1 << 10,
G_REGEX_MATCH_PARTIAL = 1 << 15,
G_REGEX_MATCH_NEWLINE_CR = 1 << 20,
G_REGEX_MATCH_NEWLINE_LF = 1 << 21,
G_REGEX_MATCH_NEWLINE_CRLF = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_LF,
G_REGEX_MATCH_NEWLINE_ANY = 1 << 22,
G_REGEX_MATCH_NEWLINE_ANYCRLF = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_ANY,
G_REGEX_MATCH_BSR_ANYCRLF = 1 << 23,
G_REGEX_MATCH_BSR_ANY = 1 << 24,
G_REGEX_MATCH_PARTIAL_SOFT = G_REGEX_MATCH_PARTIAL,
G_REGEX_MATCH_PARTIAL_HARD = 1 << 27
G_REGEX_MATCH_ANCHORED = 1 << 4,
G_REGEX_MATCH_NOTBOL = 1 << 7,
G_REGEX_MATCH_NOTEOL = 1 << 8,
G_REGEX_MATCH_NOTEMPTY = 1 << 10,
G_REGEX_MATCH_PARTIAL = 1 << 15,
G_REGEX_MATCH_NEWLINE_CR = 1 << 20,
G_REGEX_MATCH_NEWLINE_LF = 1 << 21,
G_REGEX_MATCH_NEWLINE_CRLF = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_LF,
G_REGEX_MATCH_NEWLINE_ANY = 1 << 22,
G_REGEX_MATCH_NEWLINE_ANYCRLF = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_ANY,
G_REGEX_MATCH_BSR_ANYCRLF = 1 << 23,
G_REGEX_MATCH_BSR_ANY = 1 << 24,
G_REGEX_MATCH_PARTIAL_SOFT = G_REGEX_MATCH_PARTIAL,
G_REGEX_MATCH_PARTIAL_HARD = 1 << 27,
G_REGEX_MATCH_NOTEMPTY_ATSTART = 1 << 28
} GRegexMatchFlags;
/**

View File

@ -160,7 +160,7 @@ test_match_simple (gconstpointer d)
g_assert_cmpint (match, ==, data->expected);
}
#define TEST_MATCH_SIMPLE(_pattern, _string, _compile_opts, _match_opts, _expected) { \
#define TEST_MATCH_SIMPLE_NAMED(_name, _pattern, _string, _compile_opts, _match_opts, _expected) { \
TestMatchData *data; \
gchar *path; \
data = g_new0 (TestMatchData, 1); \
@ -169,11 +169,18 @@ test_match_simple (gconstpointer d)
data->compile_opts = _compile_opts; \
data->match_opts = _match_opts; \
data->expected = _expected; \
path = g_strdup_printf ("/regex/match-simple/%d", ++total); \
path = g_strdup_printf ("/regex/match-%s/%d", _name, ++total); \
g_test_add_data_func (path, data, test_match_simple); \
g_free (path); \
}
#define TEST_MATCH_SIMPLE(_pattern, _string, _compile_opts, _match_opts, _expected) \
TEST_MATCH_SIMPLE_NAMED("simple", _pattern, _string, _compile_opts, _match_opts, _expected)
#define TEST_MATCH_NOTEMPTY(_pattern, _string, _expected) \
TEST_MATCH_SIMPLE_NAMED("notempty", _pattern, _string, 0, G_REGEX_MATCH_NOTEMPTY, _expected)
#define TEST_MATCH_NOTEMPTY_ATSTART(_pattern, _string, _expected) \
TEST_MATCH_SIMPLE_NAMED("notempty-atstart", _pattern, _string, 0, G_REGEX_MATCH_NOTEMPTY_ATSTART, _expected)
static void
test_match (gconstpointer d)
{
@ -2683,5 +2690,9 @@ main (int argc, char *argv[])
"<a><b>", 0, 6, "<a>", 0, 3);
TEST_MATCH_ALL3("a+", "aaa", -1, 0, "aaa", 0, 3, "aa", 0, 2, "a", 0, 1);
/* NOTEMPTY matching */
TEST_MATCH_NOTEMPTY("a?b?", "xyz", FALSE);
TEST_MATCH_NOTEMPTY_ATSTART("a?b?", "xyz", TRUE);
return g_test_run ();
}