mirror of
https://gitlab.gnome.org/GNOME/glib.git
synced 2025-01-27 14:36:16 +01:00
Merge branch 'rybalkin-gregex-jit-compilation' into 'main'
gregex: use G_REGEX_OPTIMIZE flag to enable JIT compilation Closes #566 See merge request GNOME/glib!2817
This commit is contained in:
commit
b12e031f5e
104
glib/gregex.c
104
glib/gregex.c
@ -144,7 +144,6 @@
|
||||
PCRE2_NOTBOL | \
|
||||
PCRE2_NOTEOL | \
|
||||
PCRE2_NOTEMPTY | \
|
||||
PCRE2_PARTIAL_SOFT | \
|
||||
PCRE2_NEWLINE_CR | \
|
||||
PCRE2_NEWLINE_LF | \
|
||||
PCRE2_NEWLINE_CRLF | \
|
||||
@ -195,6 +194,13 @@ struct _GMatchInfo
|
||||
pcre2_match_data *match_data;
|
||||
};
|
||||
|
||||
typedef enum
|
||||
{
|
||||
JIT_STATUS_DEFAULT,
|
||||
JIT_STATUS_ENABLED,
|
||||
JIT_STATUS_DISABLED
|
||||
} JITStatus;
|
||||
|
||||
struct _GRegex
|
||||
{
|
||||
gint ref_count; /* the ref count for the immutable part (atomic) */
|
||||
@ -203,6 +209,8 @@ struct _GRegex
|
||||
GRegexCompileFlags compile_opts; /* options used at compile time on the pattern, pcre2 values */
|
||||
GRegexCompileFlags orig_compile_opts; /* options used at compile time on the pattern, gregex values */
|
||||
GRegexMatchFlags match_opts; /* options used at match time on the regex */
|
||||
gint jit_options; /* options which were enabled for jit compiler */
|
||||
JITStatus jit_status; /* indicates the status of jit compiler for this compiled regex */
|
||||
};
|
||||
|
||||
/* TRUE if ret is an error code, FALSE otherwise. */
|
||||
@ -262,10 +270,11 @@ map_to_pcre2_compile_flags (gint pcre1_flags)
|
||||
if (pcre1_flags & G_REGEX_BSR_ANYCRLF)
|
||||
pcre2_flags |= PCRE2_BSR_ANYCRLF;
|
||||
|
||||
/* these are not available in pcre2 */
|
||||
G_GNUC_BEGIN_IGNORE_DEPRECATIONS
|
||||
/* these are not available in pcre2, but we use G_REGEX_OPTIMIZE as a special
|
||||
* case to request JIT compilation */
|
||||
if (pcre1_flags & G_REGEX_OPTIMIZE)
|
||||
pcre2_flags |= 0;
|
||||
G_GNUC_BEGIN_IGNORE_DEPRECATIONS
|
||||
if (pcre1_flags & G_REGEX_JAVASCRIPT_COMPAT)
|
||||
pcre2_flags |= 0;
|
||||
G_GNUC_END_IGNORE_DEPRECATIONS
|
||||
@ -291,8 +300,6 @@ map_to_pcre2_match_flags (gint pcre1_flags)
|
||||
pcre2_flags |= PCRE2_NOTEOL;
|
||||
if (pcre1_flags & G_REGEX_MATCH_NOTEMPTY)
|
||||
pcre2_flags |= PCRE2_NOTEMPTY;
|
||||
if (pcre1_flags & G_REGEX_MATCH_PARTIAL)
|
||||
pcre2_flags |= PCRE2_PARTIAL_SOFT;
|
||||
if (pcre1_flags & G_REGEX_MATCH_NEWLINE_CR)
|
||||
pcre2_flags |= PCRE2_NEWLINE_CR;
|
||||
if (pcre1_flags & G_REGEX_MATCH_NEWLINE_LF)
|
||||
@ -385,8 +392,6 @@ map_to_pcre1_match_flags (gint pcre2_flags)
|
||||
pcre1_flags |= G_REGEX_MATCH_NOTEOL;
|
||||
if (pcre2_flags & PCRE2_NOTEMPTY)
|
||||
pcre1_flags |= G_REGEX_MATCH_NOTEMPTY;
|
||||
if (pcre2_flags & PCRE2_PARTIAL_SOFT)
|
||||
pcre1_flags |= G_REGEX_MATCH_PARTIAL;
|
||||
if (pcre2_flags & PCRE2_NEWLINE_CR)
|
||||
pcre1_flags |= G_REGEX_MATCH_NEWLINE_CR;
|
||||
if (pcre2_flags & PCRE2_NEWLINE_LF)
|
||||
@ -461,6 +466,9 @@ match_error (gint errcode)
|
||||
return _("bad offset");
|
||||
case PCRE2_ERROR_RECURSELOOP:
|
||||
return _("recursion loop");
|
||||
case PCRE2_ERROR_JIT_BADOPTION:
|
||||
/* should not happen in GRegex since we check modes before each match */
|
||||
return _("matching mode is requested that was not compiled for JIT");
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@ -817,6 +825,56 @@ recalc_match_offsets (GMatchInfo *match_info,
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static void
|
||||
enable_jit_with_match_options (GRegex *regex,
|
||||
GRegexMatchFlags match_options)
|
||||
{
|
||||
gint old_jit_options, new_jit_options, retval;
|
||||
|
||||
if (!(regex->orig_compile_opts & G_REGEX_OPTIMIZE))
|
||||
return;
|
||||
if (regex->jit_status == JIT_STATUS_DISABLED)
|
||||
return;
|
||||
|
||||
old_jit_options = regex->jit_options;
|
||||
new_jit_options = old_jit_options | PCRE2_JIT_COMPLETE;
|
||||
if (match_options & PCRE2_PARTIAL_HARD)
|
||||
new_jit_options |= PCRE2_JIT_PARTIAL_HARD;
|
||||
if (match_options & PCRE2_PARTIAL_SOFT)
|
||||
new_jit_options |= PCRE2_JIT_PARTIAL_SOFT;
|
||||
|
||||
/* no new options enabled */
|
||||
if (new_jit_options == old_jit_options)
|
||||
return;
|
||||
|
||||
retval = pcre2_jit_compile (regex->pcre_re, new_jit_options);
|
||||
switch (retval)
|
||||
{
|
||||
case 0: /* JIT enabled successfully */
|
||||
regex->jit_status = JIT_STATUS_ENABLED;
|
||||
regex->jit_options = new_jit_options;
|
||||
break;
|
||||
case PCRE2_ERROR_NOMEMORY:
|
||||
g_warning ("JIT compilation was requested with G_REGEX_OPTIMIZE, "
|
||||
"but JIT was unable to allocate executable memory for the "
|
||||
"compiler. Falling back to interpretive code.");
|
||||
regex->jit_status = JIT_STATUS_DISABLED;
|
||||
break;
|
||||
case PCRE2_ERROR_JIT_BADOPTION:
|
||||
g_warning ("JIT compilation was requested with G_REGEX_OPTIMIZE, "
|
||||
"but JIT support is not available. Falling back to "
|
||||
"interpretive code.");
|
||||
regex->jit_status = JIT_STATUS_DISABLED;
|
||||
break;
|
||||
default:
|
||||
g_warning ("JIT compilation was requested with G_REGEX_OPTIMIZE, "
|
||||
"but request for JIT support had unexpectedly failed. "
|
||||
"Falling back to interpretive code.");
|
||||
regex->jit_status = JIT_STATUS_DISABLED;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* g_match_info_get_regex:
|
||||
* @match_info: a #GMatchInfo
|
||||
@ -956,13 +1014,28 @@ g_match_info_next (GMatchInfo *match_info,
|
||||
}
|
||||
|
||||
opts = map_to_pcre2_match_flags (match_info->regex->match_opts | match_info->match_opts);
|
||||
match_info->matches = pcre2_match (match_info->regex->pcre_re,
|
||||
(PCRE2_SPTR8) match_info->string,
|
||||
match_info->string_len,
|
||||
match_info->pos,
|
||||
opts & ~G_REGEX_FLAGS_CONVERTED,
|
||||
match_info->match_data,
|
||||
match_info->match_context);
|
||||
|
||||
enable_jit_with_match_options (match_info->regex, opts);
|
||||
if (match_info->regex->jit_status == JIT_STATUS_ENABLED)
|
||||
{
|
||||
match_info->matches = pcre2_jit_match (match_info->regex->pcre_re,
|
||||
(PCRE2_SPTR8) match_info->string,
|
||||
match_info->string_len,
|
||||
match_info->pos,
|
||||
opts & ~G_REGEX_FLAGS_CONVERTED,
|
||||
match_info->match_data,
|
||||
match_info->match_context);
|
||||
}
|
||||
else
|
||||
{
|
||||
match_info->matches = pcre2_match (match_info->regex->pcre_re,
|
||||
(PCRE2_SPTR8) match_info->string,
|
||||
match_info->string_len,
|
||||
match_info->pos,
|
||||
opts & ~G_REGEX_FLAGS_CONVERTED,
|
||||
match_info->match_data,
|
||||
match_info->match_context);
|
||||
}
|
||||
|
||||
if (IS_PCRE2_ERROR (match_info->matches))
|
||||
{
|
||||
@ -1582,6 +1655,7 @@ g_regex_new (const gchar *pattern,
|
||||
regex->compile_opts = compile_options;
|
||||
regex->orig_compile_opts = orig_compile_opts;
|
||||
regex->match_opts = match_options;
|
||||
enable_jit_with_match_options (regex, regex->match_opts);
|
||||
|
||||
return regex;
|
||||
}
|
||||
@ -1836,10 +1910,8 @@ g_regex_get_compile_flags (const GRegex *regex)
|
||||
|
||||
g_return_val_if_fail (regex != NULL, 0);
|
||||
|
||||
G_GNUC_BEGIN_IGNORE_DEPRECATIONS
|
||||
/* Preserve original G_REGEX_OPTIMIZE */
|
||||
extra_flags = (regex->orig_compile_opts & G_REGEX_OPTIMIZE);
|
||||
G_GNUC_END_IGNORE_DEPRECATIONS
|
||||
|
||||
/* Also include the newline options */
|
||||
pcre2_pattern_info (regex->pcre_re, PCRE2_INFO_NEWLINE, &info_value);
|
||||
|
@ -262,11 +262,13 @@ GQuark g_regex_error_quark (void);
|
||||
* followed by "?" behaves as if it were followed by "?:" but named
|
||||
* parentheses can still be used for capturing (and they acquire numbers
|
||||
* in the usual way).
|
||||
* @G_REGEX_OPTIMIZE: Optimize the regular expression. If the pattern will
|
||||
* be used many times, then it may be worth the effort to optimize it
|
||||
* to improve the speed of matches. Deprecated in GLib 2.74 which now uses
|
||||
* libpcre2, which doesn’t require separate optimization of queries. This
|
||||
* option is now a no-op. Deprecated: 2.74
|
||||
* @G_REGEX_OPTIMIZE: Since 2.74 and the port to pcre2, requests JIT
|
||||
* compilation, which, if the just-in-time compiler is available, further
|
||||
* processes a compiled pattern into machine code that executes much
|
||||
* faster. However, it comes at the cost of extra processing before the
|
||||
* match is performed, so it is most beneficial to use this when the same
|
||||
* compiled pattern is used for matching many times. Before 2.74 this
|
||||
* option used the built-in non-JIT optimizations in pcre1.
|
||||
* @G_REGEX_FIRSTLINE: Limits an unanchored pattern to match before (or at) the
|
||||
* first newline. Since: 2.34
|
||||
* @G_REGEX_DUPNAMES: Names used to identify capturing subpatterns need not
|
||||
@ -311,7 +313,7 @@ typedef enum
|
||||
G_REGEX_UNGREEDY = 1 << 9,
|
||||
G_REGEX_RAW = 1 << 11,
|
||||
G_REGEX_NO_AUTO_CAPTURE = 1 << 12,
|
||||
G_REGEX_OPTIMIZE GLIB_DEPRECATED_ENUMERATOR_IN_2_74 = 1 << 13,
|
||||
G_REGEX_OPTIMIZE = 1 << 13,
|
||||
G_REGEX_FIRSTLINE = 1 << 18,
|
||||
G_REGEX_DUPNAMES = 1 << 19,
|
||||
G_REGEX_NEWLINE_CR = 1 << 20,
|
||||
|
@ -516,7 +516,7 @@ test_partial (gconstpointer d)
|
||||
GRegex *regex;
|
||||
GMatchInfo *match_info;
|
||||
|
||||
regex = g_regex_new (data->pattern, G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, NULL);
|
||||
regex = g_regex_new (data->pattern, data->compile_opts, G_REGEX_MATCH_DEFAULT, NULL);
|
||||
|
||||
g_assert (regex != NULL);
|
||||
|
||||
@ -534,12 +534,13 @@ test_partial (gconstpointer d)
|
||||
g_regex_unref (regex);
|
||||
}
|
||||
|
||||
#define TEST_PARTIAL_FULL(_pattern, _string, _match_opts, _expected) { \
|
||||
#define TEST_PARTIAL_FULL(_pattern, _string, _compile_opts, _match_opts, _expected) { \
|
||||
TestMatchData *data; \
|
||||
gchar *path; \
|
||||
data = g_new0 (TestMatchData, 1); \
|
||||
data->pattern = _pattern; \
|
||||
data->string = _string; \
|
||||
data->compile_opts = _compile_opts; \
|
||||
data->match_opts = _match_opts; \
|
||||
data->expected = _expected; \
|
||||
path = g_strdup_printf ("/regex/match/partial/%d", ++total); \
|
||||
@ -547,7 +548,7 @@ test_partial (gconstpointer d)
|
||||
g_free (path); \
|
||||
}
|
||||
|
||||
#define TEST_PARTIAL(_pattern, _string, _expected) TEST_PARTIAL_FULL(_pattern, _string, G_REGEX_MATCH_PARTIAL, _expected)
|
||||
#define TEST_PARTIAL(_pattern, _string, _compile_opts, _expected) TEST_PARTIAL_FULL(_pattern, _string, _compile_opts, G_REGEX_MATCH_PARTIAL, _expected)
|
||||
|
||||
typedef struct {
|
||||
const gchar *pattern;
|
||||
@ -1504,7 +1505,7 @@ test_properties (void)
|
||||
gchar *str;
|
||||
|
||||
error = NULL;
|
||||
regex = g_regex_new ("\\p{L}\\p{Ll}\\p{Lu}\\p{L&}\\p{N}\\p{Nd}", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
|
||||
regex = g_regex_new ("\\p{L}\\p{Ll}\\p{Lu}\\p{L&}\\p{N}\\p{Nd}", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
|
||||
res = g_regex_match (regex, "ppPP01", 0, &match);
|
||||
g_assert (res);
|
||||
str = g_match_info_fetch (match, 0);
|
||||
@ -1525,7 +1526,7 @@ test_class (void)
|
||||
gchar *str;
|
||||
|
||||
error = NULL;
|
||||
regex = g_regex_new ("[abc\\x{0B1E}\\p{Mn}\\x{0391}-\\x{03A9}]", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
|
||||
regex = g_regex_new ("[abc\\x{0B1E}\\p{Mn}\\x{0391}-\\x{03A9}]", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
|
||||
res = g_regex_match (regex, "a:b:\340\254\236:\333\253:\316\240", 0, &match);
|
||||
g_assert (res);
|
||||
str = g_match_info_fetch (match, 0);
|
||||
@ -1571,7 +1572,7 @@ test_lookahead (void)
|
||||
gint start, end;
|
||||
|
||||
error = NULL;
|
||||
regex = g_regex_new ("\\w+(?=;)", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
|
||||
regex = g_regex_new ("\\w+(?=;)", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
|
||||
g_assert (regex);
|
||||
g_assert_no_error (error);
|
||||
res = g_regex_match (regex, "word1 word2: word3;", 0, &match);
|
||||
@ -1585,7 +1586,7 @@ test_lookahead (void)
|
||||
g_regex_unref (regex);
|
||||
|
||||
error = NULL;
|
||||
regex = g_regex_new ("foo(?!bar)", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
|
||||
regex = g_regex_new ("foo(?!bar)", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
|
||||
g_assert (regex);
|
||||
g_assert_no_error (error);
|
||||
res = g_regex_match (regex, "foobar foobaz", 0, &match);
|
||||
@ -1600,7 +1601,7 @@ test_lookahead (void)
|
||||
g_regex_unref (regex);
|
||||
|
||||
error = NULL;
|
||||
regex = g_regex_new ("(?!bar)foo", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
|
||||
regex = g_regex_new ("(?!bar)foo", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
|
||||
g_assert (regex);
|
||||
g_assert_no_error (error);
|
||||
res = g_regex_match (regex, "foobar foobaz", 0, &match);
|
||||
@ -1633,7 +1634,7 @@ test_lookbehind (void)
|
||||
gint start, end;
|
||||
|
||||
error = NULL;
|
||||
regex = g_regex_new ("(?<!foo)bar", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
|
||||
regex = g_regex_new ("(?<!foo)bar", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
|
||||
g_assert (regex);
|
||||
g_assert_no_error (error);
|
||||
res = g_regex_match (regex, "foobar boobar", 0, &match);
|
||||
@ -1648,7 +1649,7 @@ test_lookbehind (void)
|
||||
g_regex_unref (regex);
|
||||
|
||||
error = NULL;
|
||||
regex = g_regex_new ("(?<=bullock|donkey) poo", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
|
||||
regex = g_regex_new ("(?<=bullock|donkey) poo", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
|
||||
g_assert (regex);
|
||||
g_assert_no_error (error);
|
||||
res = g_regex_match (regex, "don poo, and bullock poo", 0, &match);
|
||||
@ -1661,17 +1662,17 @@ test_lookbehind (void)
|
||||
g_match_info_free (match);
|
||||
g_regex_unref (regex);
|
||||
|
||||
regex = g_regex_new ("(?<!dogs?|cats?) x", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
|
||||
regex = g_regex_new ("(?<!dogs?|cats?) x", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
|
||||
g_assert (regex == NULL);
|
||||
g_assert_error (error, G_REGEX_ERROR, G_REGEX_ERROR_VARIABLE_LENGTH_LOOKBEHIND);
|
||||
g_clear_error (&error);
|
||||
|
||||
regex = g_regex_new ("(?<=ab(c|de)) foo", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
|
||||
regex = g_regex_new ("(?<=ab(c|de)) foo", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
|
||||
g_assert (regex == NULL);
|
||||
g_assert_error (error, G_REGEX_ERROR, G_REGEX_ERROR_VARIABLE_LENGTH_LOOKBEHIND);
|
||||
g_clear_error (&error);
|
||||
|
||||
regex = g_regex_new ("(?<=abc|abde)foo", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
|
||||
regex = g_regex_new ("(?<=abc|abde)foo", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
|
||||
g_assert (regex);
|
||||
g_assert_no_error (error);
|
||||
res = g_regex_match (regex, "abfoo, abdfoo, abcfoo", 0, &match);
|
||||
@ -1683,7 +1684,7 @@ test_lookbehind (void)
|
||||
g_match_info_free (match);
|
||||
g_regex_unref (regex);
|
||||
|
||||
regex = g_regex_new ("^.*+(?<=abcd)", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
|
||||
regex = g_regex_new ("^.*+(?<=abcd)", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
|
||||
g_assert (regex);
|
||||
g_assert_no_error (error);
|
||||
res = g_regex_match (regex, "abcabcabcabcabcabcabcabcabcd", 0, &match);
|
||||
@ -1692,7 +1693,7 @@ test_lookbehind (void)
|
||||
g_match_info_free (match);
|
||||
g_regex_unref (regex);
|
||||
|
||||
regex = g_regex_new ("(?<=\\d{3})(?<!999)foo", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
|
||||
regex = g_regex_new ("(?<=\\d{3})(?<!999)foo", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
|
||||
g_assert (regex);
|
||||
g_assert_no_error (error);
|
||||
res = g_regex_match (regex, "999foo 123abcfoo 123foo", 0, &match);
|
||||
@ -1704,7 +1705,7 @@ test_lookbehind (void)
|
||||
g_match_info_free (match);
|
||||
g_regex_unref (regex);
|
||||
|
||||
regex = g_regex_new ("(?<=\\d{3}...)(?<!999)foo", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
|
||||
regex = g_regex_new ("(?<=\\d{3}...)(?<!999)foo", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
|
||||
g_assert (regex);
|
||||
g_assert_no_error (error);
|
||||
res = g_regex_match (regex, "999foo 123abcfoo 123foo", 0, &match);
|
||||
@ -1716,7 +1717,7 @@ test_lookbehind (void)
|
||||
g_match_info_free (match);
|
||||
g_regex_unref (regex);
|
||||
|
||||
regex = g_regex_new ("(?<=\\d{3}(?!999)...)foo", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
|
||||
regex = g_regex_new ("(?<=\\d{3}(?!999)...)foo", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
|
||||
g_assert (regex);
|
||||
g_assert_no_error (error);
|
||||
res = g_regex_match (regex, "999foo 123abcfoo 123foo", 0, &match);
|
||||
@ -1728,7 +1729,7 @@ test_lookbehind (void)
|
||||
g_match_info_free (match);
|
||||
g_regex_unref (regex);
|
||||
|
||||
regex = g_regex_new ("(?<=(?<!foo)bar)baz", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
|
||||
regex = g_regex_new ("(?<=(?<!foo)bar)baz", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
|
||||
g_assert (regex);
|
||||
g_assert_no_error (error);
|
||||
res = g_regex_match (regex, "foobarbaz barfoobaz barbarbaz", 0, &match);
|
||||
@ -1753,7 +1754,7 @@ test_subpattern (void)
|
||||
gint start;
|
||||
|
||||
error = NULL;
|
||||
regex = g_regex_new ("cat(aract|erpillar|)", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
|
||||
regex = g_regex_new ("cat(aract|erpillar|)", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
|
||||
g_assert (regex);
|
||||
g_assert_no_error (error);
|
||||
g_assert_cmpint (g_regex_get_capture_count (regex), ==, 1);
|
||||
@ -1771,7 +1772,7 @@ test_subpattern (void)
|
||||
g_match_info_free (match);
|
||||
g_regex_unref (regex);
|
||||
|
||||
regex = g_regex_new ("the ((red|white) (king|queen))", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
|
||||
regex = g_regex_new ("the ((red|white) (king|queen))", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
|
||||
g_assert (regex);
|
||||
g_assert_no_error (error);
|
||||
g_assert_cmpint (g_regex_get_capture_count (regex), ==, 3);
|
||||
@ -1795,7 +1796,7 @@ test_subpattern (void)
|
||||
g_match_info_free (match);
|
||||
g_regex_unref (regex);
|
||||
|
||||
regex = g_regex_new ("the ((?:red|white) (king|queen))", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
|
||||
regex = g_regex_new ("the ((?:red|white) (king|queen))", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
|
||||
g_assert (regex);
|
||||
g_assert_no_error (error);
|
||||
res = g_regex_match (regex, "the white queen", 0, &match);
|
||||
@ -1815,7 +1816,7 @@ test_subpattern (void)
|
||||
g_match_info_free (match);
|
||||
g_regex_unref (regex);
|
||||
|
||||
regex = g_regex_new ("(?|(Sat)(ur)|(Sun))day (morning|afternoon)", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
|
||||
regex = g_regex_new ("(?|(Sat)(ur)|(Sun))day (morning|afternoon)", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
|
||||
g_assert (regex);
|
||||
g_assert_no_error (error);
|
||||
g_assert_cmpint (g_regex_get_capture_count (regex), ==, 3);
|
||||
@ -1835,7 +1836,7 @@ test_subpattern (void)
|
||||
g_match_info_free (match);
|
||||
g_regex_unref (regex);
|
||||
|
||||
regex = g_regex_new ("(?|(abc)|(def))\\1", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
|
||||
regex = g_regex_new ("(?|(abc)|(def))\\1", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
|
||||
g_assert (regex);
|
||||
g_assert_no_error (error);
|
||||
g_assert_cmpint (g_regex_get_max_backref (regex), ==, 1);
|
||||
@ -1853,7 +1854,7 @@ test_subpattern (void)
|
||||
g_match_info_free (match);
|
||||
g_regex_unref (regex);
|
||||
|
||||
regex = g_regex_new ("(?|(abc)|(def))(?1)", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
|
||||
regex = g_regex_new ("(?|(abc)|(def))(?1)", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
|
||||
g_assert (regex);
|
||||
g_assert_no_error (error);
|
||||
res = g_regex_match (regex, "abcabc abcdef defabc defdef", 0, &match);
|
||||
@ -1870,7 +1871,7 @@ test_subpattern (void)
|
||||
g_match_info_free (match);
|
||||
g_regex_unref (regex);
|
||||
|
||||
regex = g_regex_new ("(?<DN>Mon|Fri|Sun)(?:day)?|(?<DN>Tue)(?:sday)?|(?<DN>Wed)(?:nesday)?|(?<DN>Thu)(?:rsday)?|(?<DN>Sat)(?:urday)?", G_REGEX_DUPNAMES, G_REGEX_MATCH_DEFAULT, &error);
|
||||
regex = g_regex_new ("(?<DN>Mon|Fri|Sun)(?:day)?|(?<DN>Tue)(?:sday)?|(?<DN>Wed)(?:nesday)?|(?<DN>Thu)(?:rsday)?|(?<DN>Sat)(?:urday)?", G_REGEX_OPTIMIZE|G_REGEX_DUPNAMES, G_REGEX_MATCH_DEFAULT, &error);
|
||||
g_assert (regex);
|
||||
g_assert_no_error (error);
|
||||
res = g_regex_match (regex, "Mon Tuesday Wed Saturday", 0, &match);
|
||||
@ -1897,7 +1898,7 @@ test_subpattern (void)
|
||||
g_match_info_free (match);
|
||||
g_regex_unref (regex);
|
||||
|
||||
regex = g_regex_new ("^(a|b\\1)+$", G_REGEX_DUPNAMES, G_REGEX_MATCH_DEFAULT, &error);
|
||||
regex = g_regex_new ("^(a|b\\1)+$", G_REGEX_OPTIMIZE|G_REGEX_DUPNAMES, G_REGEX_MATCH_DEFAULT, &error);
|
||||
g_assert (regex);
|
||||
g_assert_no_error (error);
|
||||
res = g_regex_match (regex, "aaaaaaaaaaaaaaaa", 0, &match);
|
||||
@ -1921,7 +1922,7 @@ test_condition (void)
|
||||
gboolean res;
|
||||
|
||||
error = NULL;
|
||||
regex = g_regex_new ("^(a+)(\\()?[^()]+(?(-1)\\))(b+)$", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
|
||||
regex = g_regex_new ("^(a+)(\\()?[^()]+(?(-1)\\))(b+)$", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
|
||||
g_assert (regex);
|
||||
g_assert_no_error (error);
|
||||
res = g_regex_match (regex, "a(zzzzzz)b", 0, &match);
|
||||
@ -1935,7 +1936,7 @@ test_condition (void)
|
||||
g_regex_unref (regex);
|
||||
|
||||
error = NULL;
|
||||
regex = g_regex_new ("^(a+)(?<OPEN>\\()?[^()]+(?(<OPEN>)\\))(b+)$", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
|
||||
regex = g_regex_new ("^(a+)(?<OPEN>\\()?[^()]+(?(<OPEN>)\\))(b+)$", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
|
||||
g_assert (regex);
|
||||
g_assert_no_error (error);
|
||||
res = g_regex_match (regex, "a(zzzzzz)b", 0, &match);
|
||||
@ -1948,7 +1949,7 @@ test_condition (void)
|
||||
g_match_info_free (match);
|
||||
g_regex_unref (regex);
|
||||
|
||||
regex = g_regex_new ("^(a+)(?(+1)\\[|\\<)?[^()]+(\\])?(b+)$", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
|
||||
regex = g_regex_new ("^(a+)(?(+1)\\[|\\<)?[^()]+(\\])?(b+)$", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
|
||||
g_assert (regex);
|
||||
g_assert_no_error (error);
|
||||
res = g_regex_match (regex, "a[zzzzzz]b", 0, &match);
|
||||
@ -1963,7 +1964,7 @@ test_condition (void)
|
||||
|
||||
regex = g_regex_new ("(?(DEFINE) (?<byte> 2[0-4]\\d | 25[0-5] | 1\\d\\d | [1-9]?\\d) )"
|
||||
"\\b (?&byte) (\\.(?&byte)){3} \\b",
|
||||
G_REGEX_EXTENDED, 0, &error);
|
||||
G_REGEX_OPTIMIZE|G_REGEX_EXTENDED, 0, &error);
|
||||
g_assert (regex);
|
||||
g_assert_no_error (error);
|
||||
res = g_regex_match (regex, "128.0.0.1", 0, &match);
|
||||
@ -1982,7 +1983,7 @@ test_condition (void)
|
||||
|
||||
regex = g_regex_new ("^(?(?=[^a-z]*[a-z])"
|
||||
"\\d{2}-[a-z]{3}-\\d{2} | \\d{2}-\\d{2}-\\d{2} )$",
|
||||
G_REGEX_EXTENDED, 0, &error);
|
||||
G_REGEX_OPTIMIZE|G_REGEX_EXTENDED, 0, &error);
|
||||
g_assert (regex);
|
||||
g_assert_no_error (error);
|
||||
res = g_regex_match (regex, "01-abc-24", 0, &match);
|
||||
@ -2015,7 +2016,7 @@ test_recursion (void)
|
||||
gint start;
|
||||
|
||||
error = NULL;
|
||||
regex = g_regex_new ("\\( ( [^()]++ | (?R) )* \\)", G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error);
|
||||
regex = g_regex_new ("\\( ( [^()]++ | (?R) )* \\)", G_REGEX_OPTIMIZE|G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error);
|
||||
g_assert (regex);
|
||||
g_assert_no_error (error);
|
||||
res = g_regex_match (regex, "(middle)", 0, &match);
|
||||
@ -2032,7 +2033,7 @@ test_recursion (void)
|
||||
g_match_info_free (match);
|
||||
g_regex_unref (regex);
|
||||
|
||||
regex = g_regex_new ("^( \\( ( [^()]++ | (?1) )* \\) )$", G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error);
|
||||
regex = g_regex_new ("^( \\( ( [^()]++ | (?1) )* \\) )$", G_REGEX_OPTIMIZE|G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error);
|
||||
g_assert (regex);
|
||||
g_assert_no_error (error);
|
||||
res = g_regex_match (regex, "((((((((((((((((middle))))))))))))))))", 0, &match);
|
||||
@ -2045,7 +2046,7 @@ test_recursion (void)
|
||||
g_match_info_free (match);
|
||||
g_regex_unref (regex);
|
||||
|
||||
regex = g_regex_new ("^(?<pn> \\( ( [^()]++ | (?&pn) )* \\) )$", G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error);
|
||||
regex = g_regex_new ("^(?<pn> \\( ( [^()]++ | (?&pn) )* \\) )$", G_REGEX_OPTIMIZE|G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error);
|
||||
g_assert (regex);
|
||||
g_assert_no_error (error);
|
||||
g_regex_match (regex, "(aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa()", 0, &match);
|
||||
@ -2054,7 +2055,7 @@ test_recursion (void)
|
||||
g_match_info_free (match);
|
||||
g_regex_unref (regex);
|
||||
|
||||
regex = g_regex_new ("< (?: (?(R) \\d++ | [^<>]*+) | (?R)) * >", G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error);
|
||||
regex = g_regex_new ("< (?: (?(R) \\d++ | [^<>]*+) | (?R)) * >", G_REGEX_OPTIMIZE|G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error);
|
||||
g_assert (regex);
|
||||
g_assert_no_error (error);
|
||||
res = g_regex_match (regex, "<ab<01<23<4>>>>", 0, &match);
|
||||
@ -2073,7 +2074,7 @@ test_recursion (void)
|
||||
g_match_info_free (match);
|
||||
g_regex_unref (regex);
|
||||
|
||||
regex = g_regex_new ("^((.)(?1)\\2|.)$", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
|
||||
regex = g_regex_new ("^((.)(?1)\\2|.)$", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
|
||||
g_assert (regex);
|
||||
g_assert_no_error (error);
|
||||
res = g_regex_match (regex, "abcdcba", 0, &match);
|
||||
@ -2086,7 +2087,7 @@ test_recursion (void)
|
||||
g_match_info_free (match);
|
||||
g_regex_unref (regex);
|
||||
|
||||
regex = g_regex_new ("^(?:((.)(?1)\\2|)|((.)(?3)\\4|.))$", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
|
||||
regex = g_regex_new ("^(?:((.)(?1)\\2|)|((.)(?3)\\4|.))$", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
|
||||
g_assert (regex);
|
||||
g_assert_no_error (error);
|
||||
res = g_regex_match (regex, "abcdcba", 0, &match);
|
||||
@ -2099,7 +2100,7 @@ test_recursion (void)
|
||||
g_match_info_free (match);
|
||||
g_regex_unref (regex);
|
||||
|
||||
regex = g_regex_new ("^\\W*+(?:((.)\\W*+(?1)\\W*+\\2|)|((.)\\W*+(?3)\\W*+\\4|\\W*+.\\W*+))\\W*+$", G_REGEX_CASELESS, G_REGEX_MATCH_DEFAULT, &error);
|
||||
regex = g_regex_new ("^\\W*+(?:((.)\\W*+(?1)\\W*+\\2|)|((.)\\W*+(?3)\\W*+\\4|\\W*+.\\W*+))\\W*+$", G_REGEX_OPTIMIZE|G_REGEX_CASELESS, G_REGEX_MATCH_DEFAULT, &error);
|
||||
g_assert (regex);
|
||||
g_assert_no_error (error);
|
||||
res = g_regex_match (regex, "abcdcba", 0, &match);
|
||||
@ -2219,26 +2220,18 @@ main (int argc, char *argv[])
|
||||
g_test_add_func ("/regex/compile-errors", test_compile_errors);
|
||||
|
||||
/* TEST_NEW(pattern, compile_opts, match_opts) */
|
||||
G_GNUC_BEGIN_IGNORE_DEPRECATIONS
|
||||
TEST_NEW("[A-Z]+", G_REGEX_CASELESS | G_REGEX_EXTENDED | G_REGEX_OPTIMIZE, G_REGEX_MATCH_NOTBOL | G_REGEX_MATCH_PARTIAL);
|
||||
G_GNUC_END_IGNORE_DEPRECATIONS
|
||||
TEST_NEW("", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT);
|
||||
TEST_NEW(".*", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT);
|
||||
G_GNUC_BEGIN_IGNORE_DEPRECATIONS
|
||||
TEST_NEW(".*", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT);
|
||||
G_GNUC_END_IGNORE_DEPRECATIONS
|
||||
TEST_NEW(".*", G_REGEX_MULTILINE, G_REGEX_MATCH_DEFAULT);
|
||||
TEST_NEW(".*", G_REGEX_DOTALL, G_REGEX_MATCH_DEFAULT);
|
||||
TEST_NEW(".*", G_REGEX_DOTALL, G_REGEX_MATCH_NOTBOL);
|
||||
TEST_NEW("(123\\d*)[a-zA-Z]+(?P<hello>.*)", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT);
|
||||
TEST_NEW("(123\\d*)[a-zA-Z]+(?P<hello>.*)", G_REGEX_CASELESS, G_REGEX_MATCH_DEFAULT);
|
||||
G_GNUC_BEGIN_IGNORE_DEPRECATIONS
|
||||
TEST_NEW("(123\\d*)[a-zA-Z]+(?P<hello>.*)", G_REGEX_CASELESS | G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT);
|
||||
G_GNUC_END_IGNORE_DEPRECATIONS
|
||||
TEST_NEW("(?P<A>x)|(?P<A>y)", G_REGEX_DUPNAMES, G_REGEX_MATCH_DEFAULT);
|
||||
G_GNUC_BEGIN_IGNORE_DEPRECATIONS
|
||||
TEST_NEW("(?P<A>x)|(?P<A>y)", G_REGEX_DUPNAMES | G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT);
|
||||
G_GNUC_END_IGNORE_DEPRECATIONS
|
||||
/* This gives "internal error: code overflow" with pcre 6.0 */
|
||||
TEST_NEW("(?i)(?-i)", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT);
|
||||
TEST_NEW ("(?i)a", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT);
|
||||
@ -2249,9 +2242,7 @@ G_GNUC_END_IGNORE_DEPRECATIONS
|
||||
TEST_NEW ("(?U)[a-z]+", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT);
|
||||
|
||||
/* TEST_NEW_CHECK_FLAGS(pattern, compile_opts, match_ops, real_compile_opts, real_match_opts) */
|
||||
G_GNUC_BEGIN_IGNORE_DEPRECATIONS
|
||||
TEST_NEW_CHECK_FLAGS ("a", G_REGEX_OPTIMIZE, 0, G_REGEX_OPTIMIZE, 0);
|
||||
G_GNUC_END_IGNORE_DEPRECATIONS
|
||||
TEST_NEW_CHECK_FLAGS ("a", G_REGEX_RAW, 0, G_REGEX_RAW, 0);
|
||||
TEST_NEW_CHECK_FLAGS ("^.*", 0, 0, G_REGEX_ANCHORED, 0);
|
||||
TEST_NEW_CHECK_FLAGS ("(*UTF8)a", 0, 0, 0 /* this is the default in GRegex */, 0);
|
||||
@ -2540,18 +2531,35 @@ G_GNUC_END_IGNORE_DEPRECATIONS
|
||||
TEST_MATCH_COUNT("(a)?(b)", "b", 0, 0, 3);
|
||||
TEST_MATCH_COUNT("(a)?(b)", "ab", 0, 0, 3);
|
||||
|
||||
/* TEST_PARTIAL(pattern, string, expected) */
|
||||
TEST_PARTIAL("^ab", "a", TRUE);
|
||||
TEST_PARTIAL("^ab", "xa", FALSE);
|
||||
TEST_PARTIAL("ab", "xa", TRUE);
|
||||
TEST_PARTIAL("ab", "ab", FALSE); /* normal match. */
|
||||
TEST_PARTIAL("a+b", "aa", TRUE);
|
||||
TEST_PARTIAL("(a)+b", "aa", TRUE);
|
||||
TEST_PARTIAL("a?b", "a", TRUE);
|
||||
/* TEST_PARTIAL(pattern, string, expected), no JIT */
|
||||
TEST_PARTIAL("^ab", "a", G_REGEX_DEFAULT, TRUE);
|
||||
TEST_PARTIAL("^ab", "xa", G_REGEX_DEFAULT, FALSE);
|
||||
TEST_PARTIAL("ab", "xa", G_REGEX_DEFAULT, TRUE);
|
||||
TEST_PARTIAL("ab", "ab", G_REGEX_DEFAULT, FALSE); /* normal match. */
|
||||
TEST_PARTIAL("a+b", "aa", G_REGEX_DEFAULT, TRUE);
|
||||
TEST_PARTIAL("(a)+b", "aa", G_REGEX_DEFAULT, TRUE);
|
||||
TEST_PARTIAL("a?b", "a", G_REGEX_DEFAULT, TRUE);
|
||||
|
||||
/* Test soft vs. hard partial matching */
|
||||
TEST_PARTIAL_FULL("cat(fish)?", "cat", G_REGEX_MATCH_PARTIAL_SOFT, FALSE);
|
||||
TEST_PARTIAL_FULL("cat(fish)?", "cat", G_REGEX_MATCH_PARTIAL_HARD, TRUE);
|
||||
/* TEST_PARTIAL(pattern, string, expected) with JIT */
|
||||
TEST_PARTIAL("^ab", "a", G_REGEX_OPTIMIZE, TRUE);
|
||||
TEST_PARTIAL("^ab", "xa", G_REGEX_OPTIMIZE, FALSE);
|
||||
TEST_PARTIAL("ab", "xa", G_REGEX_OPTIMIZE, TRUE);
|
||||
TEST_PARTIAL("ab", "ab", G_REGEX_OPTIMIZE, FALSE); /* normal match. */
|
||||
TEST_PARTIAL("a+b", "aa", G_REGEX_OPTIMIZE, TRUE);
|
||||
TEST_PARTIAL("(a)+b", "aa", G_REGEX_OPTIMIZE, TRUE);
|
||||
TEST_PARTIAL("a?b", "a", G_REGEX_OPTIMIZE, TRUE);
|
||||
|
||||
/* Test soft vs. hard partial matching, no JIT */
|
||||
TEST_PARTIAL_FULL("cat(fish)?", "cat", G_REGEX_DEFAULT, G_REGEX_MATCH_PARTIAL_SOFT, FALSE);
|
||||
TEST_PARTIAL_FULL("cat(fish)?", "cat", G_REGEX_DEFAULT, G_REGEX_MATCH_PARTIAL_HARD, TRUE);
|
||||
TEST_PARTIAL_FULL("ab+", "ab", G_REGEX_DEFAULT, G_REGEX_MATCH_PARTIAL_SOFT, FALSE);
|
||||
TEST_PARTIAL_FULL("ab+", "ab", G_REGEX_DEFAULT, G_REGEX_MATCH_PARTIAL_HARD, TRUE);
|
||||
|
||||
/* Test soft vs. hard partial matching with JIT */
|
||||
TEST_PARTIAL_FULL("cat(fish)?", "cat", G_REGEX_OPTIMIZE, G_REGEX_MATCH_PARTIAL_SOFT, FALSE);
|
||||
TEST_PARTIAL_FULL("cat(fish)?", "cat", G_REGEX_OPTIMIZE, G_REGEX_MATCH_PARTIAL_HARD, TRUE);
|
||||
TEST_PARTIAL_FULL("ab+", "ab", G_REGEX_OPTIMIZE, G_REGEX_MATCH_PARTIAL_SOFT, FALSE);
|
||||
TEST_PARTIAL_FULL("ab+", "ab", G_REGEX_OPTIMIZE, G_REGEX_MATCH_PARTIAL_HARD, TRUE);
|
||||
|
||||
/* TEST_SUB_PATTERN(pattern, string, start_position, sub_n, expected_sub,
|
||||
* expected_start, expected_end) */
|
||||
|
Loading…
Reference in New Issue
Block a user