diff --git a/docs/reference/glib/glib-sections.txt b/docs/reference/glib/glib-sections.txt index 298e9fcfa..c595ade3b 100644 --- a/docs/reference/glib/glib-sections.txt +++ b/docs/reference/glib/glib-sections.txt @@ -999,6 +999,7 @@ GRegexCompileFlags GRegexMatchFlags GRegex GRegexEvalCallback +g_regex_jit_supported g_regex_new g_regex_ref g_regex_unref @@ -1028,6 +1029,7 @@ GMatchInfo g_match_info_get_regex g_match_info_get_string g_match_info_get_mark +g_match_info_get_jited g_match_info_ref g_match_info_unref g_match_info_free diff --git a/glib/gregex.c b/glib/gregex.c index a06486868..710e8ec46 100644 --- a/glib/gregex.c +++ b/glib/gregex.c @@ -117,12 +117,21 @@ G_REGEX_NEWLINE_ANYCRLF | \ G_REGEX_BSR_ANYCRLF | \ G_REGEX_JAVASCRIPT_COMPAT | \ - G_REGEX_NO_START_OPTIMIZE) + G_REGEX_NO_START_OPTIMIZE | \ + G_REGEX_JIT | \ + G_REGEX_JIT_PARTIAL_SOFT | \ + G_REGEX_JIT_PARTIAL_HARD) + +/* All JIT flags from GRegexCompileFlags */ +#define G_REGEX_COMPILE_JIT_MASK (G_REGEX_JIT | \ + G_REGEX_JIT_PARTIAL_SOFT | \ + G_REGEX_JIT_PARTIAL_HARD) /* Mask of all GRegexCompileFlags values that are (not) passed trough to PCRE */ #define G_REGEX_COMPILE_PCRE_MASK (G_REGEX_COMPILE_MASK & ~G_REGEX_COMPILE_NONPCRE_MASK) #define G_REGEX_COMPILE_NONPCRE_MASK (G_REGEX_RAW | \ - G_REGEX_OPTIMIZE) + G_REGEX_OPTIMIZE | \ + G_REGEX_COMPILE_JIT_MASK) /* Mask of all the possible values for GRegexMatchFlags. */ #define G_REGEX_MATCH_MASK (G_REGEX_MATCH_ANCHORED | \ @@ -183,6 +192,9 @@ G_STATIC_ASSERT (G_REGEX_MATCH_NOTEMPTY_ATSTART == PCRE_NOTEMPTY_ATSTART); */ G_STATIC_ASSERT (G_REGEX_OPTIMIZE == PCRE_NO_UTF8_CHECK); G_STATIC_ASSERT (G_REGEX_RAW == PCRE_UTF8); +G_STATIC_ASSERT (G_REGEX_JIT == PCRE_NOTBOL); +G_STATIC_ASSERT (G_REGEX_JIT_PARTIAL_SOFT == PCRE_PARTIAL_SOFT); +G_STATIC_ASSERT (G_REGEX_JIT_PARTIAL_HARD == PCRE_PARTIAL_HARD); /* if the string is in UTF-8 use g_utf8_ functions, else use * use just +/- 1. 
*/ @@ -208,6 +220,7 @@ struct _GMatchInfo gssize string_len; /* length of string */ /* const */ guchar *mark; /* MARK when using backtracing control */ pcre_extra extra; /* pcre_extra data */ + guint jited : 1; /* whether the matching was using the JIT code */ }; struct _GRegex @@ -297,6 +310,8 @@ match_error (gint errcode) return _("short utf8"); case PCRE_ERROR_RECURSELOOP: return _("recursion loop"); + case PCRE_ERROR_JIT_STACKLIMIT: + return _("JIT stack limit exceeded"); default: break; } @@ -546,6 +561,15 @@ translate_compile_error (gint *errcode, const gchar **errmsg) /* GMatchInfo */ +static pcre_jit_stack * +jit_stack_cb (void *arg) +{ + GMatchInfo *info = arg; + + info->jited = TRUE; + return NULL; /* keep using the internal 32K stack */ +} + static GMatchInfo * match_info_new (const GRegex *regex, const gchar *string, @@ -601,6 +625,8 @@ match_info_new (const GRegex *regex, match_info->extra.flags |= PCRE_EXTRA_MARK; match_info->extra.mark = &match_info->mark; + + pcre_assign_jit_stack(&match_info->extra, jit_stack_cb, match_info); } return match_info; @@ -668,6 +694,30 @@ g_match_info_get_mark (const GMatchInfo *match_info) return (const gchar *) match_info->mark; } +/** + * g_match_info_get_jited: + * @match_info: a #GMatchInfo structure + * + * Returns whether matching was done using the JIT code. + * + * See man:pcrejit(3) for more information on + * JIT. + * + * Note that if the @match_info was generated using the alternative DFA + * algorithm by g_regex_match_all() or g_regex_match_all_full(), the + * JIT code is never used, and therefore this function returns %FALSE. 
+ * + * Returns: %TRUE if the match was done using JIT code, %FALSE otherwise + * + * Since: 2.34 + */ +gboolean +g_match_info_get_jited (const GMatchInfo *match_info) +{ + g_return_val_if_fail (match_info != NULL, FALSE); + return match_info->jited; +} + /** * g_match_info_ref: * @match_info: a #GMatchInfo @@ -1324,6 +1374,37 @@ g_regex_unref (GRegex *regex) * Compiles the regular expression to an internal form, and does * the initial setup of the #GRegex structure. * + * Since 2.34, and only on some platforms, there is support for + * Just-In-Time (JIT) code generation. Using JIT code speeds up the matching + * process, at the expense of using more time in g_regex_new(). + * It is therefore most useful when matching the same pattern often, or + * when matching large amounts of data. + * + * If @compile_options includes the %G_REGEX_JIT flag, JIT code is generated + * for matching complete matches. + * If @compile_options includes the %G_REGEX_JIT_PARTIAL_SOFT flag, JIT code is + * generated for partial matching with the %G_REGEX_MATCH_PARTIAL_SOFT match flag. + * Note that if @match_options includes the %G_REGEX_MATCH_PARTIAL_SOFT flag, + * then the %G_REGEX_JIT_PARTIAL_SOFT flag is set implicitly. + * If @compile_options includes the %G_REGEX_JIT_PARTIAL_HARD flag, JIT code is + * generated for partial matching with the %G_REGEX_MATCH_PARTIAL_HARD match flag. + * Note that if @match_options includes the %G_REGEX_MATCH_PARTIAL_HARD flag, + * then the %G_REGEX_JIT_PARTIAL_HARD flag is set implicitly. + * + * Note that JIT code is not supported for all patterns and all compile or match + * flags; to check if there actually was JIT code generated for the pattern, you + * can test the result of g_regex_get_compile_flags() for the corresponding flag.
+ * At this time, only the %G_REGEX_MATCH_NOTBOL, %G_REGEX_MATCH_NOTEOL, + * %G_REGEX_MATCH_NOTEMPTY, %G_REGEX_MATCH_NOTEMPTY_ATSTART, + * %G_REGEX_MATCH_PARTIAL_SOFT and %G_REGEX_MATCH_PARTIAL_HARD flags are compatible + * with JIT code generation. + * + * Also note that JIT code is generated for the specific set of flags + * passed in @compile_options and @match_options; if you do matching with an + * incompatible set of match flags, the JIT code will not be used. + * + * See man:pcrejit(3) for more information on JIT code generation. + * * Returns: a #GRegex structure. Call g_regex_unref() when you * are done with it * @@ -1341,6 +1422,7 @@ g_regex_new (const gchar *pattern, gint erroffset; gint errcode; gboolean optimize = FALSE; + gint study_options; static volatile gsize initialised = 0; unsigned long int pcre_compile_options; GRegexCompileFlags nonpcre_compile_options; @@ -1409,6 +1491,40 @@ g_regex_new (const gchar *pattern, if (~compile_options & G_REGEX_BSR_ANYCRLF) compile_options |= PCRE_BSR_UNICODE; + study_options = 0; + if (compile_options & G_REGEX_COMPILE_JIT_MASK) + { + optimize = TRUE; + + /* G_REGEX_JIT has the same value as PCRE_NOTBOL, which is a match + * option, not a compile option, and is thus free to use here. + */ + if (compile_options & G_REGEX_JIT) + { + study_options |= PCRE_STUDY_JIT_COMPILE; + + /* If the given match options include G_REGEX_MATCH_PARTIAL_{SOFT,HARD} + * force the G_REGEX_JIT_PARTIAL_{SOFT,HARD} flags. + */ + if (match_options & G_REGEX_MATCH_PARTIAL_SOFT) + compile_options |= G_REGEX_JIT_PARTIAL_SOFT; + if (match_options & G_REGEX_MATCH_PARTIAL_HARD) + compile_options |= G_REGEX_JIT_PARTIAL_HARD; + } + + /* G_REGEX_JIT_PARTIAL_{SOFT,HARD} have the same value + * as PCRE_PARTIAL_{SOFT,HARD} as these are match options, not + * compile options and are thus free to use in GRegexCompileFlags.
+ */ + if (compile_options & G_REGEX_JIT_PARTIAL_SOFT) + study_options |= PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE; + if (compile_options & G_REGEX_JIT_PARTIAL_HARD) + study_options |= PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE; + + /* Mask out these options as they're not meant for pcre_compile2 */ + compile_options &= ~G_REGEX_COMPILE_JIT_MASK; + } + /* compile the pattern */ re = pcre_compile2 (pattern, compile_options, &errcode, &errmsg, &erroffset, NULL); @@ -1464,7 +1580,7 @@ g_regex_new (const gchar *pattern, if (optimize) { - regex->extra = pcre_study (regex->pcre_re, 0, &errmsg); + regex->extra = pcre_study (regex->pcre_re, study_options, &errmsg); if (errmsg != NULL) { GError *tmp_error = g_error_new (G_REGEX_ERROR, @@ -1478,6 +1594,16 @@ g_regex_new (const gchar *pattern, g_regex_unref (regex); return NULL; } + + /* Check whether the pattern was actually JITed */ + if (study_options != 0) + { + int jited = FALSE; + + pcre_fullinfo (re, regex->extra, PCRE_INFO_JIT, &jited); + if (!jited) + regex->compile_opts &= ~G_REGEX_COMPILE_JIT_MASK; + } } return regex; @@ -1691,6 +1817,8 @@ g_regex_match_simple (const gchar *pattern, * To retrieve all the non-overlapping matches of the pattern in * string you can use g_match_info_next(). * + * See g_regex_match_full() for more information. + * * |[ * static void * print_uppercase_words (const gchar *string) @@ -1792,6 +1920,31 @@ g_regex_match (const GRegex *regex, * } * ]| * + * If JIT code was generated by passing the %G_REGEX_JIT, + * %G_REGEX_JIT_PARTIAL_SOFT or %G_REGEX_JIT_PARTIAL_HARD flags to + * g_regex_new(), then matching may be done using the JIT code, if the + * @match_options passed are compatible with it, that is, with the exception + * of the %G_REGEX_MATCH_PARTIAL_SOFT and %G_REGEX_MATCH_PARTIAL_HARD flags, + * no match flag may be passed that was not passed when creating @regex, and no + * match flag may be omitted that was passed when creating @regex.
+ * + * If @regex was created using the %G_REGEX_JIT flag, and + * g_regex_get_compile_flags() contains the %G_REGEX_JIT flag, then complete + * matching will be done using the JIT code. + * If @regex was created using the %G_REGEX_JIT_PARTIAL_SOFT flag, and + * g_regex_get_compile_flags() contains the %G_REGEX_JIT_PARTIAL_SOFT flag, then + * partial matching with %G_REGEX_MATCH_PARTIAL_SOFT will be done using + * the JIT code. + * If @regex was created using the %G_REGEX_JIT_PARTIAL_HARD flag, and + * g_regex_get_compile_flags() contains the %G_REGEX_JIT_PARTIAL_HARD flag, then + * partial matching with %G_REGEX_MATCH_PARTIAL_HARD will be done using + * the JIT code. + * + * You can use g_match_info_get_jited() on the returned @match_info to check + * whether the JIT code was used. + * + * See man:pcrejit(3) for more information on using JIT code. + * * Returns: %TRUE is the string matched, %FALSE otherwise * * Since: 2.14 @@ -1848,6 +2001,10 @@ g_regex_match_full (const GRegex *regex, * you use any #GMatchInfo method (except g_match_info_free()) after * freeing or modifying @string then the behaviour is undefined. * + * Note that matching using this function never uses the JIT code + * generated by passing %G_REGEX_JIT, %G_REGEX_JIT_PARTIAL_SOFT or + * %G_REGEX_JIT_PARTIAL_HARD to g_regex_new(). + * * Returns: %TRUE is the string matched, %FALSE otherwise * * Since: 2.14 @@ -1910,6 +2067,10 @@ g_regex_match_all (const GRegex *regex, * you use any #GMatchInfo method (except g_match_info_free()) after * freeing or modifying @string then the behaviour is undefined. * + * Note that matching using this function never uses the JIT code + * generated by passing %G_REGEX_JIT, %G_REGEX_JIT_PARTIAL_SOFT or + * %G_REGEX_JIT_PARTIAL_HARD to g_regex_new().
+ * * Returns: %TRUE is the string matched, %FALSE otherwise * * Since: 2.14 @@ -3183,3 +3344,27 @@ g_regex_escape_string (const gchar *string, return g_string_free (escaped, FALSE); } + +/** + * g_regex_jit_supported: + * + * Returns whether JIT accelerated regex evaluation is available + * on this platform. Currently supported are: + * * ARM v5, v7, and Thumb2 + * * Intel x86 32-bit and 64-bit + * * MIPS 32-bit + * * Power PC 32-bit and 64-bit + * + * Returns: %TRUE if JITing is supported + * + * Since: 2.34 + */ +gboolean +g_regex_jit_supported (void) +{ + int jit = 0; + + /* pcre_config() returns non-zero when it rejects the query; report + * that as "not supported" instead of reading an unset value. */ + return pcre_config (PCRE_CONFIG_JIT, &jit) == 0 && jit != 0; +} diff --git a/glib/gregex.h b/glib/gregex.h index 33e545142..4907217e9 100644 --- a/glib/gregex.h +++ b/glib/gregex.h @@ -284,6 +284,11 @@ GQuark g_regex_error_quark (void); * characters '\r', '\n' and '\r\n'. Since: 2.34 * @G_REGEX_NO_START_OPTIMIZE: Disable some optimizations that will cause incorrect * results for g_match_info_get_mark() when using backtracking control verbs. Since: 2.34 + * @G_REGEX_JIT: generate JIT code for complete matches. Since: 2.34 + * @G_REGEX_JIT_PARTIAL_SOFT: generate JIT code for partial matching using + * %G_REGEX_MATCH_PARTIAL_SOFT. Since: 2.34 + * @G_REGEX_JIT_PARTIAL_HARD: generate JIT code for partial matching using + * %G_REGEX_MATCH_PARTIAL_HARD. Since: 2.34 * * Flags specifying compile-time options.
* @@ -311,7 +316,10 @@ typedef enum G_REGEX_NEWLINE_ANYCRLF = G_REGEX_NEWLINE_CR | 1 << 22, G_REGEX_BSR_ANYCRLF = 1 << 23, G_REGEX_JAVASCRIPT_COMPAT = 1 << 25, - G_REGEX_NO_START_OPTIMIZE = 1 << 26 + G_REGEX_NO_START_OPTIMIZE = 1 << 26, + G_REGEX_JIT = 1 << 7, + G_REGEX_JIT_PARTIAL_SOFT = 1 << 15, + G_REGEX_JIT_PARTIAL_HARD = 1 << 27 } GRegexCompileFlags; /** @@ -441,6 +449,8 @@ typedef gboolean (*GRegexEvalCallback) (const GMatchInfo *match_info, gpointer user_data); +gboolean g_regex_jit_supported (void); + GRegex *g_regex_new (const gchar *pattern, GRegexCompileFlags compile_options, GRegexMatchFlags match_options, @@ -537,7 +547,7 @@ gboolean g_regex_check_replacement (const gchar *replacement, GRegex *g_match_info_get_regex (const GMatchInfo *match_info); const gchar *g_match_info_get_string (const GMatchInfo *match_info); const gchar *g_match_info_get_mark (const GMatchInfo *match_info); - +gboolean g_match_info_get_jited (const GMatchInfo *match_info); GMatchInfo *g_match_info_ref (GMatchInfo *match_info); void g_match_info_unref (GMatchInfo *match_info); diff --git a/glib/tests/regex.c b/glib/tests/regex.c index ec30d5cfe..10f18a0af 100644 --- a/glib/tests/regex.c +++ b/glib/tests/regex.c @@ -141,6 +141,8 @@ typedef struct { GRegexCompileFlags compile_opts; GRegexMatchFlags match_opts; gboolean expected; + gboolean check_jited; + gboolean expected_jited; gssize string_len; gint start_position; GRegexMatchFlags match_opts2; @@ -182,6 +184,7 @@ test_match (gconstpointer d) { const TestMatchData *data = d; GRegex *regex; + GMatchInfo *info; gboolean match; GError *error = NULL; @@ -190,21 +193,28 @@ test_match (gconstpointer d) g_assert_no_error (error); match = g_regex_match_full (regex, data->string, data->string_len, - data->start_position, data->match_opts2, NULL, NULL); + data->start_position, data->match_opts2, &info, NULL); g_assert_cmpint (match, ==, data->expected); + if (data->check_jited) + g_assert_cmpint (g_match_info_get_jited (info), ==, 
data->expected_jited); + g_match_info_free (info); if (data->string_len == -1 && data->start_position == 0) { - match = g_regex_match (regex, data->string, data->match_opts2, NULL); + match = g_regex_match (regex, data->string, data->match_opts2, &info); g_assert_cmpint (match, ==, data->expected); + if (data->check_jited) + g_assert_cmpint (g_match_info_get_jited (info), ==, data->expected_jited); + g_match_info_free (info); } g_regex_unref (regex); } -#define TEST_MATCH(_pattern, _compile_opts, _match_opts, _string, \ - _string_len, _start_position, _match_opts2, _expected) { \ +#define TEST_MATCH_FULL(_extra_name, _pattern, _compile_opts, _match_opts, _string, \ + _string_len, _start_position, _match_opts2, _expected, \ + _check_jited, _expected_jited) { \ TestMatchData *data; \ gchar *path; \ data = g_new0 (TestMatchData, 1); \ @@ -216,11 +226,22 @@ test_match (gconstpointer d) data->start_position = _start_position; \ data->match_opts2 = _match_opts2; \ data->expected = _expected; \ - path = g_strdup_printf ("/regex/match/%d", ++total); \ + data->check_jited = _check_jited; \ + data->expected_jited = _expected_jited; \ + path = g_strdup_printf ("/regex/match/%s%d", _extra_name, ++total); \ g_test_add_data_func (path, data, test_match); \ g_free (path); \ } +#define TEST_MATCH(_pattern, _compile_opts, _match_opts, _string, \ + _string_len, _start_position, _match_opts2, _expected) \ + TEST_MATCH_FULL("", _pattern, _compile_opts, _match_opts, _string,\ + _string_len, _start_position, _match_opts2, _expected, FALSE, FALSE) +#define TEST_MATCH_JIT(_pattern, _compile_opts, _match_opts, _string, \ + _string_len, _start_position, _match_opts2, _expected, _expected_jited) \ + TEST_MATCH_FULL("jit/", _pattern, G_REGEX_JIT|(_compile_opts), _match_opts, _string,\ + _string_len, _start_position, _match_opts2, _expected, TRUE, _expected_jited) + struct _Match { gchar *string; @@ -473,11 +494,11 @@ test_partial (gconstpointer d) GRegex *regex; GMatchInfo *match_info; - regex = g_regex_new
(data->pattern, data->compile_opts, data->match_opts, NULL); g_assert (regex != NULL); - g_regex_match (regex, data->string, data->match_opts, &match_info); + g_regex_match (regex, data->string, data->match_opts2, &match_info); g_assert_cmpint (data->expected, ==, g_match_info_is_partial_match (match_info)); @@ -487,24 +508,40 @@ test_partial (gconstpointer d) g_assert (!g_match_info_fetch_pos (match_info, 1, NULL, NULL)); } + if (data->check_jited) + g_assert_cmpint (g_match_info_get_jited (match_info), ==, data->expected_jited); + g_match_info_free (match_info); g_regex_unref (regex); } -#define TEST_PARTIAL_FULL(_pattern, _string, _match_opts, _expected) { \ +#define TEST_PARTIAL_FULL(_extra_name, _pattern, _compile_opts, _match_opts, \ + _string, _match_opts2, _expected, \ + _check_jited, _expected_jited) { \ TestMatchData *data; \ gchar *path; \ data = g_new0 (TestMatchData, 1); \ data->pattern = _pattern; \ data->string = _string; \ - data->match_opts = _match_opts; \ - data->expected = _expected; \ - path = g_strdup_printf ("/regex/match/partial/%d", ++total); \ + data->compile_opts = _compile_opts; \ + data->match_opts = _match_opts; \ + data->match_opts2 = _match_opts2; \ + data->expected = _expected; \ + data->check_jited = _check_jited; \ + data->expected_jited = _expected_jited; \ + path = g_strdup_printf ("/regex/match/partial/%s%d", _extra_name, ++total); \ g_test_add_data_func (path, data, test_partial); \ g_free (path); \ } -#define TEST_PARTIAL(_pattern, _string, _expected) TEST_PARTIAL_FULL(_pattern, _string, G_REGEX_MATCH_PARTIAL, _expected) +#define TEST_PARTIAL_WITH_FLAGS(_pattern, _string, _match_opts2, _expected) \ + TEST_PARTIAL_FULL("", _pattern, 0, 0 , _string, _match_opts2, _expected, FALSE, FALSE) +#define TEST_PARTIAL(_pattern, _string, _expected) \ + TEST_PARTIAL_WITH_FLAGS(_pattern, _string, G_REGEX_MATCH_PARTIAL_SOFT, _expected) +#define TEST_PARTIAL_FULL_JIT(_pattern, _compile_opts, _match_opts, _string, _match_opts2, \ + _expected, 
_expected_jited) \ + TEST_PARTIAL_FULL("jit/", _pattern, G_REGEX_JIT|(_compile_opts), _match_opts, _string, _match_opts2, \ + _expected, TRUE, _expected_jited) typedef struct { const gchar *pattern; @@ -2404,8 +2441,8 @@ main (int argc, char *argv[]) TEST_PARTIAL("a?b", "a", TRUE); /* Test soft vs. hard partial matching */ - TEST_PARTIAL_FULL("cat(fish)?", "cat", G_REGEX_MATCH_PARTIAL_SOFT, FALSE); - TEST_PARTIAL_FULL("cat(fish)?", "cat", G_REGEX_MATCH_PARTIAL_HARD, TRUE); + TEST_PARTIAL_WITH_FLAGS("cat(fish)?", "cat", G_REGEX_MATCH_PARTIAL_SOFT, FALSE); + TEST_PARTIAL_WITH_FLAGS("cat(fish)?", "cat", G_REGEX_MATCH_PARTIAL_HARD, TRUE); /* TEST_SUB_PATTERN(pattern, string, start_position, sub_n, expected_sub, * expected_start, expected_end) */ @@ -2731,5 +2768,17 @@ main (int argc, char *argv[]) TEST_MARK("X(*MARK:A)Y|X(*MARK:B)Z", "XY", TRUE, "A"); TEST_MARK("X(*MARK:A)Y|X(*MARK:B)Z", "XZ", TRUE, "B"); + if (g_regex_jit_supported ()) + { + /* Test JITing compile */ + TEST_NEW("foo", G_REGEX_JIT, 0); + + TEST_MATCH_JIT("^ab", 0, 0, "ab", -1, 0, 0, TRUE, TRUE); + TEST_PARTIAL_FULL_JIT("cat(fish)?", 0, 0, "cat", G_REGEX_MATCH_PARTIAL_SOFT, FALSE, FALSE); + TEST_PARTIAL_FULL_JIT("cat(fish)?", 0, 0, "cat", G_REGEX_MATCH_PARTIAL_HARD, TRUE, FALSE); + TEST_PARTIAL_FULL_JIT("cat(fish)?", 0, G_REGEX_MATCH_PARTIAL_SOFT, "cat", G_REGEX_MATCH_PARTIAL_SOFT, FALSE, TRUE); + TEST_PARTIAL_FULL_JIT("cat(fish)?", 0, G_REGEX_MATCH_PARTIAL_HARD, "cat", G_REGEX_MATCH_PARTIAL_HARD, TRUE, TRUE); + } + return g_test_run (); }