From 1c029158f696b0526c922c8c04b4e3bb7ddf7093 Mon Sep 17 00:00:00 2001 From: Christian Persch Date: Fri, 8 Jun 2012 00:49:00 +0200 Subject: [PATCH] regex: Add g_match_info_get_mark Since PCRE 8.03, PCRE supports backtracking control verbs with a name argument. g_match_info_get_mark() will return the argument of the last encountered verb in the whole matching process for failed or partial matches, and in the matching path only for matches. --- docs/reference/glib/glib-sections.txt | 1 + glib/gregex.c | 40 +++++++++++++++++++-- glib/gregex.h | 2 ++ glib/tests/regex.c | 50 +++++++++++++++++++++++++++ 4 files changed, 91 insertions(+), 2 deletions(-) diff --git a/docs/reference/glib/glib-sections.txt b/docs/reference/glib/glib-sections.txt index d2e985e68..02652a7b9 100644 --- a/docs/reference/glib/glib-sections.txt +++ b/docs/reference/glib/glib-sections.txt @@ -1026,6 +1026,7 @@ g_regex_check_replacement GMatchInfo g_match_info_get_regex g_match_info_get_string +g_match_info_get_mark g_match_info_ref g_match_info_unref g_match_info_free diff --git a/glib/gregex.c b/glib/gregex.c index f93e62532..683e0ed6b 100644 --- a/glib/gregex.c +++ b/glib/gregex.c @@ -200,6 +200,8 @@ struct _GMatchInfo gint n_workspace; /* number of workspace elements */ const gchar *string; /* string passed to the match function */ gssize string_len; /* length of string */ + /* const */ guchar *mark; /* MARK when using backtracking control */ + pcre_extra extra; /* pcre_extra data */ }; struct _GRegex @@ -578,6 +580,20 @@ match_info_new (const GRegex *regex, match_info->offsets[0] = -1; match_info->offsets[1] = -1; + if (!is_dfa) + { + /* We need a pcre_extra to store a pointer to GMatchInfo::mark + * where pcre_exec will store the MARK. + * Since pcre_exec does not modify the extra data otherwise, + * it should be safe to do a shallow copy here. 
+ */ + if (regex->extra) + match_info->extra = *regex->extra; + + match_info->extra.flags |= PCRE_EXTRA_MARK; + match_info->extra.mark = &match_info->mark; + } + return match_info; } @@ -619,6 +635,27 @@ g_match_info_get_string (const GMatchInfo *match_info) return match_info->string; } +/** + * g_match_info_get_mark: + * @match_info: a #GMatchInfo structure + * + * When the pattern contains backtracking control verbs, and there is + * a match, returns the argument of the verb last encountered on the + * matching path. If there is a partial match, or no match, returns + * the argument of the last verb encountered in the whole matching + * process. Otherwise, %NULL is returned. + * + * Returns: (transfer none): the mark, or %NULL + * + * Since: 2.34 + */ +const gchar * +g_match_info_get_mark (const GMatchInfo *match_info) +{ + g_return_val_if_fail (match_info != NULL, NULL); + return (const gchar *) match_info->mark; +} + /** * g_match_info_ref: * @match_info: a #GMatchInfo @@ -715,7 +752,7 @@ g_match_info_next (GMatchInfo *match_info, } match_info->matches = pcre_exec (match_info->regex->pcre_re, - match_info->regex->extra, + &match_info->extra, match_info->string, match_info->string_len, match_info->pos, @@ -1209,7 +1246,6 @@ g_match_info_fetch_all (const GMatchInfo *match_info) return result; } - /* GRegex */ GQuark diff --git a/glib/gregex.h b/glib/gregex.h index 752ed48b5..9e9501e22 100644 --- a/glib/gregex.h +++ b/glib/gregex.h @@ -524,6 +524,8 @@ gboolean g_regex_check_replacement (const gchar *replacement, /* Match info */ GRegex *g_match_info_get_regex (const GMatchInfo *match_info); const gchar *g_match_info_get_string (const GMatchInfo *match_info); +const gchar *g_match_info_get_mark (const GMatchInfo *match_info); + GMatchInfo *g_match_info_ref (GMatchInfo *match_info); void g_match_info_unref (GMatchInfo *match_info); diff --git a/glib/tests/regex.c b/glib/tests/regex.c index 1118da7d9..b0b15662f 100644 --- a/glib/tests/regex.c +++ b/glib/tests/regex.c 
@@ -2044,6 +2044,48 @@ test_explicit_crlf (void) g_regex_unref (regex); } + +typedef struct { + const gchar *pattern; + const gchar *string; + const gchar *mark; + gboolean expected; +} TestMarkData; + +static void +test_mark (gconstpointer d) +{ + const TestMarkData *data = d; + GRegex *regex; + GMatchInfo *info; + gboolean match; + GError *error = NULL; + + regex = g_regex_new (data->pattern, 0, 0, &error); + g_assert_no_error (error); + + match = g_regex_match_full (regex, data->string, -1, 0, 0, &info, NULL); + g_assert_cmpint (match, ==, data->expected); + g_assert_cmpstr (g_match_info_get_mark (info), ==, data->mark); + + g_match_info_free (info); + g_regex_unref (regex); +} + +#define TEST_MARK(_pattern, _string, _expected, _mark) \ +{ \ + TestMarkData *data; \ + gchar *path; \ + data = g_new0 (TestMarkData, 1); \ + data->pattern = _pattern; \ + data->string = _string; \ + data->mark = _mark; \ + data->expected = _expected; \ + path = g_strdup_printf ("/regex/mark/%d", ++total); \ + g_test_add_data_func (path, data, test_mark); \ + g_free (path); \ +} + int main (int argc, char *argv[]) { @@ -2679,5 +2721,13 @@ main (int argc, char *argv[]) TEST_MATCH_NOTEMPTY("a?b?", "xyz", FALSE); TEST_MATCH_NOTEMPTY_ATSTART("a?b?", "xyz", TRUE); + /* MARK: g_match_info_get_mark() result for patterns with backtracking control verbs, on both matching and non-matching subjects */ + TEST_MARK("^(A(*PRUNE:A)B|C(*PRUNE:B)D)", "AC", FALSE, "A"); + TEST_MARK("^(A(*PRUNE:A)B|C(*PRUNE:B)D)", "CB", FALSE, "B"); + TEST_MARK("(*MARK:A)(*SKIP:B)(C|X)", "C", TRUE, "A"); + TEST_MARK("(*MARK:A)(*SKIP:B)(C|X)", "D", FALSE, "A"); + TEST_MARK("X(*MARK:A)Y|X(*MARK:B)Z", "XY", TRUE, "A"); + TEST_MARK("X(*MARK:A)Y|X(*MARK:B)Z", "XZ", TRUE, "B"); + return g_test_run (); }