diff --git a/ChangeLog b/ChangeLog index 7104074f9..e97a2fd86 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +2008-02-17 Marco Barisione + + * glib/gregex.c: (match_info_new), (g_match_info_next): Don't return + duplicate matches when matching empty strings. (#515944) + * tests/regex-test.c: Add tests. + 2008-02-17 Hans Breuer * glib/gutils.c : define CSIDL_MYPICTURES if not available diff --git a/glib/gregex.c b/glib/gregex.c index cc9aa0e6e..18db8d49f 100644 --- a/glib/gregex.c +++ b/glib/gregex.c @@ -411,7 +411,11 @@ match_info_new (const GRegex *regex, PCRE_INFO_CAPTURECOUNT, &capture_count); match_info->n_offsets = (capture_count + 1) * 3; } + match_info->offsets = g_new0 (gint, match_info->n_offsets); + /* Set an invalid position for the previous match. */ + match_info->offsets[0] = -1; + match_info->offsets[1] = -1; return match_info; } @@ -495,6 +499,8 @@ g_match_info_next (GMatchInfo *match_info, GError **error) { gint opts; + gint prev_match_start; + gint prev_match_end; g_return_val_if_fail (match_info != NULL, FALSE); g_return_val_if_fail (error == NULL || *error == NULL, FALSE); @@ -502,6 +508,9 @@ g_match_info_next (GMatchInfo *match_info, opts = match_info->regex->match_opts | match_info->match_opts; + prev_match_start = match_info->offsets[0]; + prev_match_end = match_info->offsets[1]; + match_info->matches = pcre_exec (match_info->regex->pcre_re, match_info->regex->extra, match_info->string, @@ -540,6 +549,25 @@ g_match_info_next (GMatchInfo *match_info, match_info->pos = match_info->offsets[1]; } + /* it's possible to get two identical matches when we are matching + * empty strings, for instance if the pattern is "(?=[A-Z0-9])" and + * the string is "RegExTest" we have: + * - search at position 0: match from 0 to 0 + * - search at position 1: match from 3 to 3 + * - search at position 3: match from 3 to 3 (duplicate) + * - search at position 4: match from 5 to 5 + * - search at position 5: match from 5 to 5 (duplicate) + * - search at 
position 6: no match -> stop + * so we have to ignore the duplicates. + * see bug #515944: http://bugzilla.gnome.org/show_bug.cgi?id=515944 */ + if (match_info->matches >= 0 && + prev_match_start == match_info->offsets[0] && + prev_match_end == match_info->offsets[1]) + { + /* ignore this match and search for the next one */ + return g_match_info_next (match_info, error); + } + return match_info->matches >= 0; } diff --git a/tests/regex-test.c b/tests/regex-test.c index ede6feb70..2e0005131 100644 --- a/tests/regex-test.c +++ b/tests/regex-test.c @@ -1786,6 +1786,7 @@ main (int argc, char *argv[]) TEST_MATCH_NEXT3("a", "aaxa", -1, 0, "a", 0, 1, "a", 1, 2, "a", 3, 4); TEST_MATCH_NEXT3("a", "aa" OGRAVE "a", -1, 0, "a", 0, 1, "a", 1, 2, "a", 4, 5); TEST_MATCH_NEXT3("a*", "aax", -1, 0, "aa", 0, 2, "", 2, 2, "", 3, 3); + TEST_MATCH_NEXT3("(?=[A-Z0-9])", "RegExTest", -1, 0, "", 0, 0, "", 3, 3, "", 5, 5); TEST_MATCH_NEXT4("a*", "aaxa", -1, 0, "aa", 0, 2, "", 2, 2, "a", 3, 4, "", 4, 4); /* TEST_MATCH_COUNT(pattern, string, start_position, match_opts, expected_count) */ @@ -2047,6 +2048,8 @@ main (int argc, char *argv[]) TEST_REPLACE_LIT("[^-]", "-" EURO "-" AGRAVE "-" HSTROKE, 0, "a", "-a-a-a"); TEST_REPLACE_LIT("[^-]", "-" EURO "-" AGRAVE, 0, "a\\g<0>a", "-a\\g<0>a-a\\g<0>a"); TEST_REPLACE_LIT("-", "-" EURO "-" AGRAVE "-" HSTROKE, 0, "", EURO AGRAVE HSTROKE); + TEST_REPLACE_LIT("(?=[A-Z0-9])", "RegExTest", 0, "_", "_Reg_Ex_Test"); + TEST_REPLACE_LIT("(?=[A-Z0-9])", "RegExTest", 1, "_", "Reg_Ex_Test"); /* TEST_GET_STRING_NUMBER(pattern, name, expected_num) */ TEST_GET_STRING_NUMBER("", "A", -1);