Don't return duplicate matches when matching empty strings. (#515944) Add tests.

2008-02-17  Marco Barisione  <marco@barisione.org>

	* glib/gregex.c: (match_info_new), (g_match_info_next): Don't return
	duplicate matches when matching empty strings.  (#515944)
	* tests/regex-test.c: Add tests.

svn path=/trunk/; revision=6527
This commit is contained in:
Marco Barisione 2008-02-17 14:07:32 +00:00 committed by Marco Barisione
parent 521e741d56
commit 7e0677b076
3 changed files with 37 additions and 0 deletions

View File

@ -1,3 +1,9 @@
2008-02-17 Marco Barisione <marco@barisione.org>
* glib/gregex.c: (match_info_new), (g_match_info_next): Don't return
duplicate matches when matching empty strings. (#515944)
* tests/regex-test.c: Add tests.
2008-02-17 Hans Breuer <hans@breuer.org>
* glib/gutils.c : define CSIDL_MYPICTURES if not available

View File

@ -411,7 +411,11 @@ match_info_new (const GRegex *regex,
PCRE_INFO_CAPTURECOUNT, &capture_count); PCRE_INFO_CAPTURECOUNT, &capture_count);
match_info->n_offsets = (capture_count + 1) * 3; match_info->n_offsets = (capture_count + 1) * 3;
} }
match_info->offsets = g_new0 (gint, match_info->n_offsets); match_info->offsets = g_new0 (gint, match_info->n_offsets);
/* Set an invalid position for the previous match. */
match_info->offsets[0] = -1;
match_info->offsets[1] = -1;
return match_info; return match_info;
} }
@ -495,6 +499,8 @@ g_match_info_next (GMatchInfo *match_info,
GError **error) GError **error)
{ {
gint opts; gint opts;
gint prev_match_start;
gint prev_match_end;
g_return_val_if_fail (match_info != NULL, FALSE); g_return_val_if_fail (match_info != NULL, FALSE);
g_return_val_if_fail (error == NULL || *error == NULL, FALSE); g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
@ -502,6 +508,9 @@ g_match_info_next (GMatchInfo *match_info,
opts = match_info->regex->match_opts | match_info->match_opts; opts = match_info->regex->match_opts | match_info->match_opts;
prev_match_start = match_info->offsets[0];
prev_match_end = match_info->offsets[1];
match_info->matches = pcre_exec (match_info->regex->pcre_re, match_info->matches = pcre_exec (match_info->regex->pcre_re,
match_info->regex->extra, match_info->regex->extra,
match_info->string, match_info->string,
@ -540,6 +549,25 @@ g_match_info_next (GMatchInfo *match_info,
match_info->pos = match_info->offsets[1]; match_info->pos = match_info->offsets[1];
} }
/* it's possible to get two identical matches when we are matching
* empty strings, for instance if the pattern is "(?=[A-Z0-9])" and
* the string is "RegExTest" we have:
* - search at position 0: match from 0 to 0
* - search at position 1: match from 3 to 3
* - search at position 3: match from 3 to 3 (duplicate)
* - search at position 4: match from 5 to 5
* - search at position 5: match from 5 to 5 (duplicate)
* - search at position 6: no match -> stop
* so we have to ignore the duplicates.
* see bug #515944: http://bugzilla.gnome.org/show_bug.cgi?id=515944 */
if (match_info->matches >= 0 &&
prev_match_start == match_info->offsets[0] &&
prev_match_end == match_info->offsets[1])
{
/* ignore this match and search the next one */
return g_match_info_next (match_info, error);
}
return match_info->matches >= 0; return match_info->matches >= 0;
} }

View File

@ -1786,6 +1786,7 @@ main (int argc, char *argv[])
TEST_MATCH_NEXT3("a", "aaxa", -1, 0, "a", 0, 1, "a", 1, 2, "a", 3, 4); TEST_MATCH_NEXT3("a", "aaxa", -1, 0, "a", 0, 1, "a", 1, 2, "a", 3, 4);
TEST_MATCH_NEXT3("a", "aa" OGRAVE "a", -1, 0, "a", 0, 1, "a", 1, 2, "a", 4, 5); TEST_MATCH_NEXT3("a", "aa" OGRAVE "a", -1, 0, "a", 0, 1, "a", 1, 2, "a", 4, 5);
TEST_MATCH_NEXT3("a*", "aax", -1, 0, "aa", 0, 2, "", 2, 2, "", 3, 3); TEST_MATCH_NEXT3("a*", "aax", -1, 0, "aa", 0, 2, "", 2, 2, "", 3, 3);
TEST_MATCH_NEXT3("(?=[A-Z0-9])", "RegExTest", -1, 0, "", 0, 0, "", 3, 3, "", 5, 5);
TEST_MATCH_NEXT4("a*", "aaxa", -1, 0, "aa", 0, 2, "", 2, 2, "a", 3, 4, "", 4, 4); TEST_MATCH_NEXT4("a*", "aaxa", -1, 0, "aa", 0, 2, "", 2, 2, "a", 3, 4, "", 4, 4);
/* TEST_MATCH_COUNT(pattern, string, start_position, match_opts, expected_count) */ /* TEST_MATCH_COUNT(pattern, string, start_position, match_opts, expected_count) */
@ -2047,6 +2048,8 @@ main (int argc, char *argv[])
TEST_REPLACE_LIT("[^-]", "-" EURO "-" AGRAVE "-" HSTROKE, 0, "a", "-a-a-a"); TEST_REPLACE_LIT("[^-]", "-" EURO "-" AGRAVE "-" HSTROKE, 0, "a", "-a-a-a");
TEST_REPLACE_LIT("[^-]", "-" EURO "-" AGRAVE, 0, "a\\g<0>a", "-a\\g<0>a-a\\g<0>a"); TEST_REPLACE_LIT("[^-]", "-" EURO "-" AGRAVE, 0, "a\\g<0>a", "-a\\g<0>a-a\\g<0>a");
TEST_REPLACE_LIT("-", "-" EURO "-" AGRAVE "-" HSTROKE, 0, "", EURO AGRAVE HSTROKE); TEST_REPLACE_LIT("-", "-" EURO "-" AGRAVE "-" HSTROKE, 0, "", EURO AGRAVE HSTROKE);
TEST_REPLACE_LIT("(?=[A-Z0-9])", "RegExTest", 0, "_", "_Reg_Ex_Test");
TEST_REPLACE_LIT("(?=[A-Z0-9])", "RegExTest", 1, "_", "Reg_Ex_Test");
/* TEST_GET_STRING_NUMBER(pattern, name, expected_num) */ /* TEST_GET_STRING_NUMBER(pattern, name, expected_num) */
TEST_GET_STRING_NUMBER("", "A", -1); TEST_GET_STRING_NUMBER("", "A", -1);