mirror of
https://gitlab.gnome.org/GNOME/glib.git
synced 2025-01-24 04:56:14 +01:00
gregex: Fix return from g_match_info_fetch() for unmatched subpatterns
If there were more subpatterns in the regex than matches (which can happen if one or more of the subpatterns are optional), `g_match_info_fetch()` was erroneously returning `NULL` rather than the empty string. It should only return `NULL` when the `match_num` specifies a subpattern which doesn’t exist in the regex. This is complicated slightly by the fact that when using `g_regex_match_all()`, more matches can be returned than there are subpatterns, due to one or more subpatterns matching multiple times at different offsets in the string. This includes a fix for a unit test which was erroneously checking the broken behaviour. Thanks to Allison Karlitskaya for the minimal reproducer. Signed-off-by: Philip Withnall <pwithnall@endlessos.org> Fixes: #229
This commit is contained in:
parent
601ef3b6be
commit
b052620398
@ -35,6 +35,7 @@
|
|||||||
#include "gmessages.h"
|
#include "gmessages.h"
|
||||||
#include "gstrfuncs.h"
|
#include "gstrfuncs.h"
|
||||||
#include "gatomic.h"
|
#include "gatomic.h"
|
||||||
|
#include "gtestutils.h"
|
||||||
#include "gthread.h"
|
#include "gthread.h"
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -206,7 +207,8 @@ struct _GMatchInfo
|
|||||||
gint ref_count; /* the ref count (atomic) */
|
gint ref_count; /* the ref count (atomic) */
|
||||||
GRegex *regex; /* the regex */
|
GRegex *regex; /* the regex */
|
||||||
GRegexMatchFlags match_opts; /* options used at match time on the regex */
|
GRegexMatchFlags match_opts; /* options used at match time on the regex */
|
||||||
gint matches; /* number of matching sub patterns */
|
gint matches; /* number of matching sub patterns, guaranteed to be <= (n_subpatterns + 1) if doing a single match (rather than matching all) */
|
||||||
|
gint n_subpatterns; /* total number of sub patterns in the regex */
|
||||||
gint pos; /* position in the string where last match left off */
|
gint pos; /* position in the string where last match left off */
|
||||||
gint n_offsets; /* number of offsets */
|
gint n_offsets; /* number of offsets */
|
||||||
gint *offsets; /* array of offsets paired 0,1 ; 2,3 ; 3,4 etc */
|
gint *offsets; /* array of offsets paired 0,1 ; 2,3 ; 3,4 etc */
|
||||||
@ -574,6 +576,9 @@ match_info_new (const GRegex *regex,
|
|||||||
match_info->pos = start_position;
|
match_info->pos = start_position;
|
||||||
match_info->match_opts = match_options;
|
match_info->match_opts = match_options;
|
||||||
|
|
||||||
|
pcre_fullinfo (regex->pcre_re, regex->extra,
|
||||||
|
PCRE_INFO_CAPTURECOUNT, &match_info->n_subpatterns);
|
||||||
|
|
||||||
if (is_dfa)
|
if (is_dfa)
|
||||||
{
|
{
|
||||||
/* These values should be enough for most cases, if they are not
|
/* These values should be enough for most cases, if they are not
|
||||||
@ -584,10 +589,7 @@ match_info_new (const GRegex *regex,
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
gint capture_count;
|
match_info->n_offsets = (match_info->n_subpatterns + 1) * 3;
|
||||||
pcre_fullinfo (regex->pcre_re, regex->extra,
|
|
||||||
PCRE_INFO_CAPTURECOUNT, &capture_count);
|
|
||||||
match_info->n_offsets = (capture_count + 1) * 3;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
match_info->offsets = g_new0 (gint, match_info->n_offsets);
|
match_info->offsets = g_new0 (gint, match_info->n_offsets);
|
||||||
@ -768,6 +770,8 @@ g_match_info_next (GMatchInfo *match_info,
|
|||||||
match_info->pos = match_info->offsets[1];
|
match_info->pos = match_info->offsets[1];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
g_assert (match_info->matches <= match_info->n_subpatterns + 1);
|
||||||
|
|
||||||
/* it's possible to get two identical matches when we are matching
|
/* it's possible to get two identical matches when we are matching
|
||||||
* empty strings, for instance if the pattern is "(?=[A-Z0-9])" and
|
* empty strings, for instance if the pattern is "(?=[A-Z0-9])" and
|
||||||
* the string is "RegExTest" we have:
|
* the string is "RegExTest" we have:
|
||||||
@ -1044,16 +1048,21 @@ g_match_info_fetch_pos (const GMatchInfo *match_info,
|
|||||||
g_return_val_if_fail (match_info != NULL, FALSE);
|
g_return_val_if_fail (match_info != NULL, FALSE);
|
||||||
g_return_val_if_fail (match_num >= 0, FALSE);
|
g_return_val_if_fail (match_num >= 0, FALSE);
|
||||||
|
|
||||||
|
/* check whether there was an error */
|
||||||
|
if (match_info->matches < 0)
|
||||||
|
return FALSE;
|
||||||
|
|
||||||
/* make sure the sub expression number they're requesting is less than
|
/* make sure the sub expression number they're requesting is less than
|
||||||
* the total number of sub expressions that were matched. */
|
* the total number of sub expressions in the regex. When matching all
|
||||||
if (match_num >= match_info->matches)
|
* (g_regex_match_all()), also compare against the number of matches */
|
||||||
|
if (match_num >= MAX (match_info->n_subpatterns + 1, match_info->matches))
|
||||||
return FALSE;
|
return FALSE;
|
||||||
|
|
||||||
if (start_pos != NULL)
|
if (start_pos != NULL)
|
||||||
*start_pos = match_info->offsets[2 * match_num];
|
*start_pos = (match_num < match_info->matches) ? match_info->offsets[2 * match_num] : -1;
|
||||||
|
|
||||||
if (end_pos != NULL)
|
if (end_pos != NULL)
|
||||||
*end_pos = match_info->offsets[2 * match_num + 1];
|
*end_pos = (match_num < match_info->matches) ? match_info->offsets[2 * match_num + 1] : -1;
|
||||||
|
|
||||||
return TRUE;
|
return TRUE;
|
||||||
}
|
}
|
||||||
@ -1989,6 +1998,9 @@ g_regex_match_all_full (const GRegex *regex,
|
|||||||
pcre_free (pcre_re);
|
pcre_free (pcre_re);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* don’t assert that (info->matches <= info->n_subpatterns + 1) as that only
|
||||||
|
* holds true for a single match, rather than matching all */
|
||||||
|
|
||||||
/* set info->pos to -1 so that a call to g_match_info_next() fails. */
|
/* set info->pos to -1 so that a call to g_match_info_next() fails. */
|
||||||
info->pos = -1;
|
info->pos = -1;
|
||||||
retval = info->matches >= 0;
|
retval = info->matches >= 0;
|
||||||
|
@ -2557,13 +2557,16 @@ main (int argc, char *argv[])
|
|||||||
TEST_SUB_PATTERN("(a)?(b)", "b", 0, 0, "b", 0, 1);
|
TEST_SUB_PATTERN("(a)?(b)", "b", 0, 0, "b", 0, 1);
|
||||||
TEST_SUB_PATTERN("(a)?(b)", "b", 0, 1, "", -1, -1);
|
TEST_SUB_PATTERN("(a)?(b)", "b", 0, 1, "", -1, -1);
|
||||||
TEST_SUB_PATTERN("(a)?(b)", "b", 0, 2, "b", 0, 1);
|
TEST_SUB_PATTERN("(a)?(b)", "b", 0, 2, "b", 0, 1);
|
||||||
|
TEST_SUB_PATTERN("(a)?b", "b", 0, 0, "b", 0, 1);
|
||||||
|
TEST_SUB_PATTERN("(a)?b", "b", 0, 1, "", -1, -1);
|
||||||
|
TEST_SUB_PATTERN("(a)?b", "b", 0, 2, NULL, UNTOUCHED, UNTOUCHED);
|
||||||
|
|
||||||
/* TEST_NAMED_SUB_PATTERN(pattern, string, start_position, sub_name,
|
/* TEST_NAMED_SUB_PATTERN(pattern, string, start_position, sub_name,
|
||||||
* expected_sub, expected_start, expected_end) */
|
* expected_sub, expected_start, expected_end) */
|
||||||
TEST_NAMED_SUB_PATTERN("a(?P<A>.)(?P<B>.)?", "ab", 0, "A", "b", 1, 2);
|
TEST_NAMED_SUB_PATTERN("a(?P<A>.)(?P<B>.)?", "ab", 0, "A", "b", 1, 2);
|
||||||
TEST_NAMED_SUB_PATTERN("a(?P<A>.)(?P<B>.)?", "aab", 1, "A", "b", 2, 3);
|
TEST_NAMED_SUB_PATTERN("a(?P<A>.)(?P<B>.)?", "aab", 1, "A", "b", 2, 3);
|
||||||
TEST_NAMED_SUB_PATTERN("a(?P<A>.)(?P<B>.)?", EURO "ab", 0, "A", "b", 4, 5);
|
TEST_NAMED_SUB_PATTERN("a(?P<A>.)(?P<B>.)?", EURO "ab", 0, "A", "b", 4, 5);
|
||||||
TEST_NAMED_SUB_PATTERN("a(?P<A>.)(?P<B>.)?", EURO "ab", 0, "B", NULL, UNTOUCHED, UNTOUCHED);
|
TEST_NAMED_SUB_PATTERN("a(?P<A>.)(?P<B>.)?", EURO "ab", 0, "B", "", -1, -1);
|
||||||
TEST_NAMED_SUB_PATTERN("a(?P<A>.)(?P<B>.)?", EURO "ab", 0, "C", NULL, UNTOUCHED, UNTOUCHED);
|
TEST_NAMED_SUB_PATTERN("a(?P<A>.)(?P<B>.)?", EURO "ab", 0, "C", NULL, UNTOUCHED, UNTOUCHED);
|
||||||
TEST_NAMED_SUB_PATTERN("a(?P<A>.)(?P<B>.)?", "a" EGRAVE "x", 0, "A", EGRAVE, 1, 3);
|
TEST_NAMED_SUB_PATTERN("a(?P<A>.)(?P<B>.)?", "a" EGRAVE "x", 0, "A", EGRAVE, 1, 3);
|
||||||
TEST_NAMED_SUB_PATTERN("a(?P<A>.)(?P<B>.)?", "a" EGRAVE "x", 0, "B", "x", 3, 4);
|
TEST_NAMED_SUB_PATTERN("a(?P<A>.)(?P<B>.)?", "a" EGRAVE "x", 0, "B", "x", 3, 4);
|
||||||
|
Loading…
Reference in New Issue
Block a user