regex: Add PARTIAL_HARD match option

PCRE has supported a new partial matching method, PCRE_PARTIAL_HARD, since version 8.00.
This commit is contained in:
Christian Persch 2012-06-07 22:50:52 +02:00
parent 0a2967030a
commit 1171215014
3 changed files with 34 additions and 7 deletions

View File

@ -146,7 +146,9 @@
G_REGEX_MATCH_NEWLINE_ANY | \
G_REGEX_MATCH_NEWLINE_ANYCRLF | \
G_REGEX_MATCH_BSR_ANYCRLF | \
G_REGEX_MATCH_BSR_ANY)
G_REGEX_MATCH_BSR_ANY | \
G_REGEX_MATCH_PARTIAL_SOFT | \
G_REGEX_MATCH_PARTIAL_HARD)
/* we rely on these flags having the same values */
G_STATIC_ASSERT (G_REGEX_CASELESS == PCRE_CASELESS);
@ -177,6 +179,8 @@ G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANY == PCRE_NEWLINE_ANY);
G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANYCRLF == PCRE_NEWLINE_ANYCRLF);
G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANYCRLF == PCRE_BSR_ANYCRLF);
G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANY == PCRE_BSR_UNICODE);
G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_SOFT == PCRE_PARTIAL_SOFT);
G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_HARD == PCRE_PARTIAL_HARD);
/* These PCRE flags are unused or not exposed publicly in GRegexFlags, so
* it should be ok to reuse them for different things.
@ -849,13 +853,21 @@ g_match_info_get_match_count (const GMatchInfo *match_info)
* able to raise an error as soon as a mistake is made.
*
* GRegex supports the concept of partial matching by means of the
* #G_REGEX_MATCH_PARTIAL flag. When this is set the return code for
* #G_REGEX_MATCH_PARTIAL_SOFT and #G_REGEX_MATCH_PARTIAL_HARD flags.
* When they are used, the return code for
* g_regex_match() or g_regex_match_full() is, as usual, %TRUE
* for a complete match, %FALSE otherwise. But, when these functions
* return %FALSE, you can check if the match was partial by calling
* g_match_info_is_partial_match().
*
* When using partial matching you cannot use g_match_info_fetch*().
* The difference between #G_REGEX_MATCH_PARTIAL_SOFT and
* #G_REGEX_MATCH_PARTIAL_HARD is that when a partial match is encountered
* with #G_REGEX_MATCH_PARTIAL_SOFT, matching continues to search for a
* possible complete match, while with #G_REGEX_MATCH_PARTIAL_HARD matching
* stops at the partial match.
* When both #G_REGEX_MATCH_PARTIAL_SOFT and #G_REGEX_MATCH_PARTIAL_HARD
* are set, the latter takes precedence.
* See <ulink>man:pcrepartial</ulink> for more information on partial matching.
*
* Because of the way certain internal optimizations are implemented
* the partial matching algorithm cannot be used with all patterns.
@ -864,7 +876,8 @@ g_match_info_get_match_count (const GMatchInfo *match_info)
* of occurrences is greater than one. Optional items such as "\d?"
* (where the maximum is one) are permitted. Quantifiers with any values
* are permitted after parentheses, so the invalid examples above can be
* coded thus "(a){2,4}" and "(\d)+". If #G_REGEX_MATCH_PARTIAL is set
* coded thus "(a){2,4}" and "(\d)+". If #G_REGEX_MATCH_PARTIAL or
* #G_REGEX_MATCH_PARTIAL_HARD is set
* for a pattern that does not conform to the restrictions, matching
* functions return an error.
*

View File

@ -361,6 +361,11 @@ typedef enum
* U+2029 PARAGRAPH SEPARATOR. Since: 2.34
* @G_REGEX_JAVASCRIPT_COMPAT: Changes behaviour so that it is compatible with
* JavaScript rather than PCRE. Since: 2.34
* @G_REGEX_MATCH_PARTIAL_SOFT: An alias for #G_REGEX_MATCH_PARTIAL. Since: 2.34
* @G_REGEX_MATCH_PARTIAL_HARD: Turns on the partial matching feature. In contrast
* to #G_REGEX_MATCH_PARTIAL_SOFT, this stops matching as soon as a partial match
* is found, without continuing to search for a possible complete match. See
* g_match_info_is_partial_match() for more information. Since: 2.34
*
* Flags specifying match-time options.
*
@ -381,7 +386,9 @@ typedef enum
G_REGEX_MATCH_NEWLINE_ANY = 1 << 22,
G_REGEX_MATCH_NEWLINE_ANYCRLF = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_ANY,
G_REGEX_MATCH_BSR_ANYCRLF = 1 << 23,
G_REGEX_MATCH_BSR_ANY = 1 << 24
G_REGEX_MATCH_BSR_ANY = 1 << 24,
G_REGEX_MATCH_PARTIAL_SOFT = G_REGEX_MATCH_PARTIAL,
G_REGEX_MATCH_PARTIAL_HARD = 1 << 27
} GRegexMatchFlags;
/**

View File

@ -474,7 +474,7 @@ test_partial (gconstpointer d)
g_assert (regex != NULL);
g_regex_match (regex, data->string, G_REGEX_MATCH_PARTIAL, &match_info);
g_regex_match (regex, data->string, data->match_opts, &match_info);
g_assert_cmpint (data->expected, ==, g_match_info_is_partial_match (match_info));
@ -488,18 +488,21 @@ test_partial (gconstpointer d)
g_regex_unref (regex);
}
#define TEST_PARTIAL(_pattern, _string, _expected) { \
/* Registers one partial-match test case.
 *
 * Allocates a zero-initialised TestMatchData, stores the pattern, the subject
 * string, the match options and the expected result in it, and registers it
 * with g_test_add_data_func() under the path "/regex/match/partial/<N>",
 * using test_partial as the test function. <N> is obtained by pre-incrementing
 * the counter `total`, so each invocation gets a distinct path.
 *
 * The temporary path string is freed right after registration; `data` is not
 * freed by this macro.
 */
#define TEST_PARTIAL_FULL(_pattern, _string, _match_opts, _expected) { \
TestMatchData *data; \
gchar *path; \
data = g_new0 (TestMatchData, 1); \
data->pattern = _pattern; \
data->string = _string; \
data->match_opts = _match_opts; \
data->expected = _expected; \
path = g_strdup_printf ("/regex/match/partial/%d", ++total); \
g_test_add_data_func (path, data, test_partial); \
g_free (path); \
}
/* Shorthand for TEST_PARTIAL_FULL that passes G_REGEX_MATCH_PARTIAL as the
 * match options. */
#define TEST_PARTIAL(_pattern, _string, _expected) TEST_PARTIAL_FULL(_pattern, _string, G_REGEX_MATCH_PARTIAL, _expected)
typedef struct {
const gchar *pattern;
const gchar *string;
@ -2364,6 +2367,10 @@ main (int argc, char *argv[])
TEST_PARTIAL("(a)+b", "aa", TRUE);
TEST_PARTIAL("a?b", "a", TRUE);
/* Test soft vs. hard partial matching */
TEST_PARTIAL_FULL("cat(fish)?", "cat", G_REGEX_MATCH_PARTIAL_SOFT, FALSE);
TEST_PARTIAL_FULL("cat(fish)?", "cat", G_REGEX_MATCH_PARTIAL_HARD, TRUE);
/* TEST_SUB_PATTERN(pattern, string, start_position, sub_n, expected_sub,
* expected_start, expected_end) */
TEST_SUB_PATTERN("a", "a", 0, 0, "a", 0, 1);