mirror of
https://gitlab.gnome.org/GNOME/glib.git
synced 2025-02-09 12:25:48 +01:00
regex: Add PARTIAL_HARD match option
PCRE has supported a new partial matching method, PCRE_PARTIAL_HARD, since version 8.00.
This commit is contained in:
parent
0a2967030a
commit
1171215014
@ -146,7 +146,9 @@
|
|||||||
G_REGEX_MATCH_NEWLINE_ANY | \
|
G_REGEX_MATCH_NEWLINE_ANY | \
|
||||||
G_REGEX_MATCH_NEWLINE_ANYCRLF | \
|
G_REGEX_MATCH_NEWLINE_ANYCRLF | \
|
||||||
G_REGEX_MATCH_BSR_ANYCRLF | \
|
G_REGEX_MATCH_BSR_ANYCRLF | \
|
||||||
G_REGEX_MATCH_BSR_ANY)
|
G_REGEX_MATCH_BSR_ANY | \
|
||||||
|
G_REGEX_MATCH_PARTIAL_SOFT | \
|
||||||
|
G_REGEX_MATCH_PARTIAL_HARD)
|
||||||
|
|
||||||
/* we rely on these flags having the same values */
|
/* we rely on these flags having the same values */
|
||||||
G_STATIC_ASSERT (G_REGEX_CASELESS == PCRE_CASELESS);
|
G_STATIC_ASSERT (G_REGEX_CASELESS == PCRE_CASELESS);
|
||||||
@ -177,6 +179,8 @@ G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANY == PCRE_NEWLINE_ANY);
|
|||||||
G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANYCRLF == PCRE_NEWLINE_ANYCRLF);
|
G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANYCRLF == PCRE_NEWLINE_ANYCRLF);
|
||||||
G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANYCRLF == PCRE_BSR_ANYCRLF);
|
G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANYCRLF == PCRE_BSR_ANYCRLF);
|
||||||
G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANY == PCRE_BSR_UNICODE);
|
G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANY == PCRE_BSR_UNICODE);
|
||||||
|
G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_SOFT == PCRE_PARTIAL_SOFT);
|
||||||
|
G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_HARD == PCRE_PARTIAL_HARD);
|
||||||
|
|
||||||
/* These PCRE flags are unused or not exposed publicly in GRegexFlags, so
|
/* These PCRE flags are unused or not exposed publicly in GRegexFlags, so
|
||||||
* it should be ok to reuse them for different things.
|
* it should be ok to reuse them for different things.
|
||||||
@ -849,13 +853,21 @@ g_match_info_get_match_count (const GMatchInfo *match_info)
|
|||||||
* able to raise an error as soon as a mistake is made.
|
* able to raise an error as soon as a mistake is made.
|
||||||
*
|
*
|
||||||
* GRegex supports the concept of partial matching by means of the
|
* GRegex supports the concept of partial matching by means of the
|
||||||
* #G_REGEX_MATCH_PARTIAL flag. When this is set the return code for
|
* #G_REGEX_MATCH_PARTIAL_SOFT and #G_REGEX_MATCH_PARTIAL_HARD flags.
|
||||||
|
* When they are used, the return code for
|
||||||
* g_regex_match() or g_regex_match_full() is, as usual, %TRUE
|
* g_regex_match() or g_regex_match_full() is, as usual, %TRUE
|
||||||
* for a complete match, %FALSE otherwise. But, when these functions
|
* for a complete match, %FALSE otherwise. But, when these functions
|
||||||
* return %FALSE, you can check if the match was partial by calling
|
* return %FALSE, you can check if the match was partial by calling
|
||||||
* g_match_info_is_partial_match().
|
* g_match_info_is_partial_match().
|
||||||
*
|
*
|
||||||
* When using partial matching you cannot use g_match_info_fetch*().
|
* The difference between #G_REGEX_MATCH_PARTIAL_SOFT and
|
||||||
|
* #G_REGEX_MATCH_PARTIAL_HARD is that when a partial match is encountered
|
||||||
|
* with #G_REGEX_MATCH_PARTIAL_SOFT, matching continues to search for a
|
||||||
|
* possible complete match, while with #G_REGEX_MATCH_PARTIAL_HARD matching
|
||||||
|
* stops at the partial match.
|
||||||
|
* When both #G_REGEX_MATCH_PARTIAL_SOFT and #G_REGEX_MATCH_PARTIAL_HARD
|
||||||
|
* are set, the latter takes precedence.
|
||||||
|
* See <ulink>man:pcrepartial</ulink> for more information on partial matching.
|
||||||
*
|
*
|
||||||
* Because of the way certain internal optimizations are implemented
|
* Because of the way certain internal optimizations are implemented
|
||||||
* the partial matching algorithm cannot be used with all patterns.
|
* the partial matching algorithm cannot be used with all patterns.
|
||||||
@ -864,7 +876,8 @@ g_match_info_get_match_count (const GMatchInfo *match_info)
|
|||||||
* of occurrences is greater than one. Optional items such as "\d?"
|
* of occurrences is greater than one. Optional items such as "\d?"
|
||||||
* (where the maximum is one) are permitted. Quantifiers with any values
|
* (where the maximum is one) are permitted. Quantifiers with any values
|
||||||
* are permitted after parentheses, so the invalid examples above can be
|
* are permitted after parentheses, so the invalid examples above can be
|
||||||
* coded thus "(a){2,4}" and "(\d)+". If #G_REGEX_MATCH_PARTIAL is set
|
* coded thus "(a){2,4}" and "(\d)+". If #G_REGEX_MATCH_PARTIAL or
|
||||||
|
* #G_REGEX_MATCH_PARTIAL_HARD is set
|
||||||
* for a pattern that does not conform to the restrictions, matching
|
* for a pattern that does not conform to the restrictions, matching
|
||||||
* functions return an error.
|
* functions return an error.
|
||||||
*
|
*
|
||||||
|
@ -361,6 +361,11 @@ typedef enum
|
|||||||
* U+2029 PARAGRAPH SEPARATOR. Since: 2.34
|
* U+2029 PARAGRAPH SEPARATOR. Since: 2.34
|
||||||
* @G_REGEX_JAVASCRIPT_COMPAT: Changes behaviour so that it is compatible with
|
* @G_REGEX_JAVASCRIPT_COMPAT: Changes behaviour so that it is compatible with
|
||||||
* JavaScript rather than PCRE. Since: 2.34
|
* JavaScript rather than PCRE. Since: 2.34
|
||||||
|
* @G_REGEX_MATCH_PARTIAL_SOFT: An alias for #G_REGEX_MATCH_PARTIAL. Since: 2.34
|
||||||
|
* @G_REGEX_MATCH_PARTIAL_HARD: Turns on the partial matching feature. In contrast to
|
||||||
|
* #G_REGEX_MATCH_PARTIAL_SOFT, this stops matching as soon as a partial match
|
||||||
|
* is found, without continuing to search for a possible complete match. See
|
||||||
|
* g_match_info_is_partial_match() for more information. Since: 2.34
|
||||||
*
|
*
|
||||||
* Flags specifying match-time options.
|
* Flags specifying match-time options.
|
||||||
*
|
*
|
||||||
@ -381,7 +386,9 @@ typedef enum
|
|||||||
G_REGEX_MATCH_NEWLINE_ANY = 1 << 22,
|
G_REGEX_MATCH_NEWLINE_ANY = 1 << 22,
|
||||||
G_REGEX_MATCH_NEWLINE_ANYCRLF = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_ANY,
|
G_REGEX_MATCH_NEWLINE_ANYCRLF = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_ANY,
|
||||||
G_REGEX_MATCH_BSR_ANYCRLF = 1 << 23,
|
G_REGEX_MATCH_BSR_ANYCRLF = 1 << 23,
|
||||||
G_REGEX_MATCH_BSR_ANY = 1 << 24
|
G_REGEX_MATCH_BSR_ANY = 1 << 24,
|
||||||
|
G_REGEX_MATCH_PARTIAL_SOFT = G_REGEX_MATCH_PARTIAL,
|
||||||
|
G_REGEX_MATCH_PARTIAL_HARD = 1 << 27
|
||||||
} GRegexMatchFlags;
|
} GRegexMatchFlags;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -474,7 +474,7 @@ test_partial (gconstpointer d)
|
|||||||
|
|
||||||
g_assert (regex != NULL);
|
g_assert (regex != NULL);
|
||||||
|
|
||||||
g_regex_match (regex, data->string, G_REGEX_MATCH_PARTIAL, &match_info);
|
g_regex_match (regex, data->string, data->match_opts, &match_info);
|
||||||
|
|
||||||
g_assert_cmpint (data->expected, ==, g_match_info_is_partial_match (match_info));
|
g_assert_cmpint (data->expected, ==, g_match_info_is_partial_match (match_info));
|
||||||
|
|
||||||
@ -488,18 +488,21 @@ test_partial (gconstpointer d)
|
|||||||
g_regex_unref (regex);
|
g_regex_unref (regex);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define TEST_PARTIAL(_pattern, _string, _expected) { \
|
#define TEST_PARTIAL_FULL(_pattern, _string, _match_opts, _expected) { \
|
||||||
TestMatchData *data; \
|
TestMatchData *data; \
|
||||||
gchar *path; \
|
gchar *path; \
|
||||||
data = g_new0 (TestMatchData, 1); \
|
data = g_new0 (TestMatchData, 1); \
|
||||||
data->pattern = _pattern; \
|
data->pattern = _pattern; \
|
||||||
data->string = _string; \
|
data->string = _string; \
|
||||||
|
data->match_opts = _match_opts; \
|
||||||
data->expected = _expected; \
|
data->expected = _expected; \
|
||||||
path = g_strdup_printf ("/regex/match/partial/%d", ++total); \
|
path = g_strdup_printf ("/regex/match/partial/%d", ++total); \
|
||||||
g_test_add_data_func (path, data, test_partial); \
|
g_test_add_data_func (path, data, test_partial); \
|
||||||
g_free (path); \
|
g_free (path); \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Convenience wrapper around TEST_PARTIAL_FULL that passes
 * G_REGEX_MATCH_PARTIAL as the match options. */
#define TEST_PARTIAL(_pattern, _string, _expected) TEST_PARTIAL_FULL(_pattern, _string, G_REGEX_MATCH_PARTIAL, _expected)
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
const gchar *pattern;
|
const gchar *pattern;
|
||||||
const gchar *string;
|
const gchar *string;
|
||||||
@ -2364,6 +2367,10 @@ main (int argc, char *argv[])
|
|||||||
TEST_PARTIAL("(a)+b", "aa", TRUE);
|
TEST_PARTIAL("(a)+b", "aa", TRUE);
|
||||||
TEST_PARTIAL("a?b", "a", TRUE);
|
TEST_PARTIAL("a?b", "a", TRUE);
|
||||||
|
|
||||||
|
/* Test soft vs. hard partial matching */
|
||||||
|
TEST_PARTIAL_FULL("cat(fish)?", "cat", G_REGEX_MATCH_PARTIAL_SOFT, FALSE);
|
||||||
|
TEST_PARTIAL_FULL("cat(fish)?", "cat", G_REGEX_MATCH_PARTIAL_HARD, TRUE);
|
||||||
|
|
||||||
/* TEST_SUB_PATTERN(pattern, string, start_position, sub_n, expected_sub,
|
/* TEST_SUB_PATTERN(pattern, string, start_position, sub_n, expected_sub,
|
||||||
* expected_start, expected_end) */
|
* expected_start, expected_end) */
|
||||||
TEST_SUB_PATTERN("a", "a", 0, 0, "a", 0, 1);
|
TEST_SUB_PATTERN("a", "a", 0, 0, "a", 0, 1);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user