regex: Add BSR_ANYCRLF and BSR_ANY match options

These flags override the compile option at match time. They use PCRE_BSR_ANYCRLF
and PCRE_BSR_UNICODE, resp., which make \R match only CR, LF and CRLF, or any
Unicode newline character or character sequences, resp.
This commit is contained in:
Christian Persch 2012-06-07 16:44:52 +02:00
parent 88ac3839f5
commit 26d91ddc08
2 changed files with 17 additions and 2 deletions

View File

@ -126,7 +126,9 @@
G_REGEX_MATCH_NEWLINE_LF | \
G_REGEX_MATCH_NEWLINE_CRLF | \
G_REGEX_MATCH_NEWLINE_ANY | \
G_REGEX_MATCH_NEWLINE_ANYCRLF)
G_REGEX_MATCH_NEWLINE_ANYCRLF | \
G_REGEX_MATCH_BSR_ANYCRLF | \
G_REGEX_MATCH_BSR_ANY)
/* we rely on these flags having the same values */
G_STATIC_ASSERT (G_REGEX_CASELESS == PCRE_CASELESS);
@ -154,6 +156,8 @@ G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_LF == PCRE_NEWLINE_LF);
G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CRLF == PCRE_NEWLINE_CRLF);
G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANY == PCRE_NEWLINE_ANY);
G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANYCRLF == PCRE_NEWLINE_ANYCRLF);
G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANYCRLF == PCRE_BSR_ANYCRLF);
G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANY == PCRE_BSR_UNICODE);
/* if the string is in UTF-8 use g_utf8_ functions, else use
* use just +/- 1. */

View File

@ -346,6 +346,15 @@ typedef enum
* @G_REGEX_MATCH_NEWLINE_ANYCRLF: Overrides the newline definition set when
* creating a new #GRegex; any '\r', '\n', or '\r\n' character sequence
* is recognized as a newline. Since: 2.34
* @G_REGEX_MATCH_BSR_ANYCRLF: Overrides the newline definition for "\R" set when
* creating a new #GRegex; only '\r', '\n', or '\r\n' character sequences
* are recognized as a newline by "\R". Since: 2.34
* @G_REGEX_MATCH_BSR_ANY: Overrides the newline definition for "\R" set when
* creating a new #GRegex; any Unicode newline character or character sequence
* are recognized as a newline by "\R". These are '\r', '\n' and '\rn', and the
* single characters U+000B LINE TABULATION, U+000C FORM FEED (FF),
* U+0085 NEXT LINE (NEL), U+2028 LINE SEPARATOR and
* U+2029 PARAGRAPH SEPARATOR. Since: 2.34
*
* Flags specifying match-time options.
*
@ -364,7 +373,9 @@ typedef enum
G_REGEX_MATCH_NEWLINE_LF = 1 << 21,
G_REGEX_MATCH_NEWLINE_CRLF = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_LF,
G_REGEX_MATCH_NEWLINE_ANY = 1 << 22,
G_REGEX_MATCH_NEWLINE_ANYCRLF = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_ANY
G_REGEX_MATCH_NEWLINE_ANYCRLF = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_ANY,
G_REGEX_MATCH_BSR_ANYCRLF = 1 << 23,
G_REGEX_MATCH_BSR_ANY = 1 << 24
} GRegexMatchFlags;
/**