From e5550a24db48ce6f67b0f3b8df9dc467087c941b Mon Sep 17 00:00:00 2001 From: Christian Persch Date: Thu, 7 Jun 2012 18:11:49 +0200 Subject: [PATCH] regex: Don't leak internal PCRE options g_regex_get_compile_flags() and g_regex_get_match_flags() were leaking PCRE flags that don't exist in the corresponding public GRegexCompileFlags and GRegexMatchFlags; this change masks these internal flags. --- glib/gregex.c | 36 ++++++++++++++++++----- glib/tests/regex.c | 73 +++++++++++++++++++++++++++++++--------------- 2 files changed, 78 insertions(+), 31 deletions(-) diff --git a/glib/gregex.c b/glib/gregex.c index a0aee8028..d6ccc8898 100644 --- a/glib/gregex.c +++ b/glib/gregex.c @@ -116,6 +116,11 @@ G_REGEX_NEWLINE_ANYCRLF | \ G_REGEX_BSR_ANYCRLF) +/* Mask of all GRegexCompileFlags values that are (not) passed through to PCRE */ +#define G_REGEX_COMPILE_PCRE_MASK (G_REGEX_COMPILE_MASK & ~G_REGEX_COMPILE_NONPCRE_MASK) +#define G_REGEX_COMPILE_NONPCRE_MASK (G_REGEX_RAW | \ + G_REGEX_OPTIMIZE) + /* Mask of all the possible values for GRegexMatchFlags. */ #define G_REGEX_MATCH_MASK (G_REGEX_MATCH_ANCHORED | \ G_REGEX_MATCH_NOTBOL | \ @@ -159,14 +164,20 @@ G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANYCRLF == PCRE_NEWLINE_ANYCRLF); G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANYCRLF == PCRE_BSR_ANYCRLF); G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANY == PCRE_BSR_UNICODE); +/* These PCRE flags are unused or not exposed publicly in GRegexFlags, so + * it should be ok to reuse them for different things. + */ +G_STATIC_ASSERT (G_REGEX_OPTIMIZE == PCRE_NO_UTF8_CHECK); +G_STATIC_ASSERT (G_REGEX_RAW == PCRE_UTF8); + /* if the string is in UTF-8 use g_utf8_ functions, else use * use just +/- 1. */ -#define NEXT_CHAR(re, s) (((re)->compile_opts & PCRE_UTF8) ? \ - g_utf8_next_char (s) : \ - ((s) + 1)) -#define PREV_CHAR(re, s) (((re)->compile_opts & PCRE_UTF8) ? \ - g_utf8_prev_char (s) : \ - ((s) - 1)) +#define NEXT_CHAR(re, s) (((re)->compile_opts & G_REGEX_RAW) ? 
\ + ((s) + 1) : \ + g_utf8_next_char (s)) +#define PREV_CHAR(re, s) (((re)->compile_opts & G_REGEX_RAW) ? \ + ((s) - 1) : \ + g_utf8_prev_char (s)) struct _GMatchInfo { @@ -1269,6 +1280,7 @@ g_regex_new (const gchar *pattern, gboolean optimize = FALSE; static volatile gsize initialised = 0; unsigned long int pcre_compile_options; + GRegexCompileFlags nonpcre_compile_options; g_return_val_if_fail (pattern != NULL, NULL); g_return_val_if_fail (error == NULL || *error == NULL, NULL); @@ -1297,6 +1309,8 @@ g_regex_new (const gchar *pattern, return NULL; } + nonpcre_compile_options = compile_options & G_REGEX_COMPILE_NONPCRE_MASK; + /* G_REGEX_OPTIMIZE has the same numeric value of PCRE_NO_UTF8_CHECK, * as we do not need to wrap PCRE_NO_UTF8_CHECK. */ if (compile_options & G_REGEX_OPTIMIZE) @@ -1362,7 +1376,13 @@ g_regex_new (const gchar *pattern, * compile options, e.g. "(?i)foo" will make the pcre structure store * PCRE_CASELESS even though it wasn't explicitly given for compilation. */ pcre_fullinfo (re, NULL, PCRE_INFO_OPTIONS, &pcre_compile_options); - compile_options = pcre_compile_options; + compile_options = pcre_compile_options & G_REGEX_COMPILE_PCRE_MASK; + + /* Don't leak PCRE_NEWLINE_ANY, which is part of PCRE_NEWLINE_ANYCRLF */ + if ((pcre_compile_options & PCRE_NEWLINE_ANYCRLF) != PCRE_NEWLINE_ANYCRLF) + compile_options &= ~PCRE_NEWLINE_ANY; + + compile_options |= nonpcre_compile_options; if (!(compile_options & G_REGEX_DUPNAMES)) { @@ -1517,7 +1537,7 @@ g_regex_get_match_flags (const GRegex *regex) { g_return_val_if_fail (regex != NULL, 0); - return regex->match_opts; + return regex->match_opts & G_REGEX_MATCH_MASK; } /** diff --git a/glib/tests/regex.c b/glib/tests/regex.c index b0362cd79..6e97fda3d 100644 --- a/glib/tests/regex.c +++ b/glib/tests/regex.c @@ -56,6 +56,9 @@ typedef struct { GRegexCompileFlags compile_opts; GRegexMatchFlags match_opts; gint expected_error; + gboolean check_flags; + GRegexCompileFlags real_compile_opts; + GRegexMatchFlags 
real_match_opts; } TestNewData; static void @@ -70,6 +73,12 @@ test_new (gconstpointer d) g_assert_no_error (error); g_assert_cmpstr (data->pattern, ==, g_regex_get_pattern (regex)); + if (data->check_flags) + { + g_assert_cmphex (g_regex_get_compile_flags (regex), ==, data->real_compile_opts); + g_assert_cmphex (g_regex_get_match_flags (regex), ==, data->real_match_opts); + } + g_regex_unref (regex); } @@ -81,11 +90,28 @@ test_new (gconstpointer d) data->compile_opts = _compile_opts; \ data->match_opts = _match_opts; \ data->expected_error = 0; \ + data->check_flags = FALSE; \ path = g_strdup_printf ("/regex/new/%d", ++total); \ g_test_add_data_func (path, data, test_new); \ g_free (path); \ } +#define TEST_NEW_CHECK_FLAGS(_pattern, _compile_opts, _match_opts, _real_compile_opts, _real_match_opts) { \ + TestNewData *data; \ + gchar *path; \ + data = g_new0 (TestNewData, 1); \ + data->pattern = _pattern; \ + data->compile_opts = _compile_opts; \ + data->match_opts = 0; \ + data->expected_error = 0; \ + data->check_flags = TRUE; \ + data->real_compile_opts = _real_compile_opts; \ + data->real_match_opts = _real_match_opts; \ + path = g_strdup_printf ("/regex/new-check-flags/%d", ++total); \ + g_test_add_data_func (path, data, test_new); \ + g_free (path); \ +} + static void test_new_fail (gconstpointer d) { @@ -1362,28 +1388,6 @@ test_match_all (gconstpointer d) } \ } -#define PCRE_UTF8 0x00000800 -#define PCRE_NO_UTF8_CHECK 0x00002000 -#define PCRE_NEWLINE_ANY 0x00400000 -#define PCRE_UCP 0x20000000 -#define PCRE_BSR_UNICODE 0x01000000 - -static void -test_basic (void) -{ - GRegexCompileFlags cflags = G_REGEX_CASELESS | G_REGEX_EXTENDED | G_REGEX_OPTIMIZE; - GRegexMatchFlags mflags = G_REGEX_MATCH_NOTBOL | G_REGEX_MATCH_PARTIAL; - GRegex *regex; - - regex = g_regex_new ("[A-Z]+", cflags, mflags, NULL); - - g_assert (regex != NULL); - g_assert_cmpint (g_regex_get_compile_flags (regex), ==, 
cflags|PCRE_UTF8|PCRE_NO_UTF8_CHECK|PCRE_NEWLINE_ANY|PCRE_UCP|PCRE_BSR_UNICODE); - g_assert_cmpint (g_regex_get_match_flags (regex), ==, mflags|PCRE_NO_UTF8_CHECK); - - g_regex_unref (regex); -} - static void test_properties (void) { @@ -2043,7 +2047,6 @@ main (int argc, char *argv[]) g_test_bug_base ("http://bugzilla.gnome.org/"); - g_test_add_func ("/regex/basic", test_basic); g_test_add_func ("/regex/properties", test_properties); g_test_add_func ("/regex/class", test_class); g_test_add_func ("/regex/lookahead", test_lookahead); @@ -2055,6 +2058,7 @@ main (int argc, char *argv[]) g_test_add_func ("/regex/explicit-crlf", test_explicit_crlf); /* TEST_NEW(pattern, compile_opts, match_opts) */ + TEST_NEW("[A-Z]+", G_REGEX_CASELESS | G_REGEX_EXTENDED | G_REGEX_OPTIMIZE, G_REGEX_MATCH_NOTBOL | G_REGEX_MATCH_PARTIAL); TEST_NEW("", 0, 0); TEST_NEW(".*", 0, 0); TEST_NEW(".*", G_REGEX_OPTIMIZE, 0); @@ -2069,6 +2073,29 @@ main (int argc, char *argv[]) /* This gives "internal error: code overflow" with pcre 6.0 */ TEST_NEW("(?i)(?-i)", 0, 0); + /* Check that flags are correct if the pattern modifies them */ + /* TEST_NEW_CHECK_FLAGS(pattern, compile_opts, match_ops, real_compile_opts, real_match_opts) */ + TEST_NEW_CHECK_FLAGS ("a", G_REGEX_OPTIMIZE, 0, G_REGEX_OPTIMIZE, 0); + TEST_NEW_CHECK_FLAGS ("a", G_REGEX_RAW, 0, G_REGEX_RAW, 0); + TEST_NEW_CHECK_FLAGS ("(?i)a", 0, 0, G_REGEX_CASELESS, 0); + TEST_NEW_CHECK_FLAGS ("(?m)a", 0, 0, G_REGEX_MULTILINE, 0); + TEST_NEW_CHECK_FLAGS ("(?s)a", 0, 0, G_REGEX_DOTALL, 0); + TEST_NEW_CHECK_FLAGS ("(?x)a", 0, 0, G_REGEX_EXTENDED, 0); + TEST_NEW_CHECK_FLAGS ("(?J)a", 0, 0, G_REGEX_DUPNAMES, 0); + TEST_NEW_CHECK_FLAGS ("(?U)[a-z]+", 0, 0, G_REGEX_UNGREEDY, 0); + TEST_NEW_CHECK_FLAGS ("(?X)a", 0, 0, 0 /* not exposed by GRegex */, 0); + TEST_NEW_CHECK_FLAGS ("^.*", 0, 0, G_REGEX_ANCHORED, 0); + TEST_NEW_CHECK_FLAGS ("(*UTF8)a", 0, 0, 0 /* this is the default in GRegex */, 0); + TEST_NEW_CHECK_FLAGS ("(*UCP)a", 0, 0, 0 /* this always on 
in GRegex */, 0); + TEST_NEW_CHECK_FLAGS ("(*CR)a", 0, 0, G_REGEX_NEWLINE_CR, 0); + TEST_NEW_CHECK_FLAGS ("(*LF)a", 0, 0, G_REGEX_NEWLINE_LF, 0); + TEST_NEW_CHECK_FLAGS ("(*CRLF)a", 0, 0, G_REGEX_NEWLINE_CRLF, 0); + TEST_NEW_CHECK_FLAGS ("(*ANY)a", 0, 0, 0 /* this is the default in GRegex */, 0); + TEST_NEW_CHECK_FLAGS ("(*ANYCRLF)a", 0, 0, G_REGEX_NEWLINE_ANYCRLF, 0); + TEST_NEW_CHECK_FLAGS ("(*BSR_ANYCRLF)a", 0, 0, G_REGEX_BSR_ANYCRLF, 0); + TEST_NEW_CHECK_FLAGS ("(*BSR_UNICODE)a", 0, 0, 0 /* this is the default in GRegex */, 0); + TEST_NEW_CHECK_FLAGS ("(*NO_START_OPT)a", 0, 0, 0 /* not exposed in GRegex */, 0); + /* TEST_NEW_FAIL(pattern, compile_opts, expected_error) */ TEST_NEW_FAIL("(", 0, G_REGEX_ERROR_UNMATCHED_PARENTHESIS); TEST_NEW_FAIL(")", 0, G_REGEX_ERROR_UNMATCHED_PARENTHESIS);