diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 8fe839a75..aeeabc36e 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -10,10 +10,10 @@ cache: - _ccache/ variables: - FEDORA_IMAGE: "registry.gitlab.gnome.org/gnome/glib/fedora:v17" - COVERITY_IMAGE: "registry.gitlab.gnome.org/gnome/glib/coverity:v6" - DEBIAN_IMAGE: "registry.gitlab.gnome.org/gnome/glib/debian-stable:v12" - MINGW_IMAGE: "registry.gitlab.gnome.org/gnome/glib/mingw:v8" + FEDORA_IMAGE: "registry.gitlab.gnome.org/gnome/glib/fedora:v18" + COVERITY_IMAGE: "registry.gitlab.gnome.org/gnome/glib/coverity:v7" + DEBIAN_IMAGE: "registry.gitlab.gnome.org/gnome/glib/debian-stable:v13" + MINGW_IMAGE: "registry.gitlab.gnome.org/gnome/glib/mingw:v9" MESON_TEST_TIMEOUT_MULTIPLIER: 2 G_MESSAGES_DEBUG: all MESON_COMMON_OPTIONS: "--buildtype debug --wrap-mode=nodownload --fatal-meson-warnings" diff --git a/.gitlab-ci/coverity.Dockerfile b/.gitlab-ci/coverity.Dockerfile index 5ee757e2d..241861ec9 100644 --- a/.gitlab-ci/coverity.Dockerfile +++ b/.gitlab-ci/coverity.Dockerfile @@ -1,4 +1,4 @@ -FROM registry.gitlab.gnome.org/gnome/glib/fedora:v17 +FROM registry.gitlab.gnome.org/gnome/glib/fedora:v18 ARG COVERITY_SCAN_PROJECT_NAME ARG COVERITY_SCAN_TOKEN diff --git a/.gitlab-ci/debian-stable.Dockerfile b/.gitlab-ci/debian-stable.Dockerfile index 0622aa26b..5dcf2d3c2 100644 --- a/.gitlab-ci/debian-stable.Dockerfile +++ b/.gitlab-ci/debian-stable.Dockerfile @@ -26,7 +26,6 @@ RUN apt-get update -qq && apt-get install --no-install-recommends -qq -y \ libelf-dev \ libffi-dev \ libmount-dev \ - libpcre3-dev \ libpcre2-dev \ libselinux1-dev \ libxml2-utils \ diff --git a/.gitlab-ci/fedora.Dockerfile b/.gitlab-ci/fedora.Dockerfile index b83a34c8c..2506f9386 100644 --- a/.gitlab-ci/fedora.Dockerfile +++ b/.gitlab-ci/fedora.Dockerfile @@ -43,7 +43,6 @@ RUN dnf -y update \ libxslt \ ncurses-compat-libs \ ninja-build \ - pcre-devel \ pcre2-devel \ "python3-dbusmock >= 0.18.3-2" \ python3-pip \ diff --git a/.gitlab-ci/mingw.Dockerfile b/.gitlab-ci/mingw.Dockerfile index 5fe5e67de..b109122fc 100644 --- a/.gitlab-ci/mingw.Dockerfile +++ b/.gitlab-ci/mingw.Dockerfile @@ -43,7 +43,6 @@ RUN dnf -y install \ mingw64-zlib \ ncurses-compat-libs \ ninja-build \ - pcre-devel \ pcre2-devel \ python3 \ python3-pip \ diff --git a/.gitlab-ci/test-msys2.sh b/.gitlab-ci/test-msys2.sh index d891ee008..a345fda6a 100755 --- a/.gitlab-ci/test-msys2.sh +++ b/.gitlab-ci/test-msys2.sh @@ -18,7 +18,6 @@ pacman --noconfirm -S --needed \ mingw-w64-$MSYS2_ARCH-gettext \ mingw-w64-$MSYS2_ARCH-libffi \ mingw-w64-$MSYS2_ARCH-meson \ - mingw-w64-$MSYS2_ARCH-pcre \ mingw-w64-$MSYS2_ARCH-pcre2 \ mingw-w64-$MSYS2_ARCH-python3 \ mingw-w64-$MSYS2_ARCH-python-pip \ diff --git a/docs/reference/glib/regex-syntax.xml b/docs/reference/glib/regex-syntax.xml index 5dd92917a..0b413aa02 100644 --- a/docs/reference/glib/regex-syntax.xml +++ b/docs/reference/glib/regex-syntax.xml @@ -2442,52 +2442,6 @@ processing option does not affect the called subpattern. - - Copyright diff --git a/glib/gregex.c b/glib/gregex.c index 5254d8d28..55672249c 100644 --- a/glib/gregex.c +++ b/glib/gregex.c @@ -24,7 +24,8 @@ #include -#include +#define PCRE2_CODE_UNIT_WIDTH 8 +#include #include "gtypes.h" #include "gregex.h" @@ -109,87 +110,63 @@ * library written by Philip Hazel. */ +/* Signifies that flags have already been converted from pcre1 to pcre2. The + * value 0x04000000u is also the value of PCRE2_MATCH_INVALID_UTF in pcre2.h, + * but it is not used in gregex, so we can reuse it for this flag. + */ +#define G_REGEX_FLAGS_CONVERTED 0x04000000u /* Mask of all the possible values for GRegexCompileFlags. */ -#define G_REGEX_COMPILE_MASK (G_REGEX_CASELESS | \ - G_REGEX_MULTILINE | \ - G_REGEX_DOTALL | \ - G_REGEX_EXTENDED | \ - G_REGEX_ANCHORED | \ - G_REGEX_DOLLAR_ENDONLY | \ - G_REGEX_UNGREEDY | \ - G_REGEX_RAW | \ - G_REGEX_NO_AUTO_CAPTURE | \ - G_REGEX_OPTIMIZE | \ - G_REGEX_FIRSTLINE | \ - G_REGEX_DUPNAMES | \ - G_REGEX_NEWLINE_CR | \ - G_REGEX_NEWLINE_LF | \ - G_REGEX_NEWLINE_CRLF | \ - G_REGEX_NEWLINE_ANYCRLF | \ - G_REGEX_BSR_ANYCRLF | \ - G_REGEX_JAVASCRIPT_COMPAT) +#define G_REGEX_COMPILE_MASK (PCRE2_CASELESS | \ + PCRE2_MULTILINE | \ + PCRE2_DOTALL | \ + PCRE2_EXTENDED | \ + PCRE2_ANCHORED | \ + PCRE2_DOLLAR_ENDONLY | \ + PCRE2_UNGREEDY | \ + PCRE2_UTF | \ + PCRE2_NO_AUTO_CAPTURE | \ + PCRE2_FIRSTLINE | \ + PCRE2_DUPNAMES | \ + PCRE2_NEWLINE_CR | \ + PCRE2_NEWLINE_LF | \ + PCRE2_NEWLINE_CRLF | \ + PCRE2_NEWLINE_ANYCRLF | \ + PCRE2_BSR_ANYCRLF | \ + G_REGEX_FLAGS_CONVERTED) /* Mask of all GRegexCompileFlags values that are (not) passed trough to PCRE */ #define G_REGEX_COMPILE_PCRE_MASK (G_REGEX_COMPILE_MASK & ~G_REGEX_COMPILE_NONPCRE_MASK) -#define G_REGEX_COMPILE_NONPCRE_MASK (G_REGEX_RAW | \ - G_REGEX_OPTIMIZE) +#define G_REGEX_COMPILE_NONPCRE_MASK (PCRE2_UTF | \ + G_REGEX_FLAGS_CONVERTED) /* Mask of all the possible values for GRegexMatchFlags. */ -#define G_REGEX_MATCH_MASK (G_REGEX_MATCH_ANCHORED | \ - G_REGEX_MATCH_NOTBOL | \ - G_REGEX_MATCH_NOTEOL | \ - G_REGEX_MATCH_NOTEMPTY | \ - G_REGEX_MATCH_PARTIAL | \ - G_REGEX_MATCH_NEWLINE_CR | \ - G_REGEX_MATCH_NEWLINE_LF | \ - G_REGEX_MATCH_NEWLINE_CRLF | \ - G_REGEX_MATCH_NEWLINE_ANY | \ - G_REGEX_MATCH_NEWLINE_ANYCRLF | \ - G_REGEX_MATCH_BSR_ANYCRLF | \ - G_REGEX_MATCH_BSR_ANY | \ - G_REGEX_MATCH_PARTIAL_SOFT | \ - G_REGEX_MATCH_PARTIAL_HARD | \ - G_REGEX_MATCH_NOTEMPTY_ATSTART) +#define G_REGEX_MATCH_MASK (PCRE2_ANCHORED | \ + PCRE2_NOTBOL | \ + PCRE2_NOTEOL | \ + PCRE2_NOTEMPTY | \ + PCRE2_PARTIAL_SOFT | \ + PCRE2_NEWLINE_CR | \ + PCRE2_NEWLINE_LF | \ + PCRE2_NEWLINE_CRLF | \ + PCRE2_NEWLINE_ANY | \ + PCRE2_NEWLINE_ANYCRLF | \ + PCRE2_BSR_ANYCRLF | \ + PCRE2_BSR_UNICODE | \ + PCRE2_PARTIAL_SOFT | \ + PCRE2_PARTIAL_HARD | \ + PCRE2_NOTEMPTY_ATSTART | \ + G_REGEX_FLAGS_CONVERTED) -/* we rely on these flags having the same values */ -G_STATIC_ASSERT (G_REGEX_CASELESS == PCRE_CASELESS); -G_STATIC_ASSERT (G_REGEX_MULTILINE == PCRE_MULTILINE); -G_STATIC_ASSERT (G_REGEX_DOTALL == PCRE_DOTALL); -G_STATIC_ASSERT (G_REGEX_EXTENDED == PCRE_EXTENDED); -G_STATIC_ASSERT (G_REGEX_ANCHORED == PCRE_ANCHORED); -G_STATIC_ASSERT (G_REGEX_DOLLAR_ENDONLY == PCRE_DOLLAR_ENDONLY); -G_STATIC_ASSERT (G_REGEX_UNGREEDY == PCRE_UNGREEDY); -G_STATIC_ASSERT (G_REGEX_NO_AUTO_CAPTURE == PCRE_NO_AUTO_CAPTURE); -G_STATIC_ASSERT (G_REGEX_FIRSTLINE == PCRE_FIRSTLINE); -G_STATIC_ASSERT (G_REGEX_DUPNAMES == PCRE_DUPNAMES); -G_STATIC_ASSERT (G_REGEX_NEWLINE_CR == PCRE_NEWLINE_CR); -G_STATIC_ASSERT (G_REGEX_NEWLINE_LF == PCRE_NEWLINE_LF); -G_STATIC_ASSERT (G_REGEX_NEWLINE_CRLF == PCRE_NEWLINE_CRLF); -G_STATIC_ASSERT (G_REGEX_NEWLINE_ANYCRLF == PCRE_NEWLINE_ANYCRLF); -G_STATIC_ASSERT (G_REGEX_BSR_ANYCRLF == PCRE_BSR_ANYCRLF); -G_STATIC_ASSERT (G_REGEX_JAVASCRIPT_COMPAT == PCRE_JAVASCRIPT_COMPAT); +#define G_REGEX_NEWLINE_MASK (PCRE2_NEWLINE_CR | \ + PCRE2_NEWLINE_LF | \ + PCRE2_NEWLINE_CRLF | \ + PCRE2_NEWLINE_ANYCRLF) -G_STATIC_ASSERT (G_REGEX_MATCH_ANCHORED == PCRE_ANCHORED); -G_STATIC_ASSERT (G_REGEX_MATCH_NOTBOL == PCRE_NOTBOL); -G_STATIC_ASSERT (G_REGEX_MATCH_NOTEOL == PCRE_NOTEOL); -G_STATIC_ASSERT (G_REGEX_MATCH_NOTEMPTY == PCRE_NOTEMPTY); -G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL == PCRE_PARTIAL); -G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CR == PCRE_NEWLINE_CR); -G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_LF == PCRE_NEWLINE_LF); -G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CRLF == PCRE_NEWLINE_CRLF); -G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANY == PCRE_NEWLINE_ANY); -G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANYCRLF == PCRE_NEWLINE_ANYCRLF); -G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANYCRLF == PCRE_BSR_ANYCRLF); -G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANY == PCRE_BSR_UNICODE); -G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_SOFT == PCRE_PARTIAL_SOFT); -G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_HARD == PCRE_PARTIAL_HARD); -G_STATIC_ASSERT (G_REGEX_MATCH_NOTEMPTY_ATSTART == PCRE_NOTEMPTY_ATSTART); - -/* These PCRE flags are unused or not exposed publicly in GRegexFlags, so - * it should be ok to reuse them for different things. - */ -G_STATIC_ASSERT (G_REGEX_OPTIMIZE == PCRE_NO_UTF8_CHECK); -G_STATIC_ASSERT (G_REGEX_RAW == PCRE_UTF8); +#define G_REGEX_MATCH_NEWLINE_MASK (PCRE2_NEWLINE_CR | \ + PCRE2_NEWLINE_LF | \ + PCRE2_NEWLINE_CRLF | \ + PCRE2_NEWLINE_ANYCRLF | \ + PCRE2_NEWLINE_ANY) /* if the string is in UTF-8 use g_utf8_ functions, else use * use just +/- 1. */ @@ -210,24 +187,26 @@ struct _GMatchInfo gint pos; /* position in the string where last match left off */ gint n_offsets; /* number of offsets */ gint *offsets; /* array of offsets paired 0,1 ; 2,3 ; 3,4 etc */ - gint *workspace; /* workspace for pcre_dfa_exec() */ + gint *workspace; /* workspace for pcre2_dfa_match() */ gint n_workspace; /* number of workspace elements */ const gchar *string; /* string passed to the match function */ gssize string_len; /* length of string, in bytes */ + pcre2_match_context *match_context; + pcre2_match_data *match_data; }; struct _GRegex { gint ref_count; /* the ref count for the immutable part (atomic) */ gchar *pattern; /* the pattern */ - pcre *pcre_re; /* compiled form of the pattern */ - GRegexCompileFlags compile_opts; /* options used at compile time on the pattern */ + pcre2_code *pcre_re; /* compiled form of the pattern */ + GRegexCompileFlags compile_opts; /* options used at compile time on the pattern, pcre2 values */ + GRegexCompileFlags orig_compile_opts; /* options used at compile time on the pattern, gregex values */ GRegexMatchFlags match_opts; /* options used at match time on the regex */ - pcre_extra *extra; /* data stored when G_REGEX_OPTIMIZE is used */ }; /* TRUE if ret is an error code, FALSE otherwise. */ -#define IS_PCRE_ERROR(ret) ((ret) < PCRE_ERROR_NOMATCH && (ret) != PCRE_ERROR_PARTIAL) +#define IS_PCRE2_ERROR(ret) ((ret) < PCRE2_ERROR_NOMATCH && (ret) != PCRE2_ERROR_PARTIAL) typedef struct _InterpolationData InterpolationData; static gboolean interpolation_list_needs_match (GList *list); @@ -238,70 +217,249 @@ static GList *split_replacement (const gchar *replacement, GError **error); static void free_interpolation_data (InterpolationData *data); +static gint +map_to_pcre2_compile_flags (gint pcre1_flags) +{ + /* Maps compile flags from pcre1 to pcre2 values + */ + gint pcre2_flags = G_REGEX_FLAGS_CONVERTED; + + if (pcre1_flags & G_REGEX_FLAGS_CONVERTED) + return pcre1_flags; + + if (pcre1_flags & G_REGEX_CASELESS) + pcre2_flags |= PCRE2_CASELESS; + if (pcre1_flags & G_REGEX_MULTILINE) + pcre2_flags |= PCRE2_MULTILINE; + if (pcre1_flags & G_REGEX_DOTALL) + pcre2_flags |= PCRE2_DOTALL; + if (pcre1_flags & G_REGEX_EXTENDED) + pcre2_flags |= PCRE2_EXTENDED; + if (pcre1_flags & G_REGEX_ANCHORED) + pcre2_flags |= PCRE2_ANCHORED; + if (pcre1_flags & G_REGEX_DOLLAR_ENDONLY) + pcre2_flags |= PCRE2_DOLLAR_ENDONLY; + if (pcre1_flags & G_REGEX_UNGREEDY) + pcre2_flags |= PCRE2_UNGREEDY; + if (!(pcre1_flags & G_REGEX_RAW)) + pcre2_flags |= PCRE2_UTF; + if (pcre1_flags & G_REGEX_NO_AUTO_CAPTURE) + pcre2_flags |= PCRE2_NO_AUTO_CAPTURE; + if (pcre1_flags & G_REGEX_FIRSTLINE) + pcre2_flags |= PCRE2_FIRSTLINE; + if (pcre1_flags & G_REGEX_DUPNAMES) + pcre2_flags |= PCRE2_DUPNAMES; + if (pcre1_flags & G_REGEX_NEWLINE_CR) + pcre2_flags |= PCRE2_NEWLINE_CR; + if (pcre1_flags & G_REGEX_NEWLINE_LF) + pcre2_flags |= PCRE2_NEWLINE_LF; + /* Check for exact match for a composite flag */ + if ((pcre1_flags & G_REGEX_NEWLINE_CRLF) == G_REGEX_NEWLINE_CRLF) + pcre2_flags |= PCRE2_NEWLINE_CRLF; + /* Check for exact match for a composite flag */ + if ((pcre1_flags & G_REGEX_NEWLINE_ANYCRLF) == G_REGEX_NEWLINE_ANYCRLF) + pcre2_flags |= PCRE2_NEWLINE_ANYCRLF; + if (pcre1_flags & G_REGEX_BSR_ANYCRLF) + pcre2_flags |= PCRE2_BSR_ANYCRLF; + + /* these are not available in pcre2 */ +G_GNUC_BEGIN_IGNORE_DEPRECATIONS + if (pcre1_flags & G_REGEX_OPTIMIZE) + pcre2_flags |= 0; + if (pcre1_flags & G_REGEX_JAVASCRIPT_COMPAT) + pcre2_flags |= 0; +G_GNUC_END_IGNORE_DEPRECATIONS + + return pcre2_flags; +} + +static gint +map_to_pcre2_match_flags (gint pcre1_flags) +{ + /* Maps match flags from pcre1 to pcre2 values + */ + gint pcre2_flags = G_REGEX_FLAGS_CONVERTED; + + if (pcre1_flags & G_REGEX_FLAGS_CONVERTED) + return pcre1_flags; + + if (pcre1_flags & G_REGEX_MATCH_ANCHORED) + pcre2_flags |= PCRE2_ANCHORED; + if (pcre1_flags & G_REGEX_MATCH_NOTBOL) + pcre2_flags |= PCRE2_NOTBOL; + if (pcre1_flags & G_REGEX_MATCH_NOTEOL) + pcre2_flags |= PCRE2_NOTEOL; + if (pcre1_flags & G_REGEX_MATCH_NOTEMPTY) + pcre2_flags |= PCRE2_NOTEMPTY; + if (pcre1_flags & G_REGEX_MATCH_PARTIAL) + pcre2_flags |= PCRE2_PARTIAL_SOFT; + if (pcre1_flags & G_REGEX_MATCH_NEWLINE_CR) + pcre2_flags |= PCRE2_NEWLINE_CR; + if (pcre1_flags & G_REGEX_MATCH_NEWLINE_LF) + pcre2_flags |= PCRE2_NEWLINE_LF; + /* Check for exact match for a composite flag */ + if ((pcre1_flags & G_REGEX_MATCH_NEWLINE_CRLF) == G_REGEX_MATCH_NEWLINE_CRLF) + pcre2_flags |= PCRE2_NEWLINE_CRLF; + if (pcre1_flags & G_REGEX_MATCH_NEWLINE_ANY) + pcre2_flags |= PCRE2_NEWLINE_ANY; + /* Check for exact match for a composite flag */ + if ((pcre1_flags & G_REGEX_MATCH_NEWLINE_ANYCRLF) == G_REGEX_MATCH_NEWLINE_ANYCRLF) + pcre2_flags |= PCRE2_NEWLINE_ANYCRLF; + if (pcre1_flags & G_REGEX_MATCH_BSR_ANYCRLF) + pcre2_flags |= PCRE2_BSR_ANYCRLF; + if (pcre1_flags & G_REGEX_MATCH_BSR_ANY) + pcre2_flags |= PCRE2_BSR_UNICODE; + if (pcre1_flags & G_REGEX_MATCH_PARTIAL_SOFT) + pcre2_flags |= PCRE2_PARTIAL_SOFT; + if (pcre1_flags & G_REGEX_MATCH_PARTIAL_HARD) + pcre2_flags |= PCRE2_PARTIAL_HARD; + if (pcre1_flags & G_REGEX_MATCH_NOTEMPTY_ATSTART) + pcre2_flags |= PCRE2_NOTEMPTY_ATSTART; + + return pcre2_flags; +} + +static gint +map_to_pcre1_compile_flags (gint pcre2_flags) +{ + /* Maps compile flags from pcre2 to pcre1 values + */ + gint pcre1_flags = 0; + + if (!(pcre2_flags & G_REGEX_FLAGS_CONVERTED)) + return pcre2_flags; + + if (pcre2_flags & PCRE2_CASELESS) + pcre1_flags |= G_REGEX_CASELESS; + if (pcre2_flags & PCRE2_MULTILINE) + pcre1_flags |= G_REGEX_MULTILINE; + if (pcre2_flags & PCRE2_DOTALL) + pcre1_flags |= G_REGEX_DOTALL; + if (pcre2_flags & PCRE2_EXTENDED) + pcre1_flags |= G_REGEX_EXTENDED; + if (pcre2_flags & PCRE2_ANCHORED) + pcre1_flags |= G_REGEX_ANCHORED; + if (pcre2_flags & PCRE2_DOLLAR_ENDONLY) + pcre1_flags |= G_REGEX_DOLLAR_ENDONLY; + if (pcre2_flags & PCRE2_UNGREEDY) + pcre1_flags |= G_REGEX_UNGREEDY; + if (!(pcre2_flags & PCRE2_UTF)) + pcre1_flags |= G_REGEX_RAW; + if (pcre2_flags & PCRE2_NO_AUTO_CAPTURE) + pcre1_flags |= G_REGEX_NO_AUTO_CAPTURE; + if (pcre2_flags & PCRE2_FIRSTLINE) + pcre1_flags |= G_REGEX_FIRSTLINE; + if (pcre2_flags & PCRE2_DUPNAMES) + pcre1_flags |= G_REGEX_DUPNAMES; + if (pcre2_flags & PCRE2_NEWLINE_CR) + pcre1_flags |= G_REGEX_NEWLINE_CR; + if (pcre2_flags & PCRE2_NEWLINE_LF) + pcre1_flags |= G_REGEX_NEWLINE_LF; + /* Check for exact match for a composite flag */ + if ((pcre2_flags & PCRE2_NEWLINE_CRLF) == PCRE2_NEWLINE_CRLF) + pcre1_flags |= G_REGEX_NEWLINE_CRLF; + /* Check for exact match for a composite flag */ + if ((pcre2_flags & PCRE2_NEWLINE_ANYCRLF) == PCRE2_NEWLINE_ANYCRLF) + pcre1_flags |= G_REGEX_NEWLINE_ANYCRLF; + if (pcre2_flags & PCRE2_BSR_ANYCRLF) + pcre1_flags |= G_REGEX_BSR_ANYCRLF; + + return pcre1_flags; +} + +static gint +map_to_pcre1_match_flags (gint pcre2_flags) +{ + /* Maps match flags from pcre2 to pcre1 values + */ + gint pcre1_flags = 0; + + if (!(pcre2_flags & G_REGEX_FLAGS_CONVERTED)) + return pcre2_flags; + + if (pcre2_flags & PCRE2_ANCHORED) + pcre1_flags |= G_REGEX_MATCH_ANCHORED; + if (pcre2_flags & PCRE2_NOTBOL) + pcre1_flags |= G_REGEX_MATCH_NOTBOL; + if (pcre2_flags & PCRE2_NOTEOL) + pcre1_flags |= G_REGEX_MATCH_NOTEOL; + if (pcre2_flags & PCRE2_NOTEMPTY) + pcre1_flags |= G_REGEX_MATCH_NOTEMPTY; + if (pcre2_flags & PCRE2_PARTIAL_SOFT) + pcre1_flags |= G_REGEX_MATCH_PARTIAL; + if (pcre2_flags & PCRE2_NEWLINE_CR) + pcre1_flags |= G_REGEX_MATCH_NEWLINE_CR; + if (pcre2_flags & PCRE2_NEWLINE_LF) + pcre1_flags |= G_REGEX_MATCH_NEWLINE_LF; + /* Check for exact match for a composite flag */ + if ((pcre2_flags & PCRE2_NEWLINE_CRLF) == PCRE2_NEWLINE_CRLF) + pcre1_flags |= G_REGEX_MATCH_NEWLINE_CRLF; + if (pcre2_flags & PCRE2_NEWLINE_ANY) + pcre1_flags |= G_REGEX_MATCH_NEWLINE_ANY; + /* Check for exact match for a composite flag */ + if ((pcre2_flags & PCRE2_NEWLINE_ANYCRLF) == PCRE2_NEWLINE_ANYCRLF) + pcre1_flags |= G_REGEX_MATCH_NEWLINE_ANYCRLF; + if (pcre2_flags & PCRE2_BSR_ANYCRLF) + pcre1_flags |= G_REGEX_MATCH_BSR_ANYCRLF; + if (pcre2_flags & PCRE2_BSR_UNICODE) + pcre1_flags |= G_REGEX_MATCH_BSR_ANY; + if (pcre2_flags & PCRE2_PARTIAL_SOFT) + pcre1_flags |= G_REGEX_MATCH_PARTIAL_SOFT; + if (pcre2_flags & PCRE2_PARTIAL_HARD) + pcre1_flags |= G_REGEX_MATCH_PARTIAL_HARD; + if (pcre2_flags & PCRE2_NOTEMPTY_ATSTART) + pcre1_flags |= G_REGEX_MATCH_NOTEMPTY_ATSTART; + + return pcre1_flags; +} static const gchar * match_error (gint errcode) { switch (errcode) { - case PCRE_ERROR_NOMATCH: + case PCRE2_ERROR_NOMATCH: /* not an error */ break; - case PCRE_ERROR_NULL: + case PCRE2_ERROR_NULL: /* NULL argument, this should not happen in GRegex */ g_warning ("A NULL argument was passed to PCRE"); break; - case PCRE_ERROR_BADOPTION: + case PCRE2_ERROR_BADOPTION: return "bad options"; - case PCRE_ERROR_BADMAGIC: + case PCRE2_ERROR_BADMAGIC: return _("corrupted object"); - case PCRE_ERROR_UNKNOWN_OPCODE: - return N_("internal error or corrupted object"); - case PCRE_ERROR_NOMEMORY: + case PCRE2_ERROR_NOMEMORY: return _("out of memory"); - case PCRE_ERROR_NOSUBSTRING: - /* not used by pcre_exec() */ + case PCRE2_ERROR_NOSUBSTRING: + /* not used by pcre2_match() */ break; - case PCRE_ERROR_MATCHLIMIT: + case PCRE2_ERROR_MATCHLIMIT: return _("backtracking limit reached"); - case PCRE_ERROR_CALLOUT: + case PCRE2_ERROR_CALLOUT: /* callouts are not implemented */ break; - case PCRE_ERROR_BADUTF8: - case PCRE_ERROR_BADUTF8_OFFSET: + case PCRE2_ERROR_BADUTFOFFSET: /* we do not check if strings are valid */ break; - case PCRE_ERROR_PARTIAL: + case PCRE2_ERROR_PARTIAL: /* not an error */ break; - case PCRE_ERROR_BADPARTIAL: - return _("the pattern contains items not supported for partial matching"); - case PCRE_ERROR_INTERNAL: + case PCRE2_ERROR_INTERNAL: return _("internal error"); - case PCRE_ERROR_BADCOUNT: - /* negative ovecsize, this should not happen in GRegex */ - g_warning ("A negative ovecsize was passed to PCRE"); - break; - case PCRE_ERROR_DFA_UITEM: + case PCRE2_ERROR_DFA_UITEM: return _("the pattern contains items not supported for partial matching"); - case PCRE_ERROR_DFA_UCOND: + case PCRE2_ERROR_DFA_UCOND: return _("back references as conditions are not supported for partial matching"); - case PCRE_ERROR_DFA_UMLIMIT: - /* the match_field field is not used in GRegex */ - break; - case PCRE_ERROR_DFA_WSSIZE: + case PCRE2_ERROR_DFA_WSSIZE: /* handled expanding the workspace */ break; - case PCRE_ERROR_DFA_RECURSE: - case PCRE_ERROR_RECURSIONLIMIT: + case PCRE2_ERROR_DFA_RECURSE: + case PCRE2_ERROR_RECURSIONLIMIT: return _("recursion limit reached"); - case PCRE_ERROR_BADNEWLINE: - return _("invalid combination of newline flags"); - case PCRE_ERROR_BADOFFSET: + case PCRE2_ERROR_BADOFFSET: return _("bad offset"); - case PCRE_ERROR_SHORTUTF8: - return _("short utf8"); - case PCRE_ERROR_RECURSELOOP: + case PCRE2_ERROR_RECURSELOOP: return _("recursion loop"); default: break; @@ -312,242 +470,263 @@ match_error (gint errcode) static void translate_compile_error (gint *errcode, const gchar **errmsg) { - /* Compile errors are created adding 100 to the error code returned - * by PCRE. - * If errcode is known we put the translatable error message in - * erromsg. If errcode is unknown we put the generic - * G_REGEX_ERROR_COMPILE error code in errcode and keep the - * untranslated error message returned by PCRE. + /* If errcode is known we put the translatable error message in + * errmsg. If errcode is unknown we put the generic + * G_REGEX_ERROR_COMPILE error code in errcode. * Note that there can be more PCRE errors with the same GRegexError * and that some PCRE errors are useless for us. */ - *errcode += 100; switch (*errcode) { - case G_REGEX_ERROR_STRAY_BACKSLASH: + case PCRE2_ERROR_END_BACKSLASH: + *errcode = G_REGEX_ERROR_STRAY_BACKSLASH; *errmsg = _("\\ at end of pattern"); break; - case G_REGEX_ERROR_MISSING_CONTROL_CHAR: + case PCRE2_ERROR_END_BACKSLASH_C: + *errcode = G_REGEX_ERROR_MISSING_CONTROL_CHAR; *errmsg = _("\\c at end of pattern"); break; - case G_REGEX_ERROR_UNRECOGNIZED_ESCAPE: + case PCRE2_ERROR_UNKNOWN_ESCAPE: + case PCRE2_ERROR_UNSUPPORTED_ESCAPE_SEQUENCE: + *errcode = G_REGEX_ERROR_UNRECOGNIZED_ESCAPE; *errmsg = _("unrecognized character following \\"); break; - case G_REGEX_ERROR_QUANTIFIERS_OUT_OF_ORDER: + case PCRE2_ERROR_QUANTIFIER_OUT_OF_ORDER: + *errcode = G_REGEX_ERROR_QUANTIFIERS_OUT_OF_ORDER; *errmsg = _("numbers out of order in {} quantifier"); break; - case G_REGEX_ERROR_QUANTIFIER_TOO_BIG: + case PCRE2_ERROR_QUANTIFIER_TOO_BIG: + *errcode = G_REGEX_ERROR_QUANTIFIER_TOO_BIG; *errmsg = _("number too big in {} quantifier"); break; - case G_REGEX_ERROR_UNTERMINATED_CHARACTER_CLASS: + case PCRE2_ERROR_MISSING_SQUARE_BRACKET: + *errcode = G_REGEX_ERROR_UNTERMINATED_CHARACTER_CLASS; *errmsg = _("missing terminating ] for character class"); break; - case G_REGEX_ERROR_INVALID_ESCAPE_IN_CHARACTER_CLASS: + case PCRE2_ERROR_ESCAPE_INVALID_IN_CLASS: + *errcode = G_REGEX_ERROR_INVALID_ESCAPE_IN_CHARACTER_CLASS; *errmsg = _("invalid escape sequence in character class"); break; - case G_REGEX_ERROR_RANGE_OUT_OF_ORDER: + case PCRE2_ERROR_CLASS_RANGE_ORDER: + *errcode = G_REGEX_ERROR_RANGE_OUT_OF_ORDER; *errmsg = _("range out of order in character class"); break; - case G_REGEX_ERROR_NOTHING_TO_REPEAT: + case PCRE2_ERROR_QUANTIFIER_INVALID: + case PCRE2_ERROR_INTERNAL_UNEXPECTED_REPEAT: + *errcode = G_REGEX_ERROR_NOTHING_TO_REPEAT; *errmsg = _("nothing to repeat"); break; - case 111: /* internal error: unexpected repeat */ - *errcode = G_REGEX_ERROR_INTERNAL; - *errmsg = _("unexpected repeat"); - break; - case G_REGEX_ERROR_UNRECOGNIZED_CHARACTER: + case PCRE2_ERROR_INVALID_AFTER_PARENS_QUERY: + *errcode = G_REGEX_ERROR_UNRECOGNIZED_CHARACTER; *errmsg = _("unrecognized character after (? or (?-"); break; - case G_REGEX_ERROR_POSIX_NAMED_CLASS_OUTSIDE_CLASS: + case PCRE2_ERROR_POSIX_CLASS_NOT_IN_CLASS: + *errcode = G_REGEX_ERROR_POSIX_NAMED_CLASS_OUTSIDE_CLASS; *errmsg = _("POSIX named classes are supported only within a class"); break; - case G_REGEX_ERROR_UNMATCHED_PARENTHESIS: - *errmsg = _("missing terminating )"); - break; - case G_REGEX_ERROR_INEXISTENT_SUBPATTERN_REFERENCE: - *errmsg = _("reference to non-existent subpattern"); - break; - case G_REGEX_ERROR_UNTERMINATED_COMMENT: - *errmsg = _("missing ) after comment"); - break; - case G_REGEX_ERROR_EXPRESSION_TOO_LARGE: - *errmsg = _("regular expression is too large"); - break; - case G_REGEX_ERROR_MEMORY_ERROR: - *errmsg = _("failed to get memory"); - break; - case 122: /* unmatched parentheses */ - *errcode = G_REGEX_ERROR_UNMATCHED_PARENTHESIS; - *errmsg = _(") without opening ("); - break; - case 123: /* internal error: code overflow */ - *errcode = G_REGEX_ERROR_INTERNAL; - *errmsg = _("code overflow"); - break; - case 124: /* "unrecognized character after (?<\0 */ - *errcode = G_REGEX_ERROR_UNRECOGNIZED_CHARACTER; - *errmsg = _("unrecognized character after (?<"); - break; - case G_REGEX_ERROR_VARIABLE_LENGTH_LOOKBEHIND: - *errmsg = _("lookbehind assertion is not fixed length"); - break; - case G_REGEX_ERROR_MALFORMED_CONDITION: - *errmsg = _("malformed number or name after (?("); - break; - case G_REGEX_ERROR_TOO_MANY_CONDITIONAL_BRANCHES: - *errmsg = _("conditional group contains more than two branches"); - break; - case G_REGEX_ERROR_ASSERTION_EXPECTED: - *errmsg = _("assertion expected after (?("); - break; - case 129: - *errcode = G_REGEX_ERROR_UNMATCHED_PARENTHESIS; - /* translators: '(?R' and '(?[+-]digits' are both meant as (groups of) - * sequences here, '(?-54' would be an example for the second group. - */ - *errmsg = _("(?R or (?[+-]digits must be followed by )"); - break; - case G_REGEX_ERROR_UNKNOWN_POSIX_CLASS_NAME: - *errmsg = _("unknown POSIX class name"); - break; - case G_REGEX_ERROR_POSIX_COLLATING_ELEMENTS_NOT_SUPPORTED: + case PCRE2_ERROR_POSIX_NO_SUPPORT_COLLATING: + *errcode = G_REGEX_ERROR_POSIX_COLLATING_ELEMENTS_NOT_SUPPORTED; *errmsg = _("POSIX collating elements are not supported"); break; - case G_REGEX_ERROR_HEX_CODE_TOO_LARGE: + case PCRE2_ERROR_MISSING_CLOSING_PARENTHESIS: + case PCRE2_ERROR_UNMATCHED_CLOSING_PARENTHESIS: + case PCRE2_ERROR_PARENS_QUERY_R_MISSING_CLOSING: + *errcode = G_REGEX_ERROR_UNMATCHED_PARENTHESIS; + *errmsg = _("missing terminating )"); + break; + case PCRE2_ERROR_BAD_SUBPATTERN_REFERENCE: + *errcode = G_REGEX_ERROR_INEXISTENT_SUBPATTERN_REFERENCE; + *errmsg = _("reference to non-existent subpattern"); + break; + case PCRE2_ERROR_MISSING_COMMENT_CLOSING: + *errcode = G_REGEX_ERROR_UNTERMINATED_COMMENT; + *errmsg = _("missing ) after comment"); + break; + case PCRE2_ERROR_PATTERN_TOO_LARGE: + *errcode = G_REGEX_ERROR_EXPRESSION_TOO_LARGE; + *errmsg = _("regular expression is too large"); + break; + case PCRE2_ERROR_MISSING_CONDITION_CLOSING: + *errcode = G_REGEX_ERROR_MALFORMED_CONDITION; + *errmsg = _("malformed number or name after (?("); + break; + case PCRE2_ERROR_LOOKBEHIND_NOT_FIXED_LENGTH: + *errcode = G_REGEX_ERROR_VARIABLE_LENGTH_LOOKBEHIND; + *errmsg = _("lookbehind assertion is not fixed length"); + break; + case PCRE2_ERROR_TOO_MANY_CONDITION_BRANCHES: + *errcode = G_REGEX_ERROR_TOO_MANY_CONDITIONAL_BRANCHES; + *errmsg = _("conditional group contains more than two branches"); + break; + case PCRE2_ERROR_CONDITION_ASSERTION_EXPECTED: + *errcode = G_REGEX_ERROR_ASSERTION_EXPECTED; + *errmsg = _("assertion expected after (?("); + break; + case PCRE2_ERROR_BAD_RELATIVE_REFERENCE: + *errcode = G_REGEX_ERROR_INVALID_RELATIVE_REFERENCE; + *errmsg = _("a numbered reference must not be zero"); + break; + case PCRE2_ERROR_UNKNOWN_POSIX_CLASS: + *errcode = G_REGEX_ERROR_UNKNOWN_POSIX_CLASS_NAME; + *errmsg = _("unknown POSIX class name"); + break; + case PCRE2_ERROR_CODE_POINT_TOO_BIG: + case PCRE2_ERROR_INVALID_HEXADECIMAL: + *errcode = G_REGEX_ERROR_HEX_CODE_TOO_LARGE; *errmsg = _("character value in \\x{...} sequence is too large"); break; - case G_REGEX_ERROR_INVALID_CONDITION: - *errmsg = _("invalid condition (?(0)"); - break; - case G_REGEX_ERROR_SINGLE_BYTE_MATCH_IN_LOOKBEHIND: + case PCRE2_ERROR_LOOKBEHIND_INVALID_BACKSLASH_C: + *errcode = G_REGEX_ERROR_SINGLE_BYTE_MATCH_IN_LOOKBEHIND; *errmsg = _("\\C not allowed in lookbehind assertion"); break; - case 137: /* PCRE does not support \\L, \\l, \\N{name}, \\U, or \\u\0 */ - /* A number of Perl escapes are not handled by PCRE. - * Therefore it explicitly raises ERR37. - */ - *errcode = G_REGEX_ERROR_UNRECOGNIZED_ESCAPE; - *errmsg = _("escapes \\L, \\l, \\N{name}, \\U, and \\u are not supported"); - break; - case G_REGEX_ERROR_INFINITE_LOOP: - *errmsg = _("recursive call could loop indefinitely"); - break; - case 141: /* unrecognized character after (?P\0 */ - *errcode = G_REGEX_ERROR_UNRECOGNIZED_CHARACTER; - *errmsg = _("unrecognized character after (?P"); - break; - case G_REGEX_ERROR_MISSING_SUBPATTERN_NAME_TERMINATOR: + case PCRE2_ERROR_MISSING_NAME_TERMINATOR: + *errcode = G_REGEX_ERROR_MISSING_SUBPATTERN_NAME_TERMINATOR; *errmsg = _("missing terminator in subpattern name"); break; - case G_REGEX_ERROR_DUPLICATE_SUBPATTERN_NAME: + case PCRE2_ERROR_DUPLICATE_SUBPATTERN_NAME: + *errcode = G_REGEX_ERROR_DUPLICATE_SUBPATTERN_NAME; *errmsg = _("two named subpatterns have the same name"); break; - case G_REGEX_ERROR_MALFORMED_PROPERTY: + case PCRE2_ERROR_MALFORMED_UNICODE_PROPERTY: + *errcode = G_REGEX_ERROR_MALFORMED_PROPERTY; *errmsg = _("malformed \\P or \\p sequence"); break; - case G_REGEX_ERROR_UNKNOWN_PROPERTY: + case PCRE2_ERROR_UNKNOWN_UNICODE_PROPERTY: + *errcode = G_REGEX_ERROR_UNKNOWN_PROPERTY; *errmsg = _("unknown property name after \\P or \\p"); break; - case G_REGEX_ERROR_SUBPATTERN_NAME_TOO_LONG: + case PCRE2_ERROR_SUBPATTERN_NAME_TOO_LONG: + *errcode = G_REGEX_ERROR_SUBPATTERN_NAME_TOO_LONG; *errmsg = _("subpattern name is too long (maximum 32 characters)"); break; - case G_REGEX_ERROR_TOO_MANY_SUBPATTERNS: + case PCRE2_ERROR_TOO_MANY_NAMED_SUBPATTERNS: + *errcode = G_REGEX_ERROR_TOO_MANY_SUBPATTERNS; *errmsg = _("too many named subpatterns (maximum 10,000)"); break; - case G_REGEX_ERROR_INVALID_OCTAL_VALUE: + case PCRE2_ERROR_OCTAL_BYTE_TOO_BIG: + *errcode = G_REGEX_ERROR_INVALID_OCTAL_VALUE; *errmsg = _("octal value is greater than \\377"); break; - case 152: /* internal error: overran compiling workspace */ - *errcode = G_REGEX_ERROR_INTERNAL; - *errmsg = _("overran compiling workspace"); - break; - case 153: /* internal error: previously-checked referenced subpattern not found */ - *errcode = G_REGEX_ERROR_INTERNAL; - *errmsg = _("previously-checked referenced subpattern not found"); - break; - case G_REGEX_ERROR_TOO_MANY_BRANCHES_IN_DEFINE: + case PCRE2_ERROR_DEFINE_TOO_MANY_BRANCHES: + *errcode = G_REGEX_ERROR_TOO_MANY_BRANCHES_IN_DEFINE; *errmsg = _("DEFINE group contains more than one branch"); break; - case G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS: + case PCRE2_ERROR_INTERNAL_UNKNOWN_NEWLINE: + *errcode = G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS; *errmsg = _("inconsistent NEWLINE options"); break; - case G_REGEX_ERROR_MISSING_BACK_REFERENCE: + case PCRE2_ERROR_BACKSLASH_G_SYNTAX: + *errcode = G_REGEX_ERROR_MISSING_BACK_REFERENCE; *errmsg = _("\\g is not followed by a braced, angle-bracketed, or quoted name or " "number, or by a plain number"); break; - case G_REGEX_ERROR_INVALID_RELATIVE_REFERENCE: - *errmsg = _("a numbered reference must not be zero"); - break; - case G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_FORBIDDEN: + case PCRE2_ERROR_VERB_ARGUMENT_NOT_ALLOWED: + *errcode = G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_FORBIDDEN; *errmsg = _("an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)"); break; - case G_REGEX_ERROR_UNKNOWN_BACKTRACKING_CONTROL_VERB: + case PCRE2_ERROR_VERB_UNKNOWN: + *errcode = G_REGEX_ERROR_UNKNOWN_BACKTRACKING_CONTROL_VERB; *errmsg = _("(*VERB) not recognized"); break; - case G_REGEX_ERROR_NUMBER_TOO_BIG: + case PCRE2_ERROR_SUBPATTERN_NUMBER_TOO_BIG: + *errcode = G_REGEX_ERROR_NUMBER_TOO_BIG; *errmsg = _("number is too big"); break; - case G_REGEX_ERROR_MISSING_SUBPATTERN_NAME: + case PCRE2_ERROR_SUBPATTERN_NAME_EXPECTED: + *errcode = G_REGEX_ERROR_MISSING_SUBPATTERN_NAME; *errmsg = _("missing subpattern name after (?&"); break; - case G_REGEX_ERROR_MISSING_DIGIT: - *errmsg = _("digit expected after (?+"); - break; - case G_REGEX_ERROR_INVALID_DATA_CHARACTER: - *errmsg = _("] is an invalid data character in JavaScript compatibility mode"); - break; - case G_REGEX_ERROR_EXTRA_SUBPATTERN_NAME: + case PCRE2_ERROR_SUBPATTERN_NAMES_MISMATCH: + *errcode = G_REGEX_ERROR_EXTRA_SUBPATTERN_NAME; *errmsg = _("different names for subpatterns of the same number are not allowed"); break; - case G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_REQUIRED: + case PCRE2_ERROR_MARK_MISSING_ARGUMENT: + *errcode = G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_REQUIRED; *errmsg = _("(*MARK) must have an argument"); break; - case G_REGEX_ERROR_INVALID_CONTROL_CHAR: + case PCRE2_ERROR_BACKSLASH_C_SYNTAX: + *errcode = G_REGEX_ERROR_INVALID_CONTROL_CHAR; *errmsg = _( "\\c must be followed by an ASCII character"); break; - case G_REGEX_ERROR_MISSING_NAME: + case PCRE2_ERROR_BACKSLASH_K_SYNTAX: + *errcode = G_REGEX_ERROR_MISSING_NAME; *errmsg = _("\\k is not followed by a braced, angle-bracketed, or quoted name"); break; - case G_REGEX_ERROR_NOT_SUPPORTED_IN_CLASS: + case PCRE2_ERROR_BACKSLASH_N_IN_CLASS: + *errcode = G_REGEX_ERROR_NOT_SUPPORTED_IN_CLASS; *errmsg = _("\\N is not supported in a class"); break; - case G_REGEX_ERROR_TOO_MANY_FORWARD_REFERENCES: - *errmsg = _("too many forward references"); - break; - case G_REGEX_ERROR_NAME_TOO_LONG: + case PCRE2_ERROR_VERB_NAME_TOO_LONG: + *errcode = G_REGEX_ERROR_NAME_TOO_LONG; *errmsg = _("name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)"); break; - case G_REGEX_ERROR_CHARACTER_VALUE_TOO_LARGE: - *errmsg = _("character value in \\u.... sequence is too large"); - break; - - case 116: /* erroffset passed as NULL */ - /* This should not happen as we never pass a NULL erroffset */ - g_warning ("erroffset passed as NULL"); - *errcode = G_REGEX_ERROR_COMPILE; - break; - case 117: /* unknown option bit(s) set */ - /* This should not happen as we check options before passing them - * to pcre_compile2() */ - g_warning ("unknown option bit(s) set"); - *errcode = G_REGEX_ERROR_COMPILE; - break; - case 132: /* this version of PCRE is compiled without UTF support */ - case 144: /* invalid UTF-8 string */ - case 145: /* support for \\P, \\p, and \\X has not been compiled */ - case 167: /* this version of PCRE is not compiled with Unicode property support */ - case 173: /* disallowed Unicode code point (>= 0xd800 && <= 0xdfff) */ - case 174: /* invalid UTF-16 string */ - /* These errors should not happen as we are using an UTF-8 and UCP-enabled PCRE - * and we do not check if strings are valid */ - case 170: /* internal error: unknown opcode in find_fixedlength() */ + case PCRE2_ERROR_INTERNAL_CODE_OVERFLOW: *errcode = G_REGEX_ERROR_INTERNAL; + *errmsg = _("code overflow"); break; - + case PCRE2_ERROR_UNRECOGNIZED_AFTER_QUERY_P: + *errcode = G_REGEX_ERROR_UNRECOGNIZED_CHARACTER; + *errmsg = _("unrecognized character after (?P"); + break; + case PCRE2_ERROR_INTERNAL_OVERRAN_WORKSPACE: + *errcode = G_REGEX_ERROR_INTERNAL; + *errmsg = _("overran compiling workspace"); + break; + case PCRE2_ERROR_INTERNAL_MISSING_SUBPATTERN: + *errcode = G_REGEX_ERROR_INTERNAL; + *errmsg = _("previously-checked referenced subpattern not found"); + break; + case PCRE2_ERROR_HEAP_FAILED: + case PCRE2_ERROR_INTERNAL_PARSED_OVERFLOW: + case PCRE2_ERROR_UNICODE_NOT_SUPPORTED: + case PCRE2_ERROR_UNICODE_DISALLOWED_CODE_POINT: + case PCRE2_ERROR_NO_SURROGATES_IN_UTF16: + case PCRE2_ERROR_INTERNAL_BAD_CODE_LOOKBEHINDS: + case PCRE2_ERROR_UNICODE_PROPERTIES_UNAVAILABLE: + case PCRE2_ERROR_INTERNAL_STUDY_ERROR: + case PCRE2_ERROR_UTF_IS_DISABLED: + case PCRE2_ERROR_UCP_IS_DISABLED: + case PCRE2_ERROR_INTERNAL_BAD_CODE_AUTO_POSSESS: + case PCRE2_ERROR_BACKSLASH_C_LIBRARY_DISABLED: + case PCRE2_ERROR_INTERNAL_BAD_CODE: + case PCRE2_ERROR_INTERNAL_BAD_CODE_IN_SKIP: + *errcode = G_REGEX_ERROR_INTERNAL; + *errmsg = _("internal error"); + break; + case PCRE2_ERROR_INVALID_SUBPATTERN_NAME: + case PCRE2_ERROR_CLASS_INVALID_RANGE: + case PCRE2_ERROR_ZERO_RELATIVE_REFERENCE: + case PCRE2_ERROR_PARENTHESES_STACK_CHECK: + case PCRE2_ERROR_LOOKBEHIND_TOO_COMPLICATED: + case PCRE2_ERROR_CALLOUT_NUMBER_TOO_BIG: + case PCRE2_ERROR_MISSING_CALLOUT_CLOSING: + case PCRE2_ERROR_ESCAPE_INVALID_IN_VERB: + case PCRE2_ERROR_NULL_PATTERN: + case PCRE2_ERROR_BAD_OPTIONS: + case PCRE2_ERROR_PARENTHESES_NEST_TOO_DEEP: + case PCRE2_ERROR_BACKSLASH_O_MISSING_BRACE: + case PCRE2_ERROR_INVALID_OCTAL: + case PCRE2_ERROR_CALLOUT_STRING_TOO_LONG: + case PCRE2_ERROR_BACKSLASH_U_CODE_POINT_TOO_BIG: + case PCRE2_ERROR_MISSING_OCTAL_OR_HEX_DIGITS: + case PCRE2_ERROR_VERSION_CONDITION_SYNTAX: + case PCRE2_ERROR_CALLOUT_NO_STRING_DELIMITER: + case PCRE2_ERROR_CALLOUT_BAD_STRING_DELIMITER: + case PCRE2_ERROR_BACKSLASH_C_CALLER_DISABLED: + case PCRE2_ERROR_QUERY_BARJX_NEST_TOO_DEEP: + case PCRE2_ERROR_PATTERN_TOO_COMPLICATED: + case PCRE2_ERROR_LOOKBEHIND_TOO_LONG: + case PCRE2_ERROR_PATTERN_STRING_TOO_LONG: + case PCRE2_ERROR_BAD_LITERAL_OPTIONS: default: *errcode = G_REGEX_ERROR_COMPILE; + *errmsg = _("internal error"); + break; } + + g_assert (*errcode != 0); + g_assert (*errmsg != NULL); } /* GMatchInfo */ @@ -570,12 +749,16 @@ match_info_new (const GRegex *regex, match_info->regex = g_regex_ref ((GRegex *)regex); match_info->string = string; match_info->string_len = string_len; - match_info->matches = PCRE_ERROR_NOMATCH; + match_info->matches = PCRE2_ERROR_NOMATCH; match_info->pos = start_position; match_info->match_opts = match_options; - pcre_fullinfo (regex->pcre_re, regex->extra, - PCRE_INFO_CAPTURECOUNT, &match_info->n_subpatterns); + pcre2_pattern_info (regex->pcre_re, PCRE2_INFO_CAPTURECOUNT, + &match_info->n_subpatterns); + + match_info->match_context = pcre2_match_context_create (NULL); + pcre2_set_match_limit (match_info->match_context, 65536); /* should be plenty */ + pcre2_set_recursion_limit (match_info->match_context, 64); /* should be plenty */ if (is_dfa) { @@ -595,9 +778,41 @@ match_info_new (const GRegex *regex, match_info->offsets[0] = -1; match_info->offsets[1] = -1; + match_info->match_data = pcre2_match_data_create_from_pattern ( + match_info->regex->pcre_re, + NULL); + return match_info; } +static gboolean +recalc_match_offsets (GMatchInfo *match_info, + GError **error) +{ + PCRE2_SIZE *ovector; + gint i; + + if (pcre2_get_ovector_count (match_info->match_data) > G_MAXINT / 2) + { + g_set_error (error, G_REGEX_ERROR, G_REGEX_ERROR_MATCH, + _("Error while matching regular expression %s: %s"), + match_info->regex->pattern, _("code overflow")); + return FALSE; + } + + match_info->n_offsets = pcre2_get_ovector_count (match_info->match_data) * 2; + ovector = pcre2_get_ovector_pointer (match_info->match_data); + match_info->offsets = g_realloc_n (match_info->offsets, + match_info->n_offsets, + sizeof (gint)); + for (i = 0; i < match_info->n_offsets; i++) + { + match_info->offsets[i] = (int) ovector[i]; + } + + return TRUE; +} + /** * g_match_info_get_regex: * @match_info: a #GMatchInfo @@ -669,6 +884,10 @@ g_match_info_unref (GMatchInfo *match_info) if (g_atomic_int_dec_and_test (&match_info->ref_count)) { g_regex_unref (match_info->regex); + if (match_info->match_context) + pcre2_match_context_free (match_info->match_context); + if (match_info->match_data) + pcre2_match_data_free (match_info->match_data); g_free (match_info->offsets); g_free (match_info->workspace); g_free (match_info); @@ -715,6 +934,7 @@ g_match_info_next (GMatchInfo *match_info, { gint prev_match_start; gint prev_match_end; + gint opts; g_return_val_if_fail (match_info != NULL, FALSE); g_return_val_if_fail (error == NULL || *error == NULL, FALSE); @@ -727,25 +947,29 @@ g_match_info_next (GMatchInfo *match_info, { /* we have reached the end of the string */ match_info->pos = -1; - match_info->matches = PCRE_ERROR_NOMATCH; + match_info->matches = PCRE2_ERROR_NOMATCH; return FALSE; } - match_info->matches = pcre_exec (match_info->regex->pcre_re, - match_info->regex->extra, - match_info->string, - match_info->string_len, - match_info->pos, - match_info->regex->match_opts | match_info->match_opts, - match_info->offsets, - match_info->n_offsets); - if (IS_PCRE_ERROR (match_info->matches)) + opts = map_to_pcre2_match_flags (match_info->regex->match_opts | match_info->match_opts); + match_info->matches = pcre2_match (match_info->regex->pcre_re, + (PCRE2_SPTR8) match_info->string, + match_info->string_len, + match_info->pos, + opts & ~G_REGEX_FLAGS_CONVERTED, + match_info->match_data, + match_info->match_context); + + if (IS_PCRE2_ERROR (match_info->matches)) { g_set_error (error, G_REGEX_ERROR, G_REGEX_ERROR_MATCH, _("Error while matching regular expression %s: %s"), match_info->regex->pattern, match_error (match_info->matches)); return FALSE; } + else + if (!recalc_match_offsets (match_info, error)) + return FALSE; /* avoid infinite loops if the pattern is an empty string or something * equivalent */ @@ -755,7 +979,7 @@ g_match_info_next (GMatchInfo *match_info, { /* we have reached the end of the string */ match_info->pos = -1; - match_info->matches = PCRE_ERROR_NOMATCH; + match_info->matches = PCRE2_ERROR_NOMATCH; return FALSE; } @@ -833,10 +1057,10 @@ g_match_info_get_match_count (const GMatchInfo *match_info) { g_return_val_if_fail (match_info, -1); - if (match_info->matches == PCRE_ERROR_NOMATCH) + if (match_info->matches == PCRE2_ERROR_NOMATCH) /* no match */ return 0; - else if (match_info->matches < PCRE_ERROR_NOMATCH) + else if (match_info->matches < PCRE2_ERROR_NOMATCH) /* error */ return -1; else @@ -891,7 +1115,7 @@ g_match_info_is_partial_match (const GMatchInfo *match_info) { g_return_val_if_fail (match_info != NULL, FALSE); - return match_info->matches == PCRE_ERROR_PARTIAL; + return match_info->matches == PCRE2_ERROR_PARTIAL; } /** @@ -988,8 +1212,6 @@ gchar * g_match_info_fetch (const GMatchInfo *match_info, gint match_num) { - /* we cannot use pcre_get_substring() because it allocates the - * string using pcre_malloc(). */ gchar *match = NULL; gint start, end; @@ -1069,24 +1291,25 @@ g_match_info_fetch_pos (const GMatchInfo *match_info, * Returns number of first matched subpattern with name @name. * There may be more than one in case when DUPNAMES is used, * and not all subpatterns with that name match; - * pcre_get_stringnumber() does not work in that case. + * pcre2_substring_number_from_name() does not work in that case. */ static gint get_matched_substring_number (const GMatchInfo *match_info, const gchar *name) { gint entrysize; - gchar *first, *last; + PCRE2_SPTR first, last; guchar *entry; - if (!(match_info->regex->compile_opts & G_REGEX_DUPNAMES)) - return pcre_get_stringnumber (match_info->regex->pcre_re, name); + if (!(match_info->regex->compile_opts & PCRE2_DUPNAMES)) + return pcre2_substring_number_from_name (match_info->regex->pcre_re, (PCRE2_SPTR8) name); - /* This code is copied from pcre_get.c: get_first_set() */ - entrysize = pcre_get_stringtable_entries (match_info->regex->pcre_re, - name, - &first, - &last); + /* This code is analogous to code from pcre2_substring.c: + * pcre2_substring_get_byname() */ + entrysize = pcre2_substring_nametable_scan (match_info->regex->pcre_re, + (PCRE2_SPTR8) name, + &first, + &last); if (entrysize <= 0) return entrysize; @@ -1124,8 +1347,6 @@ gchar * g_match_info_fetch_named (const GMatchInfo *match_info, const gchar *name) { - /* we cannot use pcre_get_named_substring() because it allocates the - * string using pcre_malloc(). */ gint num; g_return_val_if_fail (match_info != NULL, NULL); @@ -1207,8 +1428,6 @@ g_match_info_fetch_named_pos (const GMatchInfo *match_info, gchar ** g_match_info_fetch_all (const GMatchInfo *match_info) { - /* we cannot use pcre_get_substring_list() because the returned value - * isn't suitable for g_strfreev(). */ gchar **result; gint i; @@ -1266,9 +1485,7 @@ g_regex_unref (GRegex *regex) { g_free (regex->pattern); if (regex->pcre_re != NULL) - pcre_free (regex->pcre_re); - if (regex->extra != NULL) - pcre_free (regex->extra); + pcre2_code_free (regex->pcre_re); g_free (regex); } } @@ -1276,11 +1493,11 @@ g_regex_unref (GRegex *regex) /* * @match_options: (inout) (optional): */ -static pcre *regex_compile (const gchar *pattern, - GRegexCompileFlags compile_options, - GRegexCompileFlags *compile_options_out, - GRegexMatchFlags *match_options, - GError **error); +static pcre2_code *regex_compile (const gchar *pattern, + GRegexCompileFlags compile_options, + GRegexCompileFlags *compile_options_out, + GRegexMatchFlags *match_options, + GError **error); /** * g_regex_new: @@ -1304,10 +1521,13 @@ g_regex_new (const gchar *pattern, GError **error) { GRegex *regex; - pcre *re; - const gchar *errmsg; - gboolean optimize = FALSE; + pcre2_code *re; static gsize initialised = 0; + GRegexCompileFlags orig_compile_opts; + + orig_compile_opts = compile_options; + compile_options = map_to_pcre2_compile_flags (compile_options); + match_options = map_to_pcre2_match_flags (match_options); g_return_val_if_fail (pattern != NULL, NULL); g_return_val_if_fail (error == NULL || *error == NULL, NULL); @@ -1316,17 +1536,13 @@ g_regex_new (const gchar *pattern, if (g_once_init_enter (&initialised)) { - int supports_utf8, supports_ucp; + int supports_utf8; - pcre_config (PCRE_CONFIG_UTF8, &supports_utf8); + pcre2_config (PCRE2_CONFIG_UNICODE, &supports_utf8); if (!supports_utf8) g_critical (_("PCRE library is compiled without UTF8 support")); - pcre_config (PCRE_CONFIG_UNICODE_PROPERTIES, &supports_ucp); - if (!supports_ucp) - g_critical (_("PCRE library is compiled without UTF8 properties support")); - - g_once_init_leave (&initialised, supports_utf8 && supports_ucp ? 1 : 2); + g_once_init_leave (&initialised, supports_utf8 ? 1 : 2); } if (G_UNLIKELY (initialised != 1)) @@ -1336,14 +1552,22 @@ g_regex_new (const gchar *pattern, return NULL; } - /* G_REGEX_OPTIMIZE has the same numeric value of PCRE_NO_UTF8_CHECK, - * as we do not need to wrap PCRE_NO_UTF8_CHECK. */ - if (compile_options & G_REGEX_OPTIMIZE) - optimize = TRUE; + switch (compile_options & G_REGEX_NEWLINE_MASK) + { + case 0: /* PCRE2_NEWLINE_ANY */ + case PCRE2_NEWLINE_CR: + case PCRE2_NEWLINE_LF: + case PCRE2_NEWLINE_CRLF: + case PCRE2_NEWLINE_ANYCRLF: + break; + default: + g_set_error (error, G_REGEX_ERROR, G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS, + "Invalid newline flags"); + return NULL; + } re = regex_compile (pattern, compile_options, &compile_options, &match_options, error); - if (re == NULL) return NULL; @@ -1352,80 +1576,85 @@ g_regex_new (const gchar *pattern, regex->pattern = g_strdup (pattern); regex->pcre_re = re; regex->compile_opts = compile_options; + regex->orig_compile_opts = orig_compile_opts; regex->match_opts = match_options; - if (optimize) - { - regex->extra = pcre_study (regex->pcre_re, 0, &errmsg); - if (errmsg != NULL) - { - GError *tmp_error = g_error_new (G_REGEX_ERROR, - G_REGEX_ERROR_OPTIMIZE, - _("Error while optimizing " - "regular expression %s: %s"), - regex->pattern, - errmsg); - g_propagate_error (error, tmp_error); - - g_regex_unref (regex); - return NULL; - } - } - return regex; } -static pcre * -regex_compile (const gchar *pattern, - GRegexCompileFlags compile_options, - GRegexCompileFlags *compile_options_out, - GRegexMatchFlags *match_options, - GError **error) +static gint +extract_newline_options (const GRegexCompileFlags compile_options, + const GRegexMatchFlags *match_options) { - pcre *re; + gint newline_options = PCRE2_NEWLINE_ANY; + + if (compile_options & G_REGEX_NEWLINE_MASK) + newline_options = compile_options & G_REGEX_NEWLINE_MASK; + if (match_options && *match_options & G_REGEX_MATCH_NEWLINE_MASK) + newline_options = *match_options & G_REGEX_MATCH_NEWLINE_MASK; + + return newline_options; +} + +static gint +extract_bsr_options (const GRegexCompileFlags compile_options, + const GRegexMatchFlags *match_options) +{ + gint bsr_options = PCRE2_BSR_UNICODE; + + if (compile_options & PCRE2_BSR_ANYCRLF) + bsr_options = PCRE2_BSR_ANYCRLF; + if (match_options && *match_options & PCRE2_BSR_ANYCRLF) + bsr_options = PCRE2_BSR_ANYCRLF; + if (match_options && *match_options & PCRE2_BSR_UNICODE) + bsr_options = PCRE2_BSR_UNICODE; + + return bsr_options; +} + +static pcre2_code * +regex_compile (const gchar *pattern, + GRegexCompileFlags compile_options, + GRegexCompileFlags *compile_options_out, + GRegexMatchFlags *match_options, + GError **error) +{ + pcre2_code *re; + pcre2_compile_context *context; const gchar *errmsg; - gint erroffset; + PCRE2_SIZE erroffset; gint errcode; GRegexCompileFlags nonpcre_compile_options; unsigned long int pcre_compile_options; nonpcre_compile_options = compile_options & G_REGEX_COMPILE_NONPCRE_MASK; - /* In GRegex the string are, by default, UTF-8 encoded. PCRE - * instead uses UTF-8 only if required with PCRE_UTF8. */ - if (compile_options & G_REGEX_RAW) - { - /* disable utf-8 */ - compile_options &= ~G_REGEX_RAW; - } - else - { - /* enable utf-8 */ - compile_options |= PCRE_UTF8 | PCRE_NO_UTF8_CHECK; + context = pcre2_compile_context_create (NULL); + /* set newline options */ + pcre2_set_newline (context, extract_newline_options (compile_options, match_options)); + + /* set bsr options */ + pcre2_set_bsr (context, extract_bsr_options (compile_options, match_options)); + + /* In case UTF-8 mode is used, also set PCRE2_NO_UTF_CHECK */ + if (compile_options & PCRE2_UTF) + { + compile_options |= PCRE2_NO_UTF_CHECK; if (match_options != NULL) - *match_options |= PCRE_NO_UTF8_CHECK; + *match_options |= PCRE2_NO_UTF_CHECK; } - /* PCRE_NEWLINE_ANY is the default for the internal PCRE but - * not for the system one. */ - if (!(compile_options & G_REGEX_NEWLINE_CR) && - !(compile_options & G_REGEX_NEWLINE_LF)) - { - compile_options |= PCRE_NEWLINE_ANY; - } - - compile_options |= PCRE_UCP; - - /* PCRE_BSR_UNICODE is the default for the internal PCRE but - * possibly not for the system one. - */ - if (~compile_options & G_REGEX_BSR_ANYCRLF) - compile_options |= PCRE_BSR_UNICODE; + compile_options |= PCRE2_UCP; /* compile the pattern */ - re = pcre_compile2 (pattern, compile_options, &errcode, - &errmsg, &erroffset, NULL); + re = pcre2_compile ((PCRE2_SPTR8) pattern, + PCRE2_ZERO_TERMINATED, + compile_options & ~G_REGEX_FLAGS_CONVERTED, + &errcode, + &erroffset, + context); + pcre2_compile_context_free (context); /* if the compilation failed, set the error member and return * immediately */ @@ -1442,7 +1671,7 @@ regex_compile (const gchar *pattern, tmp_error = g_error_new (G_REGEX_ERROR, errcode, _("Error while compiling regular " - "expression %s at char %d: %s"), + "expression %s at char %" G_GSIZE_FORMAT ": %s"), pattern, erroffset, errmsg); g_propagate_error (error, tmp_error); @@ -1451,22 +1680,22 @@ regex_compile (const gchar *pattern, /* For options set at the beginning of the pattern, pcre puts them into * compile options, e.g. "(?i)foo" will make the pcre structure store - * PCRE_CASELESS even though it wasn't explicitly given for compilation. */ - pcre_fullinfo (re, NULL, PCRE_INFO_OPTIONS, &pcre_compile_options); + * PCRE2_CASELESS even though it wasn't explicitly given for compilation. */ + pcre2_pattern_info (re, PCRE2_INFO_ALLOPTIONS, &pcre_compile_options); compile_options = pcre_compile_options & G_REGEX_COMPILE_PCRE_MASK; - /* Don't leak PCRE_NEWLINE_ANY, which is part of PCRE_NEWLINE_ANYCRLF */ - if ((pcre_compile_options & PCRE_NEWLINE_ANYCRLF) != PCRE_NEWLINE_ANYCRLF) - compile_options &= ~PCRE_NEWLINE_ANY; + /* Don't leak PCRE2_NEWLINE_ANY, which is part of PCRE2_NEWLINE_ANYCRLF */ + if ((pcre_compile_options & PCRE2_NEWLINE_ANYCRLF) != PCRE2_NEWLINE_ANYCRLF) + compile_options &= ~PCRE2_NEWLINE_ANY; compile_options |= nonpcre_compile_options; - if (!(compile_options & G_REGEX_DUPNAMES)) + if (!(compile_options & PCRE2_DUPNAMES)) { gboolean jchanged = FALSE; - pcre_fullinfo (re, NULL, PCRE_INFO_JCHANGED, &jchanged); + pcre2_pattern_info (re, PCRE2_INFO_JCHANGED, &jchanged); if (jchanged) - compile_options |= G_REGEX_DUPNAMES; + compile_options |= PCRE2_DUPNAMES; } if (compile_options_out != 0) @@ -1511,8 +1740,7 @@ g_regex_get_max_backref (const GRegex *regex) { gint value; - pcre_fullinfo (regex->pcre_re, regex->extra, - PCRE_INFO_BACKREFMAX, &value); + pcre2_pattern_info (regex->pcre_re, PCRE2_INFO_BACKREFMAX, &value); return value; } @@ -1532,8 +1760,7 @@ g_regex_get_capture_count (const GRegex *regex) { gint value; - pcre_fullinfo (regex->pcre_re, regex->extra, - PCRE_INFO_CAPTURECOUNT, &value); + pcre2_pattern_info (regex->pcre_re, PCRE2_INFO_CAPTURECOUNT, &value); return value; } @@ -1553,8 +1780,7 @@ g_regex_get_has_cr_or_lf (const GRegex *regex) { gint value; - pcre_fullinfo (regex->pcre_re, regex->extra, - PCRE_INFO_HASCRORLF, &value); + pcre2_pattern_info (regex->pcre_re, PCRE2_INFO_HASCRORLF, &value); return !!value; } @@ -1576,8 +1802,8 @@ g_regex_get_max_lookbehind (const GRegex *regex) { gint max_lookbehind; - pcre_fullinfo (regex->pcre_re, regex->extra, - PCRE_INFO_MAXLOOKBEHIND, &max_lookbehind); + pcre2_pattern_info (regex->pcre_re, PCRE2_INFO_MAXLOOKBEHIND, + &max_lookbehind); return max_lookbehind; } @@ -1599,9 +1825,47 @@ g_regex_get_max_lookbehind (const GRegex *regex) GRegexCompileFlags g_regex_get_compile_flags (const GRegex *regex) { + gint extra_flags, info_value; + g_return_val_if_fail (regex != NULL, 0); - return regex->compile_opts; +G_GNUC_BEGIN_IGNORE_DEPRECATIONS + /* Preserve original G_REGEX_OPTIMIZE */ + extra_flags = (regex->orig_compile_opts & G_REGEX_OPTIMIZE); +G_GNUC_END_IGNORE_DEPRECATIONS + + /* Also include the newline options */ + pcre2_pattern_info (regex->pcre_re, PCRE2_INFO_NEWLINE, &info_value); + switch (info_value) + { + case PCRE2_NEWLINE_ANYCRLF: + extra_flags |= G_REGEX_NEWLINE_ANYCRLF; + break; + case PCRE2_NEWLINE_CRLF: + extra_flags |= G_REGEX_NEWLINE_CRLF; + break; + case PCRE2_NEWLINE_LF: + extra_flags |= G_REGEX_NEWLINE_LF; + break; + case PCRE2_NEWLINE_CR: + extra_flags |= G_REGEX_NEWLINE_CR; + break; + default: + break; + } + + /* Also include the bsr options */ + pcre2_pattern_info (regex->pcre_re, PCRE2_INFO_BSR, &info_value); + switch (info_value) + { + case PCRE2_BSR_ANYCRLF: + extra_flags |= G_REGEX_BSR_ANYCRLF; + break; + default: + break; + } + + return map_to_pcre1_compile_flags (regex->compile_opts) | extra_flags; } /** @@ -1619,7 +1883,7 @@ g_regex_get_match_flags (const GRegex *regex) { g_return_val_if_fail (regex != NULL, 0); - return regex->match_opts & G_REGEX_MATCH_MASK; + return map_to_pcre1_match_flags (regex->match_opts & G_REGEX_MATCH_MASK); } /** @@ -1653,6 +1917,9 @@ g_regex_match_simple (const gchar *pattern, GRegex *regex; gboolean result; + compile_options = map_to_pcre2_compile_flags (compile_options); + match_options = map_to_pcre2_match_flags (match_options); + regex = g_regex_new (pattern, compile_options, G_REGEX_MATCH_DEFAULT, NULL); if (!regex) return FALSE; @@ -1720,6 +1987,8 @@ g_regex_match (const GRegex *regex, GRegexMatchFlags match_options, GMatchInfo **match_info) { + match_options = map_to_pcre2_match_flags (match_options); + return g_regex_match_full (regex, string, -1, 0, match_options, match_info, NULL); } @@ -1803,6 +2072,8 @@ g_regex_match_full (const GRegex *regex, GMatchInfo *info; gboolean match_ok; + match_options = map_to_pcre2_match_flags (match_options); + g_return_val_if_fail (regex != NULL, FALSE); g_return_val_if_fail (string != NULL, FALSE); g_return_val_if_fail (start_position >= 0, FALSE); @@ -1853,6 +2124,8 @@ g_regex_match_all (const GRegex *regex, GRegexMatchFlags match_options, GMatchInfo **match_info) { + match_options = map_to_pcre2_match_flags (match_options); + return g_regex_match_all_full (regex, string, -1, 0, match_options, match_info, NULL); } @@ -1922,39 +2195,29 @@ g_regex_match_all_full (const GRegex *regex, { GMatchInfo *info; gboolean done; - pcre *pcre_re; - pcre_extra *extra; + pcre2_code *pcre_re; gboolean retval; + match_options = map_to_pcre2_match_flags (match_options); + g_return_val_if_fail (regex != NULL, FALSE); g_return_val_if_fail (string != NULL, FALSE); g_return_val_if_fail (start_position >= 0, FALSE); g_return_val_if_fail (error == NULL || *error == NULL, FALSE); g_return_val_if_fail ((match_options & ~G_REGEX_MATCH_MASK) == 0, FALSE); -#ifdef PCRE_NO_AUTO_POSSESS - /* For PCRE >= 8.34 we need to turn off PCRE_NO_AUTO_POSSESS, which - * is an optimization for normal regex matching, but results in omitting - * some shorter matches here, and an observable behaviour change. + /* For PCRE2 we need to turn off PCRE2_NO_AUTO_POSSESS, which is an + * optimization for normal regex matching, but results in omitting some + * shorter matches here, and an observable behaviour change. * * DFA matching is rather niche, and very rarely used according to * codesearch.debian.net, so don't bother caching the recompiled RE. */ pcre_re = regex_compile (regex->pattern, - regex->compile_opts | PCRE_NO_AUTO_POSSESS, + regex->compile_opts | PCRE2_NO_AUTO_POSSESS, NULL, NULL, error); - if (pcre_re == NULL) return FALSE; - /* Not bothering to cache the optimization data either, with similar - * reasoning */ - extra = NULL; -#else - /* For PCRE < 8.33 the precompiled regex is fine. */ - pcre_re = regex->pcre_re; - extra = regex->extra; -#endif - info = match_info_new (regex, string, string_len, start_position, match_options, TRUE); @@ -1962,29 +2225,38 @@ g_regex_match_all_full (const GRegex *regex, while (!done) { done = TRUE; - info->matches = pcre_dfa_exec (pcre_re, extra, - info->string, info->string_len, - info->pos, - regex->match_opts | match_options, - info->offsets, info->n_offsets, - info->workspace, info->n_workspace); - if (info->matches == PCRE_ERROR_DFA_WSSIZE) + info->matches = pcre2_dfa_match (pcre_re, + (PCRE2_SPTR8) info->string, info->string_len, + info->pos, + (regex->match_opts | match_options | PCRE2_NO_UTF_CHECK) & ~G_REGEX_FLAGS_CONVERTED, + info->match_data, + info->match_context, + info->workspace, info->n_workspace); + + if (!recalc_match_offsets (info, error)) + return FALSE; + + if (info->matches == PCRE2_ERROR_DFA_WSSIZE) { /* info->workspace is too small. */ info->n_workspace *= 2; - info->workspace = g_realloc (info->workspace, - info->n_workspace * sizeof (gint)); + info->workspace = g_realloc_n (info->workspace, + info->n_workspace, + sizeof (gint)); done = FALSE; } else if (info->matches == 0) { /* info->offsets is too small. */ info->n_offsets *= 2; - info->offsets = g_realloc (info->offsets, - info->n_offsets * sizeof (gint)); + info->offsets = g_realloc_n (info->offsets, + info->n_offsets, + sizeof (gint)); + pcre2_match_data_free (info->match_data); + info->match_data = pcre2_match_data_create (info->n_offsets, NULL); done = FALSE; } - else if (IS_PCRE_ERROR (info->matches)) + else if (IS_PCRE2_ERROR (info->matches)) { g_set_error (error, G_REGEX_ERROR, G_REGEX_ERROR_MATCH, _("Error while matching regular expression %s: %s"), @@ -1992,9 +2264,7 @@ g_regex_match_all_full (const GRegex *regex, } } -#ifdef PCRE_NO_AUTO_POSSESS - pcre_free (pcre_re); -#endif + pcre2_code_free (pcre_re); /* don’t assert that (info->matches <= info->n_subpatterns + 1) as that only * holds true for a single match, rather than matching all */ @@ -2032,8 +2302,8 @@ g_regex_get_string_number (const GRegex *regex, g_return_val_if_fail (regex != NULL, -1); g_return_val_if_fail (name != NULL, -1); - num = pcre_get_stringnumber (regex->pcre_re, name); - if (num == PCRE_ERROR_NOSUBSTRING) + num = pcre2_substring_number_from_name (regex->pcre_re, (PCRE2_SPTR8) name); + if (num == PCRE2_ERROR_NOSUBSTRING) num = -1; return num; @@ -2088,6 +2358,9 @@ g_regex_split_simple (const gchar *pattern, GRegex *regex; gchar **result; + compile_options = map_to_pcre2_compile_flags (compile_options); + match_options = map_to_pcre2_match_flags (match_options); + regex = g_regex_new (pattern, compile_options, 0, NULL); if (!regex) return NULL; @@ -2131,6 +2404,8 @@ g_regex_split (const GRegex *regex, const gchar *string, GRegexMatchFlags match_options) { + match_options = map_to_pcre2_match_flags (match_options); + return g_regex_split_full (regex, string, -1, 0, match_options, 0, NULL); } @@ -2195,6 +2470,8 @@ g_regex_split_full (const GRegex *regex, /* the returned array of char **s */ gchar **string_list; + match_options = map_to_pcre2_match_flags (match_options); + g_return_val_if_fail (regex != NULL, NULL); g_return_val_if_fail (string != NULL, NULL); g_return_val_if_fail (start_position >= 0, NULL); @@ -2819,6 +3096,8 @@ g_regex_replace (const GRegex *regex, GList *list; GError *tmp_error = NULL; + match_options = map_to_pcre2_match_flags (match_options); + g_return_val_if_fail (regex != NULL, NULL); g_return_val_if_fail (string != NULL, NULL); g_return_val_if_fail (start_position >= 0, NULL); @@ -2888,6 +3167,8 @@ g_regex_replace_literal (const GRegex *regex, GRegexMatchFlags match_options, GError **error) { + match_options = map_to_pcre2_match_flags (match_options); + g_return_val_if_fail (replacement != NULL, NULL); g_return_val_if_fail ((match_options & ~G_REGEX_MATCH_MASK) == 0, NULL); @@ -2976,6 +3257,8 @@ g_regex_replace_eval (const GRegex *regex, gboolean done = FALSE; GError *tmp_error = NULL; + match_options = map_to_pcre2_match_flags (match_options); + g_return_val_if_fail (regex != NULL, NULL); g_return_val_if_fail (string != NULL, NULL); g_return_val_if_fail (start_position >= 0, NULL); diff --git a/glib/gregex.h b/glib/gregex.h index 3fd61806f..7010d52ab 100644 --- a/glib/gregex.h +++ b/glib/gregex.h @@ -264,7 +264,9 @@ GQuark g_regex_error_quark (void); * in the usual way). * @G_REGEX_OPTIMIZE: Optimize the regular expression. If the pattern will * be used many times, then it may be worth the effort to optimize it - * to improve the speed of matches. + * to improve the speed of matches. Deprecated in GLib 2.74 which now uses + * libpcre2, which doesn’t require separate optimization of queries. This + * option is now a no-op. Deprecated: 2.74 * @G_REGEX_FIRSTLINE: Limits an unanchored pattern to match before (or at) the * first newline. Since: 2.34 * @G_REGEX_DUPNAMES: Names used to identify capturing subpatterns need not @@ -287,7 +289,8 @@ GQuark g_regex_error_quark (void); * is recognised. If this option is set, then "\R" only recognizes the newline * characters '\r', '\n' and '\r\n'. Since: 2.34 * @G_REGEX_JAVASCRIPT_COMPAT: Changes behaviour so that it is compatible with - * JavaScript rather than PCRE. Since: 2.34 + * JavaScript rather than PCRE. Since GLib 2.74 this is no longer supported, + * as libpcre2 does not support it. Since: 2.34 Deprecated: 2.74 * * Flags specifying compile-time options. * @@ -308,7 +311,7 @@ typedef enum G_REGEX_UNGREEDY = 1 << 9, G_REGEX_RAW = 1 << 11, G_REGEX_NO_AUTO_CAPTURE = 1 << 12, - G_REGEX_OPTIMIZE = 1 << 13, + G_REGEX_OPTIMIZE GLIB_DEPRECATED_ENUMERATOR_IN_2_74 = 1 << 13, G_REGEX_FIRSTLINE = 1 << 18, G_REGEX_DUPNAMES = 1 << 19, G_REGEX_NEWLINE_CR = 1 << 20, @@ -316,7 +319,7 @@ typedef enum G_REGEX_NEWLINE_CRLF = G_REGEX_NEWLINE_CR | G_REGEX_NEWLINE_LF, G_REGEX_NEWLINE_ANYCRLF = G_REGEX_NEWLINE_CR | 1 << 22, G_REGEX_BSR_ANYCRLF = 1 << 23, - G_REGEX_JAVASCRIPT_COMPAT = 1 << 25 + G_REGEX_JAVASCRIPT_COMPAT GLIB_DEPRECATED_ENUMERATOR_IN_2_74 = 1 << 25 } GRegexCompileFlags; /** diff --git a/glib/meson.build b/glib/meson.build index 4f8240b24..6062c11a1 100644 --- a/glib/meson.build +++ b/glib/meson.build @@ -357,13 +357,13 @@ else glib_dtrace_hdr = [] endif -pcre_static_args = [] +pcre2_static_args = [] -if use_pcre_static_flag - pcre_static_args = ['-DPCRE_STATIC'] +if use_pcre2_static_flag + pcre2_static_args = ['-DPCRE2_STATIC'] endif -glib_c_args = ['-DG_LOG_DOMAIN="GLib"', '-DGLIB_COMPILATION'] + pcre_static_args + glib_hidden_visibility_args +glib_c_args = ['-DG_LOG_DOMAIN="GLib"', '-DGLIB_COMPILATION'] + pcre2_static_args + glib_hidden_visibility_args libglib = library('glib-2.0', glib_dtrace_obj, glib_dtrace_hdr, sources : [deprecated_sources, glib_sources], @@ -375,7 +375,7 @@ libglib = library('glib-2.0', link_args : [noseh_link_args, glib_link_flags, win32_ldflags], include_directories : configinc, link_with: [charset_lib, gnulib_lib], - dependencies : [pcre, thread_dep, librt] + libintl_deps + libiconv + platform_deps + [gnulib_libm_dependency, libm] + [libsysprof_capture_dep], + dependencies : [pcre2, thread_dep, librt] + libintl_deps + libiconv + platform_deps + [gnulib_libm_dependency, libm] + [libsysprof_capture_dep], c_args : glib_c_args, objc_args : glib_c_args, ) diff --git a/glib/tests/meson.build b/glib/tests/meson.build index 9b3b3bfa4..193224d86 100644 --- a/glib/tests/meson.build +++ b/glib/tests/meson.build @@ -89,8 +89,8 @@ glib_tests = { }, 'refstring' : {}, 'regex' : { - 'dependencies' : [pcre], - 'c_args' : use_pcre_static_flag ? ['-DPCRE_STATIC'] : [], + 'dependencies' : [pcre2], + 'c_args' : use_pcre2_static_flag ? ['-DPCRE2_STATIC'] : [], }, 'relation' : {}, 'rwlock' : {}, diff --git a/glib/tests/regex.c b/glib/tests/regex.c index c39d640fa..3355f64e5 100644 --- a/glib/tests/regex.c +++ b/glib/tests/regex.c @@ -27,7 +27,8 @@ #include #include "glib.h" -#include +#define PCRE2_CODE_UNIT_WIDTH 8 +#include /* U+20AC EURO SIGN (symbol, currency) */ #define EURO "\xe2\x82\xac" @@ -1503,7 +1504,7 @@ test_properties (void) gchar *str; error = NULL; - regex = g_regex_new ("\\p{L}\\p{Ll}\\p{Lu}\\p{L&}\\p{N}\\p{Nd}", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); + regex = g_regex_new ("\\p{L}\\p{Ll}\\p{Lu}\\p{L&}\\p{N}\\p{Nd}", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); res = g_regex_match (regex, "ppPP01", 0, &match); g_assert (res); str = g_match_info_fetch (match, 0); @@ -1524,7 +1525,7 @@ test_class (void) gchar *str; error = NULL; - regex = g_regex_new ("[abc\\x{0B1E}\\p{Mn}\\x{0391}-\\x{03A9}]", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); + regex = g_regex_new ("[abc\\x{0B1E}\\p{Mn}\\x{0391}-\\x{03A9}]", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); res = g_regex_match (regex, "a:b:\340\254\236:\333\253:\316\240", 0, &match); g_assert (res); str = g_match_info_fetch (match, 0); @@ -1570,7 +1571,7 @@ test_lookahead (void) gint start, end; error = NULL; - regex = g_regex_new ("\\w+(?=;)", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); + regex = g_regex_new ("\\w+(?=;)", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); g_assert (regex); g_assert_no_error (error); res = g_regex_match (regex, "word1 word2: word3;", 0, &match); @@ -1584,7 +1585,7 @@ test_lookahead (void) g_regex_unref (regex); error = NULL; - regex = g_regex_new ("foo(?!bar)", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); + regex = g_regex_new ("foo(?!bar)", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); g_assert (regex); g_assert_no_error (error); res = g_regex_match (regex, "foobar foobaz", 0, &match); @@ -1599,7 +1600,7 @@ test_lookahead (void) g_regex_unref (regex); error = NULL; - regex = g_regex_new ("(?!bar)foo", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); + regex = g_regex_new ("(?!bar)foo", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); g_assert (regex); g_assert_no_error (error); res = g_regex_match (regex, "foobar foobaz", 0, &match); @@ -1632,7 +1633,7 @@ test_lookbehind (void) gint start, end; error = NULL; - regex = g_regex_new ("(?Mon|Fri|Sun)(?:day)?|(?Tue)(?:sday)?|(?Wed)(?:nesday)?|(?Thu)(?:rsday)?|(?Sat)(?:urday)?", G_REGEX_OPTIMIZE|G_REGEX_DUPNAMES, G_REGEX_MATCH_DEFAULT, &error); + regex = g_regex_new ("(?Mon|Fri|Sun)(?:day)?|(?Tue)(?:sday)?|(?Wed)(?:nesday)?|(?Thu)(?:rsday)?|(?Sat)(?:urday)?", G_REGEX_DUPNAMES, G_REGEX_MATCH_DEFAULT, &error); g_assert (regex); g_assert_no_error (error); res = g_regex_match (regex, "Mon Tuesday Wed Saturday", 0, &match); @@ -1896,7 +1897,7 @@ test_subpattern (void) g_match_info_free (match); g_regex_unref (regex); - regex = g_regex_new ("^(a|b\\1)+$", G_REGEX_OPTIMIZE|G_REGEX_DUPNAMES, G_REGEX_MATCH_DEFAULT, &error); + regex = g_regex_new ("^(a|b\\1)+$", G_REGEX_DUPNAMES, G_REGEX_MATCH_DEFAULT, &error); g_assert (regex); g_assert_no_error (error); res = g_regex_match (regex, "aaaaaaaaaaaaaaaa", 0, &match); @@ -1920,7 +1921,7 @@ test_condition (void) gboolean res; error = NULL; - regex = g_regex_new ("^(a+)(\\()?[^()]+(?(-1)\\))(b+)$", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); + regex = g_regex_new ("^(a+)(\\()?[^()]+(?(-1)\\))(b+)$", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); g_assert (regex); g_assert_no_error (error); res = g_regex_match (regex, "a(zzzzzz)b", 0, &match); @@ -1934,7 +1935,7 @@ test_condition (void) g_regex_unref (regex); error = NULL; - regex = g_regex_new ("^(a+)(?\\()?[^()]+(?()\\))(b+)$", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); + regex = g_regex_new ("^(a+)(?\\()?[^()]+(?()\\))(b+)$", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); g_assert (regex); g_assert_no_error (error); res = g_regex_match (regex, "a(zzzzzz)b", 0, &match); @@ -1947,7 +1948,7 @@ test_condition (void) g_match_info_free (match); g_regex_unref (regex); - regex = g_regex_new ("^(a+)(?(+1)\\[|\\<)?[^()]+(\\])?(b+)$", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); + regex = g_regex_new ("^(a+)(?(+1)\\[|\\<)?[^()]+(\\])?(b+)$", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); g_assert (regex); g_assert_no_error (error); res = g_regex_match (regex, "a[zzzzzz]b", 0, &match); @@ -1962,7 +1963,7 @@ test_condition (void) regex = g_regex_new ("(?(DEFINE) (? 2[0-4]\\d | 25[0-5] | 1\\d\\d | [1-9]?\\d) )" "\\b (?&byte) (\\.(?&byte)){3} \\b", - G_REGEX_OPTIMIZE|G_REGEX_EXTENDED, 0, &error); + G_REGEX_EXTENDED, 0, &error); g_assert (regex); g_assert_no_error (error); res = g_regex_match (regex, "128.0.0.1", 0, &match); @@ -1981,7 +1982,7 @@ test_condition (void) regex = g_regex_new ("^(?(?=[^a-z]*[a-z])" "\\d{2}-[a-z]{3}-\\d{2} | \\d{2}-\\d{2}-\\d{2} )$", - G_REGEX_OPTIMIZE|G_REGEX_EXTENDED, 0, &error); + G_REGEX_EXTENDED, 0, &error); g_assert (regex); g_assert_no_error (error); res = g_regex_match (regex, "01-abc-24", 0, &match); @@ -2014,7 +2015,7 @@ test_recursion (void) gint start; error = NULL; - regex = g_regex_new ("\\( ( [^()]++ | (?R) )* \\)", G_REGEX_OPTIMIZE|G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error); + regex = g_regex_new ("\\( ( [^()]++ | (?R) )* \\)", G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error); g_assert (regex); g_assert_no_error (error); res = g_regex_match (regex, "(middle)", 0, &match); @@ -2031,7 +2032,7 @@ test_recursion (void) g_match_info_free (match); g_regex_unref (regex); - regex = g_regex_new ("^( \\( ( [^()]++ | (?1) )* \\) )$", G_REGEX_OPTIMIZE|G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error); + regex = g_regex_new ("^( \\( ( [^()]++ | (?1) )* \\) )$", G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error); g_assert (regex); g_assert_no_error (error); res = g_regex_match (regex, "((((((((((((((((middle))))))))))))))))", 0, &match); @@ -2044,7 +2045,7 @@ test_recursion (void) g_match_info_free (match); g_regex_unref (regex); - regex = g_regex_new ("^(? \\( ( [^()]++ | (?&pn) )* \\) )$", G_REGEX_OPTIMIZE|G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error); + regex = g_regex_new ("^(? \\( ( [^()]++ | (?&pn) )* \\) )$", G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error); g_assert (regex); g_assert_no_error (error); g_regex_match (regex, "(aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa()", 0, &match); @@ -2053,7 +2054,7 @@ test_recursion (void) g_match_info_free (match); g_regex_unref (regex); - regex = g_regex_new ("< (?: (?(R) \\d++ | [^<>]*+) | (?R)) * >", G_REGEX_OPTIMIZE|G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error); + regex = g_regex_new ("< (?: (?(R) \\d++ | [^<>]*+) | (?R)) * >", G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error); g_assert (regex); g_assert_no_error (error); res = g_regex_match (regex, ">>>", 0, &match); @@ -2072,7 +2073,7 @@ test_recursion (void) g_match_info_free (match); g_regex_unref (regex); - regex = g_regex_new ("^((.)(?1)\\2|.)$", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); + regex = g_regex_new ("^((.)(?1)\\2|.)$", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); g_assert (regex); g_assert_no_error (error); res = g_regex_match (regex, "abcdcba", 0, &match); @@ -2085,7 +2086,7 @@ test_recursion (void) g_match_info_free (match); g_regex_unref (regex); - regex = g_regex_new ("^(?:((.)(?1)\\2|)|((.)(?3)\\4|.))$", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); + regex = g_regex_new ("^(?:((.)(?1)\\2|)|((.)(?3)\\4|.))$", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); g_assert (regex); g_assert_no_error (error); res = g_regex_match (regex, "abcdcba", 0, &match); @@ -2098,7 +2099,7 @@ test_recursion (void) g_match_info_free (match); g_regex_unref (regex); - regex = g_regex_new ("^\\W*+(?:((.)\\W*+(?1)\\W*+\\2|)|((.)\\W*+(?3)\\W*+\\4|\\W*+.\\W*+))\\W*+$", G_REGEX_OPTIMIZE|G_REGEX_CASELESS, G_REGEX_MATCH_DEFAULT, &error); + regex = g_regex_new ("^\\W*+(?:((.)\\W*+(?1)\\W*+\\2|)|((.)\\W*+(?3)\\W*+\\4|\\W*+.\\W*+))\\W*+$", G_REGEX_CASELESS, G_REGEX_MATCH_DEFAULT, &error); g_assert (regex); g_assert_no_error (error); res = g_regex_match (regex, "abcdcba", 0, &match); @@ -2169,21 +2170,21 @@ test_max_lookbehind (void) } static gboolean -pcre_ge (guint64 major, guint64 minor) +pcre2_ge (guint64 major, guint64 minor) { - const char *version; - gchar *ptr; - guint64 pcre_major, pcre_minor; + gchar version[32]; + const gchar *ptr; + guint64 pcre2_major, pcre2_minor; - /* e.g. 8.35 2014-04-04 */ - version = pcre_version (); + /* e.g. 10.36 2020-12-04 */ + pcre2_config (PCRE2_CONFIG_VERSION, version); - pcre_major = g_ascii_strtoull (version, &ptr, 10); + pcre2_major = g_ascii_strtoull (version, (gchar **) &ptr, 10); /* ptr points to ".MINOR (release date)" */ g_assert (ptr[0] == '.'); - pcre_minor = g_ascii_strtoull (ptr + 1, NULL, 10); + pcre2_minor = g_ascii_strtoull (ptr + 1, NULL, 10); - return (pcre_major > major) || (pcre_major == major && pcre_minor >= minor); + return (pcre2_major > major) || (pcre2_major == major && pcre2_minor >= minor); } int @@ -2205,18 +2206,26 @@ main (int argc, char *argv[]) g_test_add_func ("/regex/max-lookbehind", test_max_lookbehind); /* TEST_NEW(pattern, compile_opts, match_opts) */ +G_GNUC_BEGIN_IGNORE_DEPRECATIONS TEST_NEW("[A-Z]+", G_REGEX_CASELESS | G_REGEX_EXTENDED | G_REGEX_OPTIMIZE, G_REGEX_MATCH_NOTBOL | G_REGEX_MATCH_PARTIAL); +G_GNUC_END_IGNORE_DEPRECATIONS TEST_NEW("", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT); TEST_NEW(".*", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT); +G_GNUC_BEGIN_IGNORE_DEPRECATIONS TEST_NEW(".*", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT); +G_GNUC_END_IGNORE_DEPRECATIONS TEST_NEW(".*", G_REGEX_MULTILINE, G_REGEX_MATCH_DEFAULT); TEST_NEW(".*", G_REGEX_DOTALL, G_REGEX_MATCH_DEFAULT); TEST_NEW(".*", G_REGEX_DOTALL, G_REGEX_MATCH_NOTBOL); TEST_NEW("(123\\d*)[a-zA-Z]+(?P.*)", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT); TEST_NEW("(123\\d*)[a-zA-Z]+(?P.*)", G_REGEX_CASELESS, G_REGEX_MATCH_DEFAULT); +G_GNUC_BEGIN_IGNORE_DEPRECATIONS TEST_NEW("(123\\d*)[a-zA-Z]+(?P.*)", G_REGEX_CASELESS | G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT); +G_GNUC_END_IGNORE_DEPRECATIONS TEST_NEW("(?Px)|(?Py)", G_REGEX_DUPNAMES, G_REGEX_MATCH_DEFAULT); +G_GNUC_BEGIN_IGNORE_DEPRECATIONS TEST_NEW("(?Px)|(?Py)", G_REGEX_DUPNAMES | G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT); +G_GNUC_END_IGNORE_DEPRECATIONS /* This gives "internal error: code overflow" with pcre 6.0 */ TEST_NEW("(?i)(?-i)", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT); TEST_NEW ("(?i)a", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT); @@ -2227,9 +2236,10 @@ main (int argc, char *argv[]) TEST_NEW ("(?U)[a-z]+", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT); /* TEST_NEW_CHECK_FLAGS(pattern, compile_opts, match_ops, real_compile_opts, real_match_opts) */ +G_GNUC_BEGIN_IGNORE_DEPRECATIONS TEST_NEW_CHECK_FLAGS ("a", G_REGEX_OPTIMIZE, 0, G_REGEX_OPTIMIZE, 0); +G_GNUC_END_IGNORE_DEPRECATIONS TEST_NEW_CHECK_FLAGS ("a", G_REGEX_RAW, 0, G_REGEX_RAW, 0); - TEST_NEW_CHECK_FLAGS ("(?X)a", 0, 0, 0 /* not exposed by GRegex */, 0); TEST_NEW_CHECK_FLAGS ("^.*", 0, 0, G_REGEX_ANCHORED, 0); TEST_NEW_CHECK_FLAGS ("(*UTF8)a", 0, 0, 0 /* this is the default in GRegex */, 0); TEST_NEW_CHECK_FLAGS ("(*UCP)a", 0, 0, 0 /* this always on in GRegex */, 0); @@ -2257,16 +2267,16 @@ main (int argc, char *argv[]) TEST_NEW_FAIL ("a{4,2}", 0, G_REGEX_ERROR_QUANTIFIERS_OUT_OF_ORDER); TEST_NEW_FAIL ("a{999999,}", 0, G_REGEX_ERROR_QUANTIFIER_TOO_BIG); TEST_NEW_FAIL ("[a-z", 0, G_REGEX_ERROR_UNTERMINATED_CHARACTER_CLASS); - TEST_NEW_FAIL ("(?X)[\\B]", 0, G_REGEX_ERROR_INVALID_ESCAPE_IN_CHARACTER_CLASS); + TEST_NEW_FAIL ("[\\B]", 0, G_REGEX_ERROR_INVALID_ESCAPE_IN_CHARACTER_CLASS); TEST_NEW_FAIL ("[z-a]", 0, G_REGEX_ERROR_RANGE_OUT_OF_ORDER); TEST_NEW_FAIL ("{2,4}", 0, G_REGEX_ERROR_NOTHING_TO_REPEAT); TEST_NEW_FAIL ("a(?u)", 0, G_REGEX_ERROR_UNRECOGNIZED_CHARACTER); - TEST_NEW_FAIL ("a(?<$foo)bar", 0, G_REGEX_ERROR_UNRECOGNIZED_CHARACTER); + TEST_NEW_FAIL ("a(?<$foo)bar", 0, G_REGEX_ERROR_MISSING_SUBPATTERN_NAME); TEST_NEW_FAIL ("a[:alpha:]b", 0, G_REGEX_ERROR_POSIX_NAMED_CLASS_OUTSIDE_CLASS); TEST_NEW_FAIL ("a(b", 0, G_REGEX_ERROR_UNMATCHED_PARENTHESIS); TEST_NEW_FAIL ("a)b", 0, G_REGEX_ERROR_UNMATCHED_PARENTHESIS); TEST_NEW_FAIL ("a(?R", 0, G_REGEX_ERROR_UNMATCHED_PARENTHESIS); - TEST_NEW_FAIL ("a(?-54", 0, G_REGEX_ERROR_UNMATCHED_PARENTHESIS); + TEST_NEW_FAIL ("a(?-54", 0, G_REGEX_ERROR_INEXISTENT_SUBPATTERN_REFERENCE); TEST_NEW_FAIL ("(ab\\2)", 0, G_REGEX_ERROR_INEXISTENT_SUBPATTERN_REFERENCE); TEST_NEW_FAIL ("a(?#abc", 0, G_REGEX_ERROR_UNTERMINATED_COMMENT); TEST_NEW_FAIL ("(?<=a+)b", 0, G_REGEX_ERROR_VARIABLE_LENGTH_LOOKBEHIND); @@ -2276,51 +2286,31 @@ main (int argc, char *argv[]) TEST_NEW_FAIL ("a[[:fubar:]]b", 0, G_REGEX_ERROR_UNKNOWN_POSIX_CLASS_NAME); TEST_NEW_FAIL ("[[.ch.]]", 0, G_REGEX_ERROR_POSIX_COLLATING_ELEMENTS_NOT_SUPPORTED); TEST_NEW_FAIL ("\\x{110000}", 0, G_REGEX_ERROR_HEX_CODE_TOO_LARGE); - TEST_NEW_FAIL ("^(?(0)f|b)oo", 0, G_REGEX_ERROR_INVALID_CONDITION); + TEST_NEW_FAIL ("^(?(0)f|b)oo", 0, G_REGEX_ERROR_INEXISTENT_SUBPATTERN_REFERENCE); TEST_NEW_FAIL ("(?<=\\C)X", 0, G_REGEX_ERROR_SINGLE_BYTE_MATCH_IN_LOOKBEHIND); - TEST_NEW_FAIL ("(?!\\w)(?R)", 0, G_REGEX_ERROR_INFINITE_LOOP); - if (pcre_ge (8, 37)) - { - /* The expected errors changed here. */ - TEST_NEW_FAIL ("(?(?foo)\\gfoo)\\gfoo)\\geks)(?Peccs)", 0, G_REGEX_ERROR_DUPLICATE_SUBPATTERN_NAME); -#if 0 - TEST_NEW_FAIL (?, 0, G_REGEX_ERROR_MALFORMED_PROPERTY); - TEST_NEW_FAIL (?, 0, G_REGEX_ERROR_UNKNOWN_PROPERTY); -#endif TEST_NEW_FAIL ("\\666", G_REGEX_RAW, G_REGEX_ERROR_INVALID_OCTAL_VALUE); TEST_NEW_FAIL ("^(?(DEFINE) abc | xyz ) ", 0, G_REGEX_ERROR_TOO_MANY_BRANCHES_IN_DEFINE); TEST_NEW_FAIL ("a", G_REGEX_NEWLINE_CRLF | G_REGEX_NEWLINE_ANYCRLF, G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS); TEST_NEW_FAIL ("^(a)\\g{3", 0, G_REGEX_ERROR_MISSING_BACK_REFERENCE); - TEST_NEW_FAIL ("^(a)\\g{0}", 0, G_REGEX_ERROR_INVALID_RELATIVE_REFERENCE); - TEST_NEW_FAIL ("abc(*FAIL:123)xyz", 0, G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_FORBIDDEN); + TEST_NEW_FAIL ("^(a)\\g{0}", 0, G_REGEX_ERROR_INEXISTENT_SUBPATTERN_REFERENCE); + TEST_NEW ("abc(*FAIL:123)xyz", 0, 0); TEST_NEW_FAIL ("a(*FOOBAR)b", 0, G_REGEX_ERROR_UNKNOWN_BACKTRACKING_CONTROL_VERB); - TEST_NEW_FAIL ("(?i:A{1,}\\6666666666)", 0, G_REGEX_ERROR_NUMBER_TOO_BIG); + if (pcre2_ge (10, 37)) + { + TEST_NEW ("(?i:A{1,}\\6666666666)", 0, 0); + } TEST_NEW_FAIL ("(?)(?&)", 0, G_REGEX_ERROR_MISSING_SUBPATTERN_NAME); - TEST_NEW_FAIL ("(?+-a)", 0, G_REGEX_ERROR_MISSING_DIGIT); - TEST_NEW_FAIL ("TA]", G_REGEX_JAVASCRIPT_COMPAT, G_REGEX_ERROR_INVALID_DATA_CHARACTER); + TEST_NEW_FAIL ("(?+-a)", 0, G_REGEX_ERROR_INVALID_RELATIVE_REFERENCE); TEST_NEW_FAIL ("(?|(?A)|(?B))", 0, G_REGEX_ERROR_EXTRA_SUBPATTERN_NAME); TEST_NEW_FAIL ("a(*MARK)b", 0, G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_REQUIRED); TEST_NEW_FAIL ("^\\c€", 0, G_REGEX_ERROR_INVALID_CONTROL_CHAR); TEST_NEW_FAIL ("\\k", 0, G_REGEX_ERROR_MISSING_NAME); TEST_NEW_FAIL ("a[\\NB]c", 0, G_REGEX_ERROR_NOT_SUPPORTED_IN_CLASS); TEST_NEW_FAIL ("(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEFG)XX", 0, G_REGEX_ERROR_NAME_TOO_LONG); - TEST_NEW_FAIL ("\\u0100", G_REGEX_RAW | G_REGEX_JAVASCRIPT_COMPAT, G_REGEX_ERROR_CHARACTER_VALUE_TOO_LARGE); /* These errors can't really be tested easily: * G_REGEX_ERROR_EXPRESSION_TOO_LARGE @@ -2476,7 +2466,15 @@ main (int argc, char *argv[]) TEST_MATCH("a#\nb", G_REGEX_EXTENDED, 0, "a", -1, 0, 0, FALSE); TEST_MATCH("a#\r\nb", G_REGEX_EXTENDED, 0, "a", -1, 0, 0, FALSE); TEST_MATCH("a#\rb", G_REGEX_EXTENDED, 0, "a", -1, 0, 0, FALSE); - TEST_MATCH("a#\nb", G_REGEX_EXTENDED, G_REGEX_MATCH_NEWLINE_CR, "a", -1, 0, 0, FALSE); + /* Due to PCRE2 only supporting newline settings passed to pcre2_compile (and + * not to pcre2_match also), we have to compile the pattern with the + * effective (combined from compile and match options) newline setting. + * However, this setting also affects how newlines are interpreted *inside* + * the pattern. With G_REGEX_EXTENDED, this changes where the comment + * (started with `#`) ends. + */ + /* On PCRE1, this test expected no match; on PCRE2 it matches because of the above. */ + TEST_MATCH("a#\nb", G_REGEX_EXTENDED, G_REGEX_MATCH_NEWLINE_CR, "a", -1, 0, 0, TRUE /*FALSE*/); TEST_MATCH("a#\nb", G_REGEX_EXTENDED | G_REGEX_NEWLINE_CR, 0, "a", -1, 0, 0, TRUE); TEST_MATCH("line\nbreak", G_REGEX_MULTILINE, 0, "this is a line\nbreak", -1, 0, 0, TRUE); @@ -2489,21 +2487,19 @@ main (int argc, char *argv[]) * with pcre's internal tables. Bug #678273 */ TEST_MATCH("[DŽ]", G_REGEX_CASELESS, 0, "DŽ", -1, 0, 0, TRUE); TEST_MATCH("[DŽ]", G_REGEX_CASELESS, 0, "dž", -1, 0, 0, TRUE); -#if PCRE_MAJOR > 8 || (PCRE_MAJOR == 8 && PCRE_MINOR >= 32) - /* This would incorrectly fail to match in pcre < 8.32, so only assert - * this for known-good pcre. */ TEST_MATCH("[DŽ]", G_REGEX_CASELESS, 0, "Dž", -1, 0, 0, TRUE); -#endif /* TEST_MATCH_NEXT#(pattern, string, string_len, start_position, ...) */ TEST_MATCH_NEXT0("a", "x", -1, 0); TEST_MATCH_NEXT0("a", "ax", -1, 1); TEST_MATCH_NEXT0("a", "xa", 1, 0); TEST_MATCH_NEXT0("a", "axa", 1, 2); + TEST_MATCH_NEXT1("", "", -1, 0, "", 0, 0); TEST_MATCH_NEXT1("a", "a", -1, 0, "a", 0, 1); TEST_MATCH_NEXT1("a", "xax", -1, 0, "a", 1, 2); TEST_MATCH_NEXT1(EURO, ENG EURO, -1, 0, EURO, 2, 5); TEST_MATCH_NEXT1("a*", "", -1, 0, "", 0, 0); + TEST_MATCH_NEXT2("", "a", -1, 0, "", 0, 0, "", 1, 1); TEST_MATCH_NEXT2("a*", "aa", -1, 0, "aa", 0, 2, "", 2, 2); TEST_MATCH_NEXT2(EURO "*", EURO EURO, -1, 0, EURO EURO, 0, 6, "", 6, 6); TEST_MATCH_NEXT2("a", "axa", -1, 0, "a", 0, 1, "a", 2, 3); @@ -2677,11 +2673,6 @@ main (int argc, char *argv[]) TEST_EXPAND("a", "a", "\\0130", FALSE, "X"); TEST_EXPAND("a", "a", "\\\\\\0", FALSE, "\\a"); TEST_EXPAND("a(?P.)c", "xabcy", "X\\gX", FALSE, "XbX"); -#if !(PCRE_MAJOR > 8 || (PCRE_MAJOR == 8 && PCRE_MINOR >= 34)) - /* PCRE >= 8.34 no longer allows this usage. */ - TEST_EXPAND("(.)(?P<1>.)", "ab", "\\1", FALSE, "a"); - TEST_EXPAND("(.)(?P<1>.)", "ab", "\\g<1>", FALSE, "a"); -#endif TEST_EXPAND(".", EURO, "\\0", FALSE, EURO); TEST_EXPAND("(.)", EURO, "\\1", FALSE, EURO); TEST_EXPAND("(?P.)", EURO, "\\g", FALSE, EURO); @@ -2800,6 +2791,10 @@ main (int argc, char *argv[]) TEST_GET_STRING_NUMBER("(?P.)(?Pa)", "A", 1); TEST_GET_STRING_NUMBER("(?P.)(?Pa)", "B", 2); TEST_GET_STRING_NUMBER("(?P.)(?Pa)", "C", -1); + TEST_GET_STRING_NUMBER("(?P.)(?Pa)(?Pb)", "A", 1); + TEST_GET_STRING_NUMBER("(?P.)(?Pa)(?Pb)", "B", 2); + TEST_GET_STRING_NUMBER("(?P.)(?Pa)(?Pb)", "C", 3); + TEST_GET_STRING_NUMBER("(?P.)(?Pa)(?Pb)", "D", -1); TEST_GET_STRING_NUMBER("(?P.)(.)(?Pa)", "A", 1); TEST_GET_STRING_NUMBER("(?P.)(.)(?Pa)", "B", 3); TEST_GET_STRING_NUMBER("(?P.)(.)(?Pa)", "C", -1); diff --git a/meson.build b/meson.build index 0edbc2bf0..00d38cbec 100644 --- a/meson.build +++ b/meson.build @@ -2017,37 +2017,38 @@ else libiconv = dependency('iconv') endif -pcre = dependency('libpcre', version: '>= 8.31', required : false) # Should check for Unicode support, too. FIXME -if not pcre.found() +pcre2 = dependency('libpcre2-8', version: '>= 10.32', required : false) +if not pcre2.found() if cc.get_id() == 'msvc' or cc.get_id() == 'clang-cl' - # MSVC: Search for the PCRE library by the configuration, which corresponds - # to the output of CMake builds of PCRE. Note that debugoptimized + # MSVC: Search for the PCRE2 library by the configuration, which corresponds + # to the output of CMake builds of PCRE2. Note that debugoptimized # is really a Release build with .PDB files. if vs_crt == 'debug' - pcre = cc.find_library('pcred', required : false) + pcre2 = cc.find_library('pcre2d-8', required : false) else - pcre = cc.find_library('pcre', required : false) + pcre2 = cc.find_library('pcre2-8', required : false) endif endif endif # Try again with the fallback -if not pcre.found() - pcre = dependency('libpcre', required : true, fallback : ['pcre', 'pcre_dep']) - use_pcre_static_flag = true +if not pcre2.found() + pcre2 = dependency('libpcre2-8', required : true, fallback : ['pcre2', 'libpcre2_8']) + use_pcre2_static_flag = true elif host_system == 'windows' - pcre_static = cc.links('''#define PCRE_STATIC - #include - int main() { - void *p = NULL; - pcre_free(p); - return 0; - }''', - dependencies: pcre, - name : 'Windows system PCRE is a static build') - use_pcre_static_flag = pcre_static + pcre2_static = cc.links('''#define PCRE2_STATIC + #define PCRE2_CODE_UNIT_WIDTH 8 + #include + int main() { + void *p = NULL; + pcre2_code_free(p); + return 0; + }''', + dependencies: pcre2, + name : 'Windows system PCRE2 is a static build') + use_pcre2_static_flag = pcre2_static else - use_pcre_static_flag = false + use_pcre2_static_flag = false endif # Import the gvdb sources as a subproject to avoid having the copylib in-tree diff --git a/po/sk.po b/po/sk.po index 8d6a1ced7..747ad2715 100644 --- a/po/sk.po +++ b/po/sk.po @@ -5630,7 +5630,7 @@ msgstr "zlý ofset" msgid "short utf8" msgstr "krátke utf8" -# Ide o omyl programátora: case PCRE_ERROR_RECURSELOOP: return _("recursion loop"); +# Ide o omyl programátora: case PCRE2_ERROR_RECURSELOOP: return _("recursion loop"); #: glib/gregex.c:303 msgid "recursion loop" msgstr "rekurzívna slučka" diff --git a/subprojects/pcre.wrap b/subprojects/pcre.wrap deleted file mode 100644 index 22f524303..000000000 --- a/subprojects/pcre.wrap +++ /dev/null @@ -1,11 +0,0 @@ -[wrap-file] -directory = pcre-8.45 -source_url = https://sourceforge.net/projects/pcre/files/pcre/8.45/pcre-8.45.tar.bz2 -source_filename = pcre-8.45.tar.bz2 -source_hash = 4dae6fdcd2bb0bb6c37b5f97c33c2be954da743985369cddac3546e3218bffb8 -patch_filename = pcre_8.45-1_patch.zip -patch_url = https://wrapdb.mesonbuild.com/v2/pcre_8.45-1/get_patch -patch_hash = 821f9b0e5578c5b3983434465de93e30ddfceb92e331a0c9110aba6bf0634ffa - -[provide] -libpcre = pcre_dep diff --git a/subprojects/pcre2.wrap b/subprojects/pcre2.wrap index d210eb65d..6196f1d0c 100644 --- a/subprojects/pcre2.wrap +++ b/subprojects/pcre2.wrap @@ -3,14 +3,13 @@ directory = pcre2-10.40 source_url = https://github.com/PhilipHazel/pcre2/releases/download/pcre2-10.40/pcre2-10.40.tar.bz2 source_filename = pcre2-10.40.tar.bz2 source_hash = 14e4b83c4783933dc17e964318e6324f7cae1bc75d8f3c79bc6969f00c159d68 -patch_filename = pcre2_10.40-2_patch.zip -patch_url = https://wrapdb.mesonbuild.com/v2/pcre2_10.40-2/get_patch -patch_hash = 6c1bbb3c00ec3917b3abee922dedc19ad8b3a199db65ba893a61241e3428bdbd -wrapdb_version = 10.40-2 +patch_filename = pcre2_10.40-3_patch.zip +patch_url = https://wrapdb.mesonbuild.com/v2/pcre2_10.40-3/get_patch +patch_hash = 95391923529b4c1647a2cf88cd3b59cceb4f92393775e011f530e7865de0c7fb +wrapdb_version = 10.40-3 [provide] libpcre2-8 = -libpcre2_8 libpcre2-16 = -libpcre2_16 libpcre2-32 = -libpcre2_32 libpcre2-posix = -libpcre2_posix -