mirror of
https://gitlab.gnome.org/GNOME/glib.git
synced 2025-02-03 01:36:17 +01:00
Update the internal copy of PCRE to 7.4
2007-11-06 Matthias Clasen <mclasen@redhat.com> * glib/pcre/*: Update the internal copy of PCRE to 7.4 svn path=/trunk/; revision=5800
This commit is contained in:
parent
9b79a12192
commit
5b81acfae2
@ -1,3 +1,7 @@
|
||||
2007-11-06 Matthias Clasen <mclasen@redhat.com>
|
||||
|
||||
* glib/pcre/*: Update the internal copy of PCRE to 7.4
|
||||
|
||||
2007-10-22 Tor Lillqvist <tml@novell.com>
|
||||
|
||||
* glib/gutils.c (_glib_gettext): Plug small one-time leak on
|
||||
|
@ -9,12 +9,10 @@ INCLUDES = \
|
||||
-DMAX_NAME_COUNT=10000 \
|
||||
-DMAX_DUPLENGTH=30000 \
|
||||
-DLINK_SIZE=2 \
|
||||
-UEBCDIC \
|
||||
-DPOSIX_MALLOC_THRESHOLD=10 \
|
||||
-I$(top_srcdir) \
|
||||
-I$(srcdir) \
|
||||
-I$(top_srcdir)/glib \
|
||||
-I$(top_builddir)/glib \
|
||||
@GLIB_DEBUG_FLAGS@ \
|
||||
-DG_DISABLE_DEPRECATED \
|
||||
-DGLIB_COMPILATION \
|
||||
@ -28,8 +26,8 @@ noinst_LTLIBRARIES = libpcre.la
|
||||
libpcre_headers =
|
||||
|
||||
libpcre_la_SOURCES = \
|
||||
pcre_chartables.c \
|
||||
pcre_compile.c \
|
||||
pcre_chartables.c \
|
||||
pcre_config.c \
|
||||
pcre_dfa_exec.c \
|
||||
pcre_exec.c \
|
||||
@ -51,7 +49,6 @@ libpcre_la_SOURCES = \
|
||||
pcre.h \
|
||||
pcre_internal.h \
|
||||
ucp.h \
|
||||
ucpinternal.h \
|
||||
$(libpcre_headers)
|
||||
|
||||
libpcre_la_LIBADD = $(DEP_LIBS)
|
||||
|
@ -22,7 +22,7 @@ DEFINES = \
|
||||
-DPOSIX_MALLOC_THRESHOLD=10
|
||||
|
||||
OBJECTS = \
|
||||
pcre_chartables.obj \
|
||||
\.obj \
|
||||
pcre_compile.obj \
|
||||
pcre_config.obj \
|
||||
pcre_dfa_exec.obj \
|
||||
@ -31,6 +31,7 @@ OBJECTS = \
|
||||
pcre_get.obj \
|
||||
pcre_globals.obj \
|
||||
pcre_info.obj \
|
||||
pcre_internal.obj \
|
||||
pcre_maketables.obj \
|
||||
pcre_newline.obj \
|
||||
pcre_ord2utf8.obj \
|
||||
@ -42,6 +43,9 @@ OBJECTS = \
|
||||
pcre_valid_utf8.obj \
|
||||
pcre_version.obj \
|
||||
pcre_xclass.obj \
|
||||
ucp.obj \
|
||||
ucpinternal.obj \
|
||||
ucptable.h.obj \
|
||||
|
||||
all : pcre.lib
|
||||
|
||||
|
@ -42,33 +42,45 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
/* The current PCRE version information. */
|
||||
|
||||
#define PCRE_MAJOR 7
|
||||
#define PCRE_MINOR 2
|
||||
#define PCRE_MINOR 4
|
||||
#define PCRE_PRERELEASE
|
||||
#define PCRE_DATE 2007-06-19
|
||||
#define PCRE_DATE 2007-09-21
|
||||
|
||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||
imported have to be identified as such. When building PCRE, the appropriate
|
||||
export setting is defined in pcre_internal.h, which includes this file. So we
|
||||
don't change an existing definition of PCRE_EXP_DECL. */
|
||||
don't change existing definitions of PCRE_EXP_DECL and PCRECPP_EXP_DECL. */
|
||||
|
||||
/* But don't do that when building as part of GLib */
|
||||
#if 0
|
||||
#ifndef PCRE_EXP_DECL
|
||||
# ifdef _WIN32
|
||||
# ifndef PCRE_STATIC
|
||||
# define PCRE_EXP_DECL extern __declspec(dllimport)
|
||||
#if defined(_WIN32) && !defined(PCRE_STATIC)
|
||||
# ifndef PCRE_EXP_DECL
|
||||
# define PCRE_EXP_DECL extern __declspec(dllimport)
|
||||
# endif
|
||||
# ifdef __cplusplus
|
||||
# ifndef PCRECPP_EXP_DECL
|
||||
# define PCRECPP_EXP_DECL extern __declspec(dllimport)
|
||||
# endif
|
||||
# ifndef PCRECPP_EXP_DEFN
|
||||
# define PCRECPP_EXP_DEFN __declspec(dllimport)
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* By default, we use the standard "extern" declarations. */
|
||||
|
||||
#ifndef PCRE_EXP_DECL
|
||||
# ifdef __cplusplus
|
||||
# define PCRE_EXP_DECL extern "C"
|
||||
# define PCRE_EXP_DECL extern "C"
|
||||
# else
|
||||
# define PCRE_EXP_DECL extern
|
||||
# define PCRE_EXP_DECL extern
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
# ifndef PCRECPP_EXP_DECL
|
||||
# define PCRECPP_EXP_DECL extern
|
||||
# endif
|
||||
# ifndef PCRECPP_EXP_DEFN
|
||||
# define PCRECPP_EXP_DEFN
|
||||
# endif
|
||||
#endif
|
||||
|
||||
@ -110,6 +122,8 @@ extern "C" {
|
||||
#define PCRE_NEWLINE_CRLF 0x00300000
|
||||
#define PCRE_NEWLINE_ANY 0x00400000
|
||||
#define PCRE_NEWLINE_ANYCRLF 0x00500000
|
||||
#define PCRE_BSR_ANYCRLF 0x00800000
|
||||
#define PCRE_BSR_UNICODE 0x01000000
|
||||
|
||||
/* Exec-time and get/set-time error codes */
|
||||
|
||||
@ -135,7 +149,7 @@ extern "C" {
|
||||
#define PCRE_ERROR_DFA_WSSIZE (-19)
|
||||
#define PCRE_ERROR_DFA_RECURSE (-20)
|
||||
#define PCRE_ERROR_RECURSIONLIMIT (-21)
|
||||
#define PCRE_ERROR_NULLWSLIMIT (-22)
|
||||
#define PCRE_ERROR_NULLWSLIMIT (-22) /* No longer actually used */
|
||||
#define PCRE_ERROR_BADNEWLINE (-23)
|
||||
|
||||
/* Request types for pcre_fullinfo() */
|
||||
@ -155,6 +169,7 @@ extern "C" {
|
||||
#define PCRE_INFO_DEFAULT_TABLES 11
|
||||
#define PCRE_INFO_OKPARTIAL 12
|
||||
#define PCRE_INFO_JCHANGED 13
|
||||
#define PCRE_INFO_HASCRORLF 14
|
||||
|
||||
/* Request types for pcre_config(). Do not re-arrange, in order to remain
|
||||
compatible. */
|
||||
@ -167,6 +182,7 @@ compatible. */
|
||||
#define PCRE_CONFIG_STACKRECURSE 5
|
||||
#define PCRE_CONFIG_UNICODE_PROPERTIES 6
|
||||
#define PCRE_CONFIG_MATCH_LIMIT_RECURSION 7
|
||||
#define PCRE_CONFIG_BSR 8
|
||||
|
||||
/* Bit flags for the pcre_extra structure. Do not re-arrange or redefine
|
||||
these bits, just add new ones on the end, in order to remain compatible. */
|
||||
@ -226,6 +242,13 @@ typedef struct pcre_callout_block {
|
||||
/* ------------------------------------------------------------------ */
|
||||
} pcre_callout_block;
|
||||
|
||||
|
||||
/* Indirection for store get and free functions. These can be set to
|
||||
alternative malloc/free functions if required. Special ones are used in the
|
||||
non-recursive case for "frames". There is also an optional callout function
|
||||
that is triggered by the (?C) regex item. For Virtual Pascal, these definitions
|
||||
have to take another form. */
|
||||
|
||||
#include "glib.h"
|
||||
#include "galias.h"
|
||||
|
||||
|
@ -1,3 +1,6 @@
|
||||
/* This file is autogenerated by ../update-pcre/update.sh during
|
||||
* the update of the local copy of PCRE.
|
||||
*/
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
@ -14,12 +17,16 @@ example ISO-8859-1. When dftables is run, it creates these tables in the
|
||||
current locale. If PCRE is configured with --enable-rebuild-chartables, this
|
||||
happens automatically.
|
||||
|
||||
The following #include is present because without it gcc 4.x may remove the
|
||||
The following #includes are present because without them gcc 4.x may remove the
|
||||
array definition from the final binary if PCRE is built into a static library
|
||||
and dead code stripping is activated. This leads to link errors. Pulling in the
|
||||
header ensures that the array gets flagged as "someone outside this compilation
|
||||
unit might reference this" and so it will always be supplied to the linker. */
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
const unsigned char _pcre_default_tables[] = {
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -41,6 +41,10 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
/* This module contains the external function pcre_config(). */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
@ -83,6 +87,14 @@ switch (what)
|
||||
*((int *)where) = NEWLINE;
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_BSR:
|
||||
#ifdef BSR_ANYCRLF
|
||||
*((int *)where) = 1;
|
||||
#else
|
||||
*((int *)where) = 0;
|
||||
#endif
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_LINK_SIZE:
|
||||
*((int *)where) = LINK_SIZE;
|
||||
break;
|
||||
|
@ -44,6 +44,10 @@ FSM). This is NOT Perl- compatible, but it has advantages in certain
|
||||
applications. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#define NLBLOCK md /* Block containing newline information */
|
||||
#define PSSTART start_subject /* Field containing processed string start */
|
||||
#define PSEND end_subject /* Field containing processed string end */
|
||||
@ -126,7 +130,9 @@ static uschar coptable[] = {
|
||||
0, /* CREF */
|
||||
0, /* RREF */
|
||||
0, /* DEF */
|
||||
0, 0 /* BRAZERO, BRAMINZERO */
|
||||
0, 0, /* BRAZERO, BRAMINZERO */
|
||||
0, 0, 0, 0, /* PRUNE, SKIP, THEN, COMMIT */
|
||||
0, 0 /* FAIL, ACCEPT */
|
||||
};
|
||||
|
||||
/* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
|
||||
@ -1074,15 +1080,20 @@ for (;;)
|
||||
int ncount = 0;
|
||||
switch (c)
|
||||
{
|
||||
case 0x000d:
|
||||
if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
|
||||
/* Fall through */
|
||||
case 0x000a:
|
||||
case 0x000b:
|
||||
case 0x000c:
|
||||
case 0x0085:
|
||||
case 0x2028:
|
||||
case 0x2029:
|
||||
if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
|
||||
goto ANYNL01;
|
||||
|
||||
case 0x000d:
|
||||
if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
|
||||
/* Fall through */
|
||||
|
||||
ANYNL01:
|
||||
case 0x000a:
|
||||
if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
@ -1091,6 +1102,7 @@ for (;;)
|
||||
count++;
|
||||
ADD_NEW_DATA(-state_offset, count, ncount);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@ -1307,15 +1319,20 @@ for (;;)
|
||||
int ncount = 0;
|
||||
switch (c)
|
||||
{
|
||||
case 0x000d:
|
||||
if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
|
||||
/* Fall through */
|
||||
case 0x000a:
|
||||
case 0x000b:
|
||||
case 0x000c:
|
||||
case 0x0085:
|
||||
case 0x2028:
|
||||
case 0x2029:
|
||||
if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
|
||||
goto ANYNL02;
|
||||
|
||||
case 0x000d:
|
||||
if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
|
||||
/* Fall through */
|
||||
|
||||
ANYNL02:
|
||||
case 0x000a:
|
||||
if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
|
||||
codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
|
||||
{
|
||||
@ -1324,6 +1341,7 @@ for (;;)
|
||||
}
|
||||
ADD_NEW_DATA(-(state_offset + count), 0, ncount);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@ -1539,15 +1557,20 @@ for (;;)
|
||||
int ncount = 0;
|
||||
switch (c)
|
||||
{
|
||||
case 0x000d:
|
||||
if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
|
||||
/* Fall through */
|
||||
case 0x000a:
|
||||
case 0x000b:
|
||||
case 0x000c:
|
||||
case 0x0085:
|
||||
case 0x2028:
|
||||
case 0x2029:
|
||||
if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
|
||||
goto ANYNL03;
|
||||
|
||||
case 0x000d:
|
||||
if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
|
||||
/* Fall through */
|
||||
|
||||
ANYNL03:
|
||||
case 0x000a:
|
||||
if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
@ -1558,6 +1581,7 @@ for (;;)
|
||||
else
|
||||
{ ADD_NEW_DATA(-state_offset, count, ncount); }
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@ -1738,14 +1762,17 @@ for (;;)
|
||||
case OP_ANYNL:
|
||||
if (clen > 0) switch(c)
|
||||
{
|
||||
case 0x000a:
|
||||
case 0x000b:
|
||||
case 0x000c:
|
||||
case 0x0085:
|
||||
case 0x2028:
|
||||
case 0x2029:
|
||||
if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
|
||||
|
||||
case 0x000a:
|
||||
ADD_NEW(state_offset + 1, 0);
|
||||
break;
|
||||
|
||||
case 0x000d:
|
||||
if (ptr + 1 < end_subject && ptr[1] == 0x0a)
|
||||
{
|
||||
@ -2568,6 +2595,18 @@ md->end_subject = end_subject;
|
||||
md->moptions = options;
|
||||
md->poptions = re->options;
|
||||
|
||||
/* If the BSR option is not set at match time, copy what was set
|
||||
at compile time. */
|
||||
|
||||
if ((md->moptions & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) == 0)
|
||||
{
|
||||
if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
|
||||
md->moptions |= re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE);
|
||||
#ifdef BSR_ANYCRLF
|
||||
else md->moptions |= PCRE_BSR_ANYCRLF;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Handle different types of newline. The three bits give eight cases. If
|
||||
nothing is set at run time, whatever was used at compile time applies. */
|
||||
|
||||
@ -2638,7 +2677,7 @@ if (md->tables == NULL) md->tables = _pcre_default_tables;
|
||||
used in a loop when finding where to start. */
|
||||
|
||||
lcc = md->tables + lcc_offset;
|
||||
startline = (re->options & PCRE_STARTLINE) != 0;
|
||||
startline = (re->flags & PCRE_STARTLINE) != 0;
|
||||
firstline = (re->options & PCRE_FIRSTLINE) != 0;
|
||||
|
||||
/* Set up the first character to match, if available. The first_byte value is
|
||||
@ -2649,7 +2688,7 @@ studied, there may be a bitmap of possible first characters. */
|
||||
|
||||
if (!anchored)
|
||||
{
|
||||
if ((re->options & PCRE_FIRSTSET) != 0)
|
||||
if ((re->flags & PCRE_FIRSTSET) != 0)
|
||||
{
|
||||
first_byte = re->first_byte & 255;
|
||||
if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
|
||||
@ -2666,7 +2705,7 @@ if (!anchored)
|
||||
/* For anchored or unanchored matches, there may be a "last known required
|
||||
character" set. */
|
||||
|
||||
if ((re->options & PCRE_REQCHSET) != 0)
|
||||
if ((re->flags & PCRE_REQCHSET) != 0)
|
||||
{
|
||||
req_byte = re->req_byte & 255;
|
||||
req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
|
||||
@ -2836,16 +2875,17 @@ for (;;)
|
||||
}
|
||||
if (current_subject > end_subject) break;
|
||||
|
||||
/* If we have just passed a CR and the newline option is CRLF or ANY or
|
||||
ANYCRLF, and we are now at a LF, advance the match position by one more
|
||||
character. */
|
||||
/* If we have just passed a CR and we are now at a LF, and the pattern does
|
||||
not contain any explicit matches for \r or \n, and the newline option is CRLF
|
||||
or ANY or ANYCRLF, advance the match position by one more character. */
|
||||
|
||||
if (current_subject[-1] == '\r' &&
|
||||
(md->nltype == NLTYPE_ANY ||
|
||||
md->nltype == NLTYPE_ANYCRLF ||
|
||||
md->nllen == 2) &&
|
||||
current_subject < end_subject &&
|
||||
*current_subject == '\n')
|
||||
current_subject < end_subject &&
|
||||
*current_subject == '\n' &&
|
||||
(re->flags & PCRE_HASCRORLF) == 0 &&
|
||||
(md->nltype == NLTYPE_ANY ||
|
||||
md->nltype == NLTYPE_ANYCRLF ||
|
||||
md->nllen == 2))
|
||||
current_subject++;
|
||||
|
||||
} /* "Bumpalong" loop */
|
||||
|
@ -42,6 +42,10 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
pattern matching using an NFA algorithm, trying to mimic Perl as closely as
|
||||
possible. There are also some static supporting functions. */
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#define NLBLOCK md /* Block containing newline information */
|
||||
#define PSSTART start_subject /* Field containing processed string start */
|
||||
#define PSEND end_subject /* Field containing processed string end */
|
||||
@ -53,16 +57,10 @@ possible. There are also some static supporting functions. */
|
||||
#undef min
|
||||
#undef max
|
||||
|
||||
/* The chain of eptrblocks for tail recursions uses memory in stack workspace,
|
||||
obtained at top level, the size of which is defined by EPTR_WORK_SIZE. */
|
||||
|
||||
#define EPTR_WORK_SIZE (1000)
|
||||
|
||||
/* Flag bits for the match() function */
|
||||
|
||||
#define match_condassert 0x01 /* Called to check a condition assertion */
|
||||
#define match_cbegroup 0x02 /* Could-be-empty unlimited repeat group */
|
||||
#define match_tail_recursed 0x04 /* Tail recursive call */
|
||||
|
||||
/* Non-error returns from the match() function. Error returns are externally
|
||||
defined PCRE_ERROR_xxx codes, which are all negative. */
|
||||
@ -70,6 +68,14 @@ defined PCRE_ERROR_xxx codes, which are all negative. */
|
||||
#define MATCH_MATCH 1
|
||||
#define MATCH_NOMATCH 0
|
||||
|
||||
/* Special internal returns from the match() function. Make them sufficiently
|
||||
negative to avoid the external error codes. */
|
||||
|
||||
#define MATCH_COMMIT (-999)
|
||||
#define MATCH_PRUNE (-998)
|
||||
#define MATCH_SKIP (-997)
|
||||
#define MATCH_THEN (-996)
|
||||
|
||||
/* Maximum number of ints of offset to save on the stack for recursive calls.
|
||||
If the offset vector is bigger, malloc is used. This should be a multiple of 3,
|
||||
because the offset vector is always a multiple of 3 long. */
|
||||
@ -205,15 +211,15 @@ variable instead of being passed in the frame.
|
||||
****************************************************************************
|
||||
***************************************************************************/
|
||||
|
||||
|
||||
/* Numbers for RMATCH calls */
|
||||
/* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
|
||||
below must be updated in sync. */
|
||||
|
||||
enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10,
|
||||
RM11, RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
|
||||
RM21, RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
|
||||
RM31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
|
||||
RM41, RM42, RM43, RM44, RM45, RM46, RM47 };
|
||||
|
||||
RM41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
|
||||
RM51, RM52, RM53, RM54 };
|
||||
|
||||
/* These versions of the macros use the stack, as normal. There are debugging
|
||||
versions and production versions. Note that the "rw" argument of RMATCH isn't
|
||||
@ -384,7 +390,6 @@ Arguments:
|
||||
match_condassert - this is an assertion condition
|
||||
match_cbegroup - this is the start of an unlimited repeat
|
||||
group that can match an empty string
|
||||
match_tail_recursed - this is a tail_recursed group
|
||||
rdepth the recursion depth
|
||||
|
||||
Returns: MATCH_MATCH if matched ) these values are >= 0
|
||||
@ -586,22 +591,16 @@ original_ims = ims; /* Save for resetting on ')' */
|
||||
string, the match_cbegroup flag is set. When this is the case, add the current
|
||||
subject pointer to the chain of such remembered pointers, to be checked when we
|
||||
hit the closing ket, in order to break infinite loops that match no characters.
|
||||
When match() is called in other circumstances, don't add to the chain. If this
|
||||
is a tail recursion, use a block from the workspace, as the one on the stack is
|
||||
already used. */
|
||||
When match() is called in other circumstances, don't add to the chain. The
|
||||
match_cbegroup flag must NOT be used with tail recursion, because the memory
|
||||
block that is used is on the stack, so a new one may be required for each
|
||||
match(). */
|
||||
|
||||
if ((flags & match_cbegroup) != 0)
|
||||
{
|
||||
eptrblock *p;
|
||||
if ((flags & match_tail_recursed) != 0)
|
||||
{
|
||||
if (md->eptrn >= EPTR_WORK_SIZE) RRETURN(PCRE_ERROR_NULLWSLIMIT);
|
||||
p = md->eptrchain + md->eptrn++;
|
||||
}
|
||||
else p = &newptrb;
|
||||
p->epb_saved_eptr = eptr;
|
||||
p->epb_prev = eptrb;
|
||||
eptrb = p;
|
||||
newptrb.epb_saved_eptr = eptr;
|
||||
newptrb.epb_prev = eptrb;
|
||||
eptrb = &newptrb;
|
||||
}
|
||||
|
||||
/* Now start processing the opcodes. */
|
||||
@ -621,6 +620,34 @@ for (;;)
|
||||
|
||||
switch(op)
|
||||
{
|
||||
case OP_FAIL:
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
|
||||
case OP_PRUNE:
|
||||
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
|
||||
ims, eptrb, flags, RM51);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
RRETURN(MATCH_PRUNE);
|
||||
|
||||
case OP_COMMIT:
|
||||
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
|
||||
ims, eptrb, flags, RM52);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
RRETURN(MATCH_COMMIT);
|
||||
|
||||
case OP_SKIP:
|
||||
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
|
||||
ims, eptrb, flags, RM53);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
md->start_match_ptr = eptr; /* Pass back current position */
|
||||
RRETURN(MATCH_SKIP);
|
||||
|
||||
case OP_THEN:
|
||||
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
|
||||
ims, eptrb, flags, RM54);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
RRETURN(MATCH_THEN);
|
||||
|
||||
/* Handle a capturing bracket. If there is space in the offset vector, save
|
||||
the current subject position in the working slot at the top of the vector.
|
||||
We mustn't change the current values of the data slot, because they may be
|
||||
@ -662,7 +689,7 @@ for (;;)
|
||||
{
|
||||
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
|
||||
ims, eptrb, flags, RM1);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
|
||||
md->capture_last = save_capture_last;
|
||||
ecode += GET(ecode, 1);
|
||||
}
|
||||
@ -677,15 +704,22 @@ for (;;)
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
|
||||
/* Insufficient room for saving captured contents. Treat as a non-capturing
|
||||
bracket. */
|
||||
/* FALL THROUGH ... Insufficient room for saving captured contents. Treat
|
||||
as a non-capturing bracket. */
|
||||
|
||||
/* VVVVVVVVVVVVVVVVVVVVVVVVV */
|
||||
/* VVVVVVVVVVVVVVVVVVVVVVVVV */
|
||||
|
||||
DPRINTF(("insufficient capture room: treat as non-capturing\n"));
|
||||
|
||||
/* VVVVVVVVVVVVVVVVVVVVVVVVV */
|
||||
/* VVVVVVVVVVVVVVVVVVVVVVVVV */
|
||||
|
||||
/* Non-capturing bracket. Loop for all the alternatives. When we get to the
|
||||
final alternative within the brackets, we would return the result of a
|
||||
recursive call to match() whatever happened. We can reduce stack usage by
|
||||
turning this into a tail recursion. */
|
||||
turning this into a tail recursion, except in the case when match_cbegroup
|
||||
is set. */
|
||||
|
||||
case OP_BRA:
|
||||
case OP_SBRA:
|
||||
@ -693,12 +727,20 @@ for (;;)
|
||||
flags = (op >= OP_SBRA)? match_cbegroup : 0;
|
||||
for (;;)
|
||||
{
|
||||
if (ecode[GET(ecode, 1)] != OP_ALT)
|
||||
if (ecode[GET(ecode, 1)] != OP_ALT) /* Final alternative */
|
||||
{
|
||||
ecode += _pcre_OP_lengths[*ecode];
|
||||
flags |= match_tail_recursed;
|
||||
DPRINTF(("bracket 0 tail recursion\n"));
|
||||
goto TAIL_RECURSE;
|
||||
if (flags == 0) /* Not a possibly empty group */
|
||||
{
|
||||
ecode += _pcre_OP_lengths[*ecode];
|
||||
DPRINTF(("bracket 0 tail recursion\n"));
|
||||
goto TAIL_RECURSE;
|
||||
}
|
||||
|
||||
/* Possibly empty group; can't use tail recursion. */
|
||||
|
||||
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
|
||||
eptrb, flags, RM48);
|
||||
RRETURN(rrc);
|
||||
}
|
||||
|
||||
/* For non-final alternatives, continue the loop for a NOMATCH result;
|
||||
@ -706,7 +748,7 @@ for (;;)
|
||||
|
||||
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
|
||||
eptrb, flags, RM2);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
|
||||
ecode += GET(ecode, 1);
|
||||
}
|
||||
/* Control never reaches here. */
|
||||
@ -754,7 +796,7 @@ for (;;)
|
||||
ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
|
||||
while (*ecode == OP_ALT) ecode += GET(ecode, 1);
|
||||
}
|
||||
else if (rrc != MATCH_NOMATCH)
|
||||
else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
|
||||
{
|
||||
RRETURN(rrc); /* Need braces because of following else */
|
||||
}
|
||||
@ -766,25 +808,36 @@ for (;;)
|
||||
}
|
||||
|
||||
/* We are now at the branch that is to be obeyed. As there is only one,
|
||||
we can use tail recursion to avoid using another stack frame. If the second
|
||||
alternative doesn't exist, we can just plough on. */
|
||||
we can use tail recursion to avoid using another stack frame, except when
|
||||
match_cbegroup is required for an unlimited repeat of a possibly empty
|
||||
group. If the second alternative doesn't exist, we can just plough on. */
|
||||
|
||||
if (condition || *ecode == OP_ALT)
|
||||
{
|
||||
ecode += 1 + LINK_SIZE;
|
||||
flags = match_tail_recursed | ((op == OP_SCOND)? match_cbegroup : 0);
|
||||
goto TAIL_RECURSE;
|
||||
if (op == OP_SCOND) /* Possibly empty group */
|
||||
{
|
||||
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
|
||||
RRETURN(rrc);
|
||||
}
|
||||
else /* Group must match something */
|
||||
{
|
||||
flags = 0;
|
||||
goto TAIL_RECURSE;
|
||||
}
|
||||
}
|
||||
else
|
||||
else /* Condition false & no 2nd alternative */
|
||||
{
|
||||
ecode += 1 + LINK_SIZE;
|
||||
}
|
||||
break;
|
||||
|
||||
|
||||
/* End of the pattern. If we are in a top-level recursion, we should
|
||||
restore the offsets appropriately and continue from after the call. */
|
||||
/* End of the pattern, either real or forced. If we are in a top-level
|
||||
recursion, we should restore the offsets appropriately and continue from
|
||||
after the call. */
|
||||
|
||||
case OP_ACCEPT:
|
||||
case OP_END:
|
||||
if (md->recursive != NULL && md->recursive->group_num == 0)
|
||||
{
|
||||
@ -805,7 +858,7 @@ for (;;)
|
||||
if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH);
|
||||
md->end_match_ptr = eptr; /* Record where we ended */
|
||||
md->end_offset_top = offset_top; /* and how many extracts were taken */
|
||||
md->start_match_ptr = mstart; /* and the start (\K can modify) */
|
||||
md->start_match_ptr = mstart; /* and the start (\K can modify) */
|
||||
RRETURN(MATCH_MATCH);
|
||||
|
||||
/* Change option settings */
|
||||
@ -829,7 +882,7 @@ for (;;)
|
||||
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
|
||||
RM4);
|
||||
if (rrc == MATCH_MATCH) break;
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
|
||||
ecode += GET(ecode, 1);
|
||||
}
|
||||
while (*ecode == OP_ALT);
|
||||
@ -856,7 +909,7 @@ for (;;)
|
||||
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
|
||||
RM5);
|
||||
if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
|
||||
ecode += GET(ecode,1);
|
||||
}
|
||||
while (*ecode == OP_ALT);
|
||||
@ -880,7 +933,7 @@ for (;;)
|
||||
{
|
||||
eptr--;
|
||||
if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
|
||||
BACKCHAR(eptr)
|
||||
BACKCHAR(eptr);
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -993,7 +1046,7 @@ for (;;)
|
||||
(pcre_free)(new_recursive.offset_save);
|
||||
RRETURN(MATCH_MATCH);
|
||||
}
|
||||
else if (rrc != MATCH_NOMATCH)
|
||||
else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
|
||||
{
|
||||
DPRINTF(("Recursion gave error %d\n", rrc));
|
||||
RRETURN(rrc);
|
||||
@ -1027,10 +1080,9 @@ for (;;)
|
||||
|
||||
do
|
||||
{
|
||||
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,
|
||||
eptrb, 0, RM7);
|
||||
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
|
||||
if (rrc == MATCH_MATCH) break;
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
|
||||
ecode += GET(ecode,1);
|
||||
}
|
||||
while (*ecode == OP_ALT);
|
||||
@ -1073,11 +1125,10 @@ for (;;)
|
||||
|
||||
if (*ecode == OP_KETRMIN)
|
||||
{
|
||||
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0,
|
||||
RM8);
|
||||
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
ecode = prev;
|
||||
flags = match_tail_recursed;
|
||||
flags = 0;
|
||||
goto TAIL_RECURSE;
|
||||
}
|
||||
else /* OP_KETRMAX */
|
||||
@ -1085,7 +1136,7 @@ for (;;)
|
||||
RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
ecode += 1 + LINK_SIZE;
|
||||
flags = match_tail_recursed;
|
||||
flags = 0;
|
||||
goto TAIL_RECURSE;
|
||||
}
|
||||
/* Control never gets here */
|
||||
@ -1216,17 +1267,21 @@ for (;;)
|
||||
|
||||
/* The repeating kets try the rest of the pattern or restart from the
|
||||
preceding bracket, in the appropriate order. In the second case, we can use
|
||||
tail recursion to avoid using another stack frame. */
|
||||
tail recursion to avoid using another stack frame, unless we have an
|
||||
unlimited repeat of a group that can match an empty string. */
|
||||
|
||||
flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
|
||||
|
||||
if (*ecode == OP_KETRMIN)
|
||||
{
|
||||
RMATCH(eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0,
|
||||
RM12);
|
||||
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
if (flags != 0) /* Could match an empty string */
|
||||
{
|
||||
RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
|
||||
RRETURN(rrc);
|
||||
}
|
||||
ecode = prev;
|
||||
flags |= match_tail_recursed;
|
||||
goto TAIL_RECURSE;
|
||||
}
|
||||
else /* OP_KETRMAX */
|
||||
@ -1234,7 +1289,7 @@ for (;;)
|
||||
RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
ecode += 1 + LINK_SIZE;
|
||||
flags = match_tail_recursed;
|
||||
flags = 0;
|
||||
goto TAIL_RECURSE;
|
||||
}
|
||||
/* Control never gets here */
|
||||
@ -1471,12 +1526,16 @@ for (;;)
|
||||
case 0x000d:
|
||||
if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
|
||||
break;
|
||||
|
||||
case 0x000a:
|
||||
break;
|
||||
|
||||
case 0x000b:
|
||||
case 0x000c:
|
||||
case 0x0085:
|
||||
case 0x2028:
|
||||
case 0x2029:
|
||||
if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
|
||||
break;
|
||||
}
|
||||
ecode++;
|
||||
@ -2033,7 +2092,7 @@ for (;;)
|
||||
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
if (eptr-- == pp) break; /* Stop if tried at original pos */
|
||||
BACKCHAR(eptr)
|
||||
if (utf8) BACKCHAR(eptr);
|
||||
}
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
@ -2899,12 +2958,16 @@ for (;;)
|
||||
case 0x000d:
|
||||
if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
|
||||
break;
|
||||
|
||||
case 0x000a:
|
||||
break;
|
||||
|
||||
case 0x000b:
|
||||
case 0x000c:
|
||||
case 0x0085:
|
||||
case 0x2028:
|
||||
case 0x2029:
|
||||
if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -3038,9 +3101,9 @@ for (;;)
|
||||
for (i = 1; i <= min; i++)
|
||||
{
|
||||
if (eptr >= md->end_subject ||
|
||||
(*eptr < 128 && (md->ctypes[*eptr++] & ctype_space) != 0))
|
||||
(*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0))
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
|
||||
while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
|
||||
}
|
||||
break;
|
||||
|
||||
@ -3058,9 +3121,9 @@ for (;;)
|
||||
for (i = 1; i <= min; i++)
|
||||
{
|
||||
if (eptr >= md->end_subject ||
|
||||
(*eptr < 128 && (md->ctypes[*eptr++] & ctype_word) != 0))
|
||||
(*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0))
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
|
||||
while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
|
||||
}
|
||||
break;
|
||||
|
||||
@ -3117,9 +3180,12 @@ for (;;)
|
||||
if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
|
||||
break;
|
||||
case 0x000a:
|
||||
break;
|
||||
|
||||
case 0x000b:
|
||||
case 0x000c:
|
||||
case 0x0085:
|
||||
if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -3371,11 +3437,14 @@ for (;;)
|
||||
if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
|
||||
break;
|
||||
case 0x000a:
|
||||
break;
|
||||
|
||||
case 0x000b:
|
||||
case 0x000c:
|
||||
case 0x0085:
|
||||
case 0x2028:
|
||||
case 0x2029:
|
||||
if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
@ -3527,10 +3596,14 @@ for (;;)
|
||||
case 0x000d:
|
||||
if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
|
||||
break;
|
||||
|
||||
case 0x000a:
|
||||
break;
|
||||
|
||||
case 0x000b:
|
||||
case 0x000c:
|
||||
case 0x0085:
|
||||
if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
@ -3702,7 +3775,7 @@ for (;;)
|
||||
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
if (eptr-- == pp) break; /* Stop if tried at original pos */
|
||||
BACKCHAR(eptr);
|
||||
if (utf8) BACKCHAR(eptr);
|
||||
}
|
||||
}
|
||||
|
||||
@ -3741,9 +3814,9 @@ for (;;)
|
||||
for (;;) /* Move back over one extended */
|
||||
{
|
||||
int len = 1;
|
||||
BACKCHAR(eptr);
|
||||
if (!utf8) c = *eptr; else
|
||||
{
|
||||
BACKCHAR(eptr);
|
||||
GETCHARLEN(c, eptr, len);
|
||||
}
|
||||
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
|
||||
@ -3764,11 +3837,6 @@ for (;;)
|
||||
switch(ctype)
|
||||
{
|
||||
case OP_ANY:
|
||||
|
||||
/* Special code is required for UTF8, but when the maximum is
|
||||
unlimited we don't need it, so we repeat the non-UTF8 code. This is
|
||||
probably worth it, because .* is quite a common idiom. */
|
||||
|
||||
if (max < INT_MAX)
|
||||
{
|
||||
if ((ims & PCRE_DOTALL) == 0)
|
||||
@ -3801,15 +3869,12 @@ for (;;)
|
||||
{
|
||||
if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
|
||||
eptr++;
|
||||
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
|
||||
}
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
c = max - min;
|
||||
if (c > (unsigned int)(md->end_subject - eptr))
|
||||
c = md->end_subject - eptr;
|
||||
eptr += c;
|
||||
eptr = md->end_subject;
|
||||
}
|
||||
}
|
||||
break;
|
||||
@ -3836,8 +3901,10 @@ for (;;)
|
||||
}
|
||||
else
|
||||
{
|
||||
if (c != 0x000a && c != 0x000b && c != 0x000c &&
|
||||
c != 0x0085 && c != 0x2028 && c != 0x2029)
|
||||
if (c != 0x000a &&
|
||||
(md->bsr_anycrlf ||
|
||||
(c != 0x000b && c != 0x000c &&
|
||||
c != 0x0085 && c != 0x2028 && c != 0x2029)))
|
||||
break;
|
||||
eptr += len;
|
||||
}
|
||||
@ -3990,7 +4057,7 @@ for (;;)
|
||||
}
|
||||
}
|
||||
else
|
||||
#endif
|
||||
#endif /* SUPPORT_UTF8 */
|
||||
|
||||
/* Not UTF-8 mode */
|
||||
{
|
||||
@ -4027,7 +4094,9 @@ for (;;)
|
||||
}
|
||||
else
|
||||
{
|
||||
if (c != 0x000a && c != 0x000b && c != 0x000c && c != 0x0085)
|
||||
if (c != 0x000a &&
|
||||
(md->bsr_anycrlf ||
|
||||
(c != 0x000b && c != 0x000c && c != 0x0085)))
|
||||
break;
|
||||
eptr++;
|
||||
}
|
||||
@ -4177,11 +4246,17 @@ HEAP_RETURN:
|
||||
switch (frame->Xwhere)
|
||||
{
|
||||
LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
|
||||
LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16)
|
||||
LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24)
|
||||
LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32)
|
||||
LBL(33) LBL(34) LBL(35) LBL(36) LBL(37) LBL(38) LBL(39) LBL(40)
|
||||
LBL(41) LBL(42) LBL(43) LBL(44) LBL(45) LBL(46) LBL(47)
|
||||
LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
|
||||
LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
|
||||
LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
|
||||
LBL(53) LBL(54)
|
||||
#ifdef SUPPORT_UTF8
|
||||
LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
|
||||
LBL(32) LBL(34) LBL(42) LBL(46)
|
||||
#ifdef SUPPORT_UCP
|
||||
LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
|
||||
#endif /* SUPPORT_UCP */
|
||||
#endif /* SUPPORT_UTF8 */
|
||||
default:
|
||||
DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
|
||||
return PCRE_ERROR_INTERNAL;
|
||||
@ -4298,7 +4373,6 @@ const uschar *start_bits = NULL;
|
||||
USPTR start_match = (USPTR)subject + start_offset;
|
||||
USPTR end_subject;
|
||||
USPTR req_byte_ptr = start_match - 1;
|
||||
eptrblock eptrchain[EPTR_WORK_SIZE];
|
||||
|
||||
pcre_study_data internal_study;
|
||||
const pcre_study_data *study;
|
||||
@ -4361,7 +4435,7 @@ if (re->magic_number != MAGIC_NUMBER)
|
||||
/* Set up other data */
|
||||
|
||||
anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
|
||||
startline = (re->options & PCRE_STARTLINE) != 0;
|
||||
startline = (re->flags & PCRE_STARTLINE) != 0;
|
||||
firstline = (re->options & PCRE_FIRSTLINE) != 0;
|
||||
|
||||
/* The code starts after the real_pcre block and the capture name table. */
|
||||
@ -4384,16 +4458,41 @@ md->partial = (options & PCRE_PARTIAL) != 0;
|
||||
md->hitend = FALSE;
|
||||
|
||||
md->recursive = NULL; /* No recursion at top level */
|
||||
md->eptrchain = eptrchain; /* Make workspace generally available */
|
||||
|
||||
md->lcc = tables + lcc_offset;
|
||||
md->ctypes = tables + ctypes_offset;
|
||||
|
||||
/* Handle different \R options. */
|
||||
|
||||
switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
|
||||
{
|
||||
case 0:
|
||||
if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
|
||||
md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
|
||||
else
|
||||
#ifdef BSR_ANYCRLF
|
||||
md->bsr_anycrlf = TRUE;
|
||||
#else
|
||||
md->bsr_anycrlf = FALSE;
|
||||
#endif
|
||||
break;
|
||||
|
||||
case PCRE_BSR_ANYCRLF:
|
||||
md->bsr_anycrlf = TRUE;
|
||||
break;
|
||||
|
||||
case PCRE_BSR_UNICODE:
|
||||
md->bsr_anycrlf = FALSE;
|
||||
break;
|
||||
|
||||
default: return PCRE_ERROR_BADNEWLINE;
|
||||
}
|
||||
|
||||
/* Handle different types of newline. The three bits give eight cases. If
|
||||
nothing is set at run time, whatever was used at compile time applies. */
|
||||
|
||||
switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
|
||||
PCRE_NEWLINE_BITS)
|
||||
switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
|
||||
(pcre_uint32)options) & PCRE_NEWLINE_BITS)
|
||||
{
|
||||
case 0: newline = NEWLINE; break; /* Compile-time default */
|
||||
case PCRE_NEWLINE_CR: newline = '\r'; break;
|
||||
@ -4432,7 +4531,7 @@ else
|
||||
/* Partial matching is supported only for a restricted set of regexes at the
|
||||
moment. */
|
||||
|
||||
if (md->partial && (re->options & PCRE_NOPARTIAL) != 0)
|
||||
if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
|
||||
return PCRE_ERROR_BADPARTIAL;
|
||||
|
||||
/* Check a UTF-8 string if required. Unfortunately there's no way of passing
|
||||
@ -4509,7 +4608,7 @@ studied, there may be a bitmap of possible first characters. */
|
||||
|
||||
if (!anchored)
|
||||
{
|
||||
if ((re->options & PCRE_FIRSTSET) != 0)
|
||||
if ((re->flags & PCRE_FIRSTSET) != 0)
|
||||
{
|
||||
first_byte = re->first_byte & 255;
|
||||
if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
|
||||
@ -4524,7 +4623,7 @@ if (!anchored)
|
||||
/* For anchored or unanchored matches, there may be a "last known required
|
||||
character" set. */
|
||||
|
||||
if ((re->options & PCRE_REQCHSET) != 0)
|
||||
if ((re->flags & PCRE_REQCHSET) != 0)
|
||||
{
|
||||
req_byte = re->req_byte & 255;
|
||||
req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
|
||||
@ -4540,6 +4639,7 @@ the loop runs just once. */
|
||||
for(;;)
|
||||
{
|
||||
USPTR save_end_subject = end_subject;
|
||||
USPTR new_start_match;
|
||||
|
||||
/* Reset the maximum number of extractions we might see. */
|
||||
|
||||
@ -4680,15 +4780,48 @@ for(;;)
|
||||
|
||||
/* OK, we can now run the match. */
|
||||
|
||||
md->start_match_ptr = start_match; /* Insurance */
|
||||
md->start_match_ptr = start_match;
|
||||
md->match_call_count = 0;
|
||||
md->eptrn = 0; /* Next free eptrchain slot */
|
||||
rc = match(start_match, md->start_code, start_match, 2, md,
|
||||
ims, NULL, 0, 0);
|
||||
rc = match(start_match, md->start_code, start_match, 2, md, ims, NULL, 0, 0);
|
||||
|
||||
/* Any return other than MATCH_NOMATCH breaks the loop. */
|
||||
switch(rc)
|
||||
{
|
||||
/* NOMATCH and PRUNE advance by one character. THEN at this level acts
|
||||
exactly like PRUNE. */
|
||||
|
||||
if (rc != MATCH_NOMATCH) break;
|
||||
case MATCH_NOMATCH:
|
||||
case MATCH_PRUNE:
|
||||
case MATCH_THEN:
|
||||
new_start_match = start_match + 1;
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8)
|
||||
while(new_start_match < end_subject && (*new_start_match & 0xc0) == 0x80)
|
||||
new_start_match++;
|
||||
#endif
|
||||
break;
|
||||
|
||||
/* SKIP passes back the next starting point explicitly. */
|
||||
|
||||
case MATCH_SKIP:
|
||||
new_start_match = md->start_match_ptr;
|
||||
break;
|
||||
|
||||
/* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
|
||||
|
||||
case MATCH_COMMIT:
|
||||
rc = MATCH_NOMATCH;
|
||||
goto ENDLOOP;
|
||||
|
||||
/* Any other return is some kind of error. */
|
||||
|
||||
default:
|
||||
goto ENDLOOP;
|
||||
}
|
||||
|
||||
/* Control reaches here for the various types of "no match at this point"
|
||||
result. Reset the code to MATCH_NOMATCH for subsequent checking. */
|
||||
|
||||
rc = MATCH_NOMATCH;
|
||||
|
||||
/* If PCRE_FIRSTLINE is set, the match must happen before or at the first
|
||||
newline in the subject (though it may continue over the newline). Therefore,
|
||||
@ -4696,30 +4829,26 @@ for(;;)
|
||||
|
||||
if (firstline && IS_NEWLINE(start_match)) break;
|
||||
|
||||
/* Advance the match position by one character. */
|
||||
/* Advance to new matching position */
|
||||
|
||||
start_match++;
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8)
|
||||
while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
|
||||
start_match++;
|
||||
#endif
|
||||
start_match = new_start_match;
|
||||
|
||||
/* Break the loop if the pattern is anchored or if we have passed the end of
|
||||
the subject. */
|
||||
|
||||
if (anchored || start_match > end_subject) break;
|
||||
|
||||
/* If we have just passed a CR and the newline option is CRLF or ANY or
|
||||
ANYCRLF, and we are now at a LF, advance the match position by one more
|
||||
character. */
|
||||
/* If we have just passed a CR and we are now at a LF, and the pattern does
|
||||
not contain any explicit matches for \r or \n, and the newline option is CRLF
|
||||
or ANY or ANYCRLF, advance the match position by one more character. */
|
||||
|
||||
if (start_match[-1] == '\r' &&
|
||||
(md->nltype == NLTYPE_ANY ||
|
||||
md->nltype == NLTYPE_ANYCRLF ||
|
||||
md->nllen == 2) &&
|
||||
start_match < end_subject &&
|
||||
*start_match == '\n')
|
||||
start_match < end_subject &&
|
||||
*start_match == '\n' &&
|
||||
(re->flags & PCRE_HASCRORLF) == 0 &&
|
||||
(md->nltype == NLTYPE_ANY ||
|
||||
md->nltype == NLTYPE_ANYCRLF ||
|
||||
md->nllen == 2))
|
||||
start_match++;
|
||||
|
||||
} /* End of for(;;) "bumpalong" loop */
|
||||
@ -4729,7 +4858,7 @@ for(;;)
|
||||
/* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
|
||||
conditions is true:
|
||||
|
||||
(1) The pattern is anchored;
|
||||
(1) The pattern is anchored or the match was failed by (*COMMIT);
|
||||
|
||||
(2) We are past the end of the subject;
|
||||
|
||||
@ -4744,6 +4873,8 @@ processing, copy those that we can. In this case there need not be overflow if
|
||||
certain parts of the pattern were not used, even though there are more
|
||||
capturing parentheses than vector slots. */
|
||||
|
||||
ENDLOOP:
|
||||
|
||||
if (rc == MATCH_MATCH)
|
||||
{
|
||||
if (using_temporary_offsets)
|
||||
|
@ -42,6 +42,10 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
information about a compiled pattern. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
@ -106,8 +110,8 @@ switch (what)
|
||||
|
||||
case PCRE_INFO_FIRSTBYTE:
|
||||
*((int *)where) =
|
||||
((re->options & PCRE_FIRSTSET) != 0)? re->first_byte :
|
||||
((re->options & PCRE_STARTLINE) != 0)? -1 : -2;
|
||||
((re->flags & PCRE_FIRSTSET) != 0)? re->first_byte :
|
||||
((re->flags & PCRE_STARTLINE) != 0)? -1 : -2;
|
||||
break;
|
||||
|
||||
/* Make sure we pass back the pointer to the bit vector in the external
|
||||
@ -121,7 +125,7 @@ switch (what)
|
||||
|
||||
case PCRE_INFO_LASTLITERAL:
|
||||
*((int *)where) =
|
||||
((re->options & PCRE_REQCHSET) != 0)? re->req_byte : -1;
|
||||
((re->flags & PCRE_REQCHSET) != 0)? re->req_byte : -1;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_NAMEENTRYSIZE:
|
||||
@ -141,11 +145,15 @@ switch (what)
|
||||
break;
|
||||
|
||||
case PCRE_INFO_OKPARTIAL:
|
||||
*((int *)where) = (re->options & PCRE_NOPARTIAL) == 0;
|
||||
*((int *)where) = (re->flags & PCRE_NOPARTIAL) == 0;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_JCHANGED:
|
||||
*((int *)where) = (re->options & PCRE_JCHANGED) != 0;
|
||||
*((int *)where) = (re->flags & PCRE_JCHANGED) != 0;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_HASCRORLF:
|
||||
*((int *)where) = (re->flags & PCRE_HASCRORLF) != 0;
|
||||
break;
|
||||
|
||||
default: return PCRE_ERROR_BADOPTION;
|
||||
|
@ -43,6 +43,10 @@ from the subject string after a regex match has succeeded. The original idea
|
||||
for these functions came from Scott Wimer. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
@ -185,7 +189,7 @@ const real_pcre *re = (const real_pcre *)code;
|
||||
int entrysize;
|
||||
char *first, *last;
|
||||
uschar *entry;
|
||||
if ((re->options & (PCRE_DUPNAMES | PCRE_JCHANGED)) == 0)
|
||||
if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
|
||||
return pcre_get_stringnumber(code, stringname);
|
||||
entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
|
||||
if (entrysize <= 0) return entrysize;
|
||||
|
@ -46,6 +46,10 @@ indirection. These values can be changed by the caller, but are shared between
|
||||
all threads. However, when compiling for Virtual Pascal, things are done
|
||||
differently, and global variables are not used (see pcre.in). */
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
PCRE_EXP_DATA_DEFN int (*pcre_callout)(pcre_callout_block *) = NULL;
|
||||
|
@ -43,6 +43,10 @@ information about a compiled pattern. However, use of this function is now
|
||||
deprecated, as it has been superseded by pcre_fullinfo(). */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
@ -81,8 +85,8 @@ if (re->magic_number != MAGIC_NUMBER)
|
||||
}
|
||||
if (optptr != NULL) *optptr = (int)(re->options & PUBLIC_OPTIONS);
|
||||
if (first_byte != NULL)
|
||||
*first_byte = ((re->options & PCRE_FIRSTSET) != 0)? re->first_byte :
|
||||
((re->options & PCRE_STARTLINE) != 0)? -1 : -2;
|
||||
*first_byte = ((re->flags & PCRE_FIRSTSET) != 0)? re->first_byte :
|
||||
((re->flags & PCRE_STARTLINE) != 0)? -1 : -2;
|
||||
return re->top_bracket;
|
||||
}
|
||||
|
||||
|
@ -67,10 +67,6 @@ be absolutely sure we get our version. */
|
||||
#endif
|
||||
|
||||
|
||||
/* Get the definitions provided by running "configure" */
|
||||
|
||||
#include "config.h"
|
||||
|
||||
/* Standard C headers plus the external interface definition. The only time
|
||||
setjmp and stdarg are used is when NO_RECURSE is set. */
|
||||
|
||||
@ -112,7 +108,7 @@ PCRE_EXP_DATA_DEFN only if they are not already set. */
|
||||
|
||||
#ifndef PCRE_EXP_DECL
|
||||
# ifdef _WIN32
|
||||
# ifdef DLL_EXPORT
|
||||
# ifndef PCRE_STATIC
|
||||
# define PCRE_EXP_DECL extern __declspec(dllexport)
|
||||
# define PCRE_EXP_DEFN __declspec(dllexport)
|
||||
# define PCRE_EXP_DATA_DEFN __declspec(dllexport)
|
||||
@ -121,7 +117,6 @@ PCRE_EXP_DATA_DEFN only if they are not already set. */
|
||||
# define PCRE_EXP_DEFN
|
||||
# define PCRE_EXP_DATA_DEFN
|
||||
# endif
|
||||
#
|
||||
# else
|
||||
# ifdef __cplusplus
|
||||
# define PCRE_EXP_DECL extern "C"
|
||||
@ -234,7 +229,7 @@ must begin with PCRE_. */
|
||||
/* Include the public PCRE header and the definitions of UCP character property
|
||||
values. */
|
||||
|
||||
#include <pcre.h>
|
||||
#include "pcre.h"
|
||||
#include "ucp.h"
|
||||
|
||||
/* When compiling for use with the Virtual Pascal compiler, these functions
|
||||
@ -363,7 +358,9 @@ capturing parenthesis numbers in back references. */
|
||||
|
||||
/* When UTF-8 encoding is being used, a character is no longer just a single
|
||||
byte. The macros for character handling generate simple sequences when used in
|
||||
byte-mode, and more complicated ones for UTF-8 characters. */
|
||||
byte-mode, and more complicated ones for UTF-8 characters. BACKCHAR should
|
||||
never be called in byte mode. To make sure it can never even appear when UTF-8
|
||||
support is omitted, we don't even define it. */
|
||||
|
||||
#ifndef SUPPORT_UTF8
|
||||
#define GETCHAR(c, eptr) c = *eptr;
|
||||
@ -371,7 +368,7 @@ byte-mode, and more complicated ones for UTF-8 characters. */
|
||||
#define GETCHARINC(c, eptr) c = *eptr++;
|
||||
#define GETCHARINCTEST(c, eptr) c = *eptr++;
|
||||
#define GETCHARLEN(c, eptr, len) c = *eptr;
|
||||
#define BACKCHAR(eptr)
|
||||
/* #define BACKCHAR(eptr) */
|
||||
|
||||
#else /* SUPPORT_UTF8 */
|
||||
|
||||
@ -464,9 +461,10 @@ if there are extra bytes. This is called when we know we are in UTF-8 mode. */
|
||||
}
|
||||
|
||||
/* If the pointer is not at the start of a character, move it back until
|
||||
it is. Called only in UTF-8 mode. */
|
||||
it is. This is called only in UTF-8 mode - we don't put a test within the macro
|
||||
because almost all calls are already within a block of UTF-8 only code. */
|
||||
|
||||
#define BACKCHAR(eptr) while((*eptr & 0xc0) == 0x80) eptr--;
|
||||
#define BACKCHAR(eptr) while((*eptr & 0xc0) == 0x80) eptr--
|
||||
|
||||
#endif
|
||||
|
||||
@ -483,17 +481,16 @@ Standard C system should have one. */
|
||||
|
||||
#define PCRE_IMS (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL)
|
||||
|
||||
/* Private options flags start at the most significant end of the four bytes.
|
||||
The public options defined in pcre.h start at the least significant end. Make
|
||||
sure they don't overlap! The bits are getting a bit scarce now -- when we run
|
||||
out, there is a dummy word in the structure that could be used for the private
|
||||
bits. */
|
||||
/* Private flags containing information about the compiled regex. They used to
|
||||
live at the top end of the options word, but that got almost full, so now they
|
||||
are in a 16-bit flags word. */
|
||||
|
||||
#define PCRE_NOPARTIAL 0x80000000 /* can't use partial with this regex */
|
||||
#define PCRE_FIRSTSET 0x40000000 /* first_byte is set */
|
||||
#define PCRE_REQCHSET 0x20000000 /* req_byte is set */
|
||||
#define PCRE_STARTLINE 0x10000000 /* start after \n for multiline */
|
||||
#define PCRE_JCHANGED 0x08000000 /* j option changes within regex */
|
||||
#define PCRE_NOPARTIAL 0x0001 /* can't use partial with this regex */
|
||||
#define PCRE_FIRSTSET 0x0002 /* first_byte is set */
|
||||
#define PCRE_REQCHSET 0x0004 /* req_byte is set */
|
||||
#define PCRE_STARTLINE 0x0008 /* start after \n for multiline */
|
||||
#define PCRE_JCHANGED 0x0010 /* j option used in regex */
|
||||
#define PCRE_HASCRORLF 0x0020 /* explicit \r or \n in pattern */
|
||||
|
||||
/* Options for the "extra" block produced by pcre_study(). */
|
||||
|
||||
@ -509,15 +506,16 @@ time, run time, or study time, respectively. */
|
||||
(PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \
|
||||
PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \
|
||||
PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT|PCRE_FIRSTLINE| \
|
||||
PCRE_DUPNAMES|PCRE_NEWLINE_BITS)
|
||||
PCRE_DUPNAMES|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)
|
||||
|
||||
#define PUBLIC_EXEC_OPTIONS \
|
||||
(PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK| \
|
||||
PCRE_PARTIAL|PCRE_NEWLINE_BITS)
|
||||
PCRE_PARTIAL|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)
|
||||
|
||||
#define PUBLIC_DFA_EXEC_OPTIONS \
|
||||
(PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK| \
|
||||
PCRE_PARTIAL|PCRE_DFA_SHORTEST|PCRE_DFA_RESTART|PCRE_NEWLINE_BITS)
|
||||
PCRE_PARTIAL|PCRE_DFA_SHORTEST|PCRE_DFA_RESTART|PCRE_NEWLINE_BITS| \
|
||||
PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)
|
||||
|
||||
#define PUBLIC_STUDY_OPTIONS 0 /* None defined */
|
||||
|
||||
@ -607,14 +605,9 @@ enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s,
|
||||
ESC_V, ESC_v, ESC_X, ESC_Z, ESC_z, ESC_E, ESC_Q, ESC_k, ESC_REF };
|
||||
|
||||
|
||||
/* Opcode table: OP_BRA must be last, as all values >= it are used for brackets
|
||||
that extract substrings. Starting from 1 (i.e. after OP_END), the values up to
|
||||
/* Opcode table: Starting from 1 (i.e. after OP_END), the values up to
|
||||
OP_EOD must correspond in order to the list of escapes immediately above.
|
||||
|
||||
To keep stored, compiled patterns compatible, new opcodes should be added
|
||||
immediately before OP_BRA, where (since release 7.0) a gap is left for this
|
||||
purpose.
|
||||
|
||||
*** NOTE NOTE NOTE *** Whenever this list is updated, the two macro definitions
|
||||
that follow must also be updated to match. There is also a table called
|
||||
"coptable" in pcre_dfa_exec.c that must be updated. */
|
||||
@ -741,7 +734,7 @@ enum {
|
||||
as there's a test for >= ONCE for a subpattern that isn't an assertion. */
|
||||
|
||||
OP_ONCE, /* 92 Atomic group */
|
||||
OP_BRA, /* 83 Start of non-capturing bracket */
|
||||
OP_BRA, /* 93 Start of non-capturing bracket */
|
||||
OP_CBRA, /* 94 Start of capturing bracket */
|
||||
OP_COND, /* 95 Conditional group */
|
||||
|
||||
@ -757,7 +750,19 @@ enum {
|
||||
OP_DEF, /* 101 The DEFINE condition */
|
||||
|
||||
OP_BRAZERO, /* 102 These two must remain together and in this */
|
||||
OP_BRAMINZERO /* 103 order. */
|
||||
OP_BRAMINZERO, /* 103 order. */
|
||||
|
||||
/* These are backtracking control verbs */
|
||||
|
||||
OP_PRUNE, /* 104 */
|
||||
OP_SKIP, /* 105 */
|
||||
OP_THEN, /* 106 */
|
||||
OP_COMMIT, /* 107 */
|
||||
|
||||
/* These are forced failure and success verbs */
|
||||
|
||||
OP_FAIL, /* 108 */
|
||||
OP_ACCEPT /* 109 */
|
||||
};
|
||||
|
||||
|
||||
@ -780,8 +785,9 @@ for debugging. The macro is referenced only in pcre_printint.c. */
|
||||
"class", "nclass", "xclass", "Ref", "Recurse", "Callout", \
|
||||
"Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", \
|
||||
"AssertB", "AssertB not", "Reverse", \
|
||||
"Once", "Bra 0", "Bra", "Cond", "SBra 0", "SBra", "SCond", \
|
||||
"Cond ref", "Cond rec", "Cond def", "Brazero", "Braminzero"
|
||||
"Once", "Bra", "CBra", "Cond", "SBra", "SCBra", "SCond", \
|
||||
"Cond ref", "Cond rec", "Cond def", "Brazero", "Braminzero", \
|
||||
"*PRUNE", "*SKIP", "*THEN", "*COMMIT", "*FAIL", "*ACCEPT"
|
||||
|
||||
|
||||
/* This macro defines the length of fixed length operations in the compiled
|
||||
@ -845,6 +851,8 @@ in UTF-8 mode. The code that uses this table must know about such things. */
|
||||
3, /* RREF */ \
|
||||
1, /* DEF */ \
|
||||
1, 1, /* BRAZERO, BRAMINZERO */ \
|
||||
1, 1, 1, 1, /* PRUNE, SKIP, THEN, COMMIT, */ \
|
||||
1, 1 /* FAIL, ACCEPT */
|
||||
|
||||
|
||||
/* A magic value for OP_RREF to indicate the "any recursion" condition. */
|
||||
@ -859,7 +867,8 @@ enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9,
|
||||
ERR20, ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR28, ERR29,
|
||||
ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39,
|
||||
ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
|
||||
ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58 };
|
||||
ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
|
||||
ERR60, ERR61 };
|
||||
|
||||
/* The real format of the start of the pcre block; the index of names and the
|
||||
code vector run on as long as necessary after the end. We store an explicit
|
||||
@ -881,9 +890,9 @@ NOTE NOTE NOTE:
|
||||
typedef struct real_pcre {
|
||||
pcre_uint32 magic_number;
|
||||
pcre_uint32 size; /* Total that was malloced */
|
||||
pcre_uint32 options;
|
||||
pcre_uint32 dummy1; /* For future use, maybe */
|
||||
|
||||
pcre_uint32 options; /* Public options */
|
||||
pcre_uint16 flags; /* Private flags */
|
||||
pcre_uint16 dummy1; /* For future use */
|
||||
pcre_uint16 top_bracket;
|
||||
pcre_uint16 top_backref;
|
||||
pcre_uint16 first_byte;
|
||||
@ -926,8 +935,9 @@ typedef struct compile_data {
|
||||
int top_backref; /* Maximum back reference */
|
||||
unsigned int backref_map; /* Bitmap of low back refs */
|
||||
int external_options; /* External (initial) options */
|
||||
int external_flags; /* External flag bits to be set */
|
||||
int req_varyopt; /* "After variable item" flag for reqbyte */
|
||||
BOOL nopartial; /* Set TRUE if partial won't work */
|
||||
BOOL had_accept; /* (*ACCEPT) encountered */
|
||||
int nltype; /* Newline type */
|
||||
int nllen; /* Newline string length */
|
||||
uschar nl[4]; /* Newline string when fixed length */
|
||||
@ -986,6 +996,7 @@ typedef struct match_data {
|
||||
BOOL notempty; /* Empty string match not wanted */
|
||||
BOOL partial; /* PARTIAL flag */
|
||||
BOOL hitend; /* Hit the end of the subject at some point */
|
||||
BOOL bsr_anycrlf; /* \R is just any CRLF, not full Unicode */
|
||||
const uschar *start_code; /* For use when recursing */
|
||||
USPTR start_subject; /* Start of the subject string */
|
||||
USPTR end_subject; /* End of the subject string */
|
||||
@ -1050,10 +1061,12 @@ total length. */
|
||||
#define tables_length (ctypes_offset + 256)
|
||||
|
||||
/* Layout of the UCP type table that translates property names into types and
|
||||
codes. */
|
||||
codes. Each entry used to point directly to a name, but to reduce the number of
|
||||
relocations in shared libraries, it now has an offset into a single string
|
||||
instead. */
|
||||
|
||||
typedef struct {
|
||||
pcre_uint16 offset;
|
||||
pcre_uint16 name_offset;
|
||||
pcre_uint16 type;
|
||||
pcre_uint16 value;
|
||||
} ucp_type_table;
|
||||
@ -1071,7 +1084,7 @@ extern const uschar _pcre_utf8_table4[];
|
||||
|
||||
extern const int _pcre_utf8_table1_size;
|
||||
|
||||
extern const char _pcre_ucp_names[];
|
||||
extern const char _pcre_utt_names[];
|
||||
extern const ucp_type_table _pcre_utt[];
|
||||
extern const int _pcre_utt_size;
|
||||
|
||||
|
@ -45,7 +45,10 @@ compilation of dftables.c, in which case the macro DFTABLES is defined. */
|
||||
|
||||
|
||||
#ifndef DFTABLES
|
||||
#include "pcre_internal.h"
|
||||
# ifdef HAVE_CONFIG_H
|
||||
# include "config.h"
|
||||
# endif
|
||||
# include "pcre_internal.h"
|
||||
#endif
|
||||
|
||||
|
||||
|
@ -47,6 +47,10 @@ and NLTYPE_ANY. The full list of Unicode newline characters is taken from
|
||||
http://unicode.org/unicode/reports/tr18/. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
@ -124,12 +128,16 @@ _pcre_was_newline(const uschar *ptr, int type, const uschar *startptr,
|
||||
{
|
||||
int c;
|
||||
ptr--;
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8)
|
||||
{
|
||||
BACKCHAR(ptr);
|
||||
GETCHAR(c, ptr);
|
||||
}
|
||||
else c = *ptr;
|
||||
#else /* no UTF-8 support */
|
||||
c = *ptr;
|
||||
#endif /* SUPPORT_UTF8 */
|
||||
|
||||
if (type == NLTYPE_ANYCRLF) switch(c)
|
||||
{
|
||||
|
@ -41,6 +41,9 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
/* This file contains a private PCRE function that converts an ordinal
|
||||
character value into a UTF8 string. */
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
@ -43,6 +43,11 @@ auxiliary function that can be used to maintain a reference count in a compiled
|
||||
pattern data block. This might be helpful in applications where the block is
|
||||
shared by different users. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
|
@ -42,6 +42,10 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
supporting functions. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
@ -523,7 +527,8 @@ code = (uschar *)re + re->name_table_offset +
|
||||
a multiline pattern that matches only at "line starts", no further processing
|
||||
at present. */
|
||||
|
||||
if ((re->options & (PCRE_ANCHORED|PCRE_FIRSTSET|PCRE_STARTLINE)) != 0)
|
||||
if ((re->options & PCRE_ANCHORED) != 0 ||
|
||||
(re->flags & (PCRE_FIRSTSET|PCRE_STARTLINE)) != 0)
|
||||
return NULL;
|
||||
|
||||
/* Set the character tables in the block that is passed around */
|
||||
|
@ -44,6 +44,10 @@ uses macros to change their names from _pcre_xxx to xxxx, thereby avoiding name
|
||||
clashes with the library. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
@ -83,131 +87,137 @@ const uschar _pcre_utf8_table4[] = {
|
||||
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
|
||||
3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
|
||||
|
||||
/* This table translates Unicode property names into type and code values. It
|
||||
is searched by binary chop, so must be in collating sequence of name. */
|
||||
/* The pcre_utt[] table below translates Unicode property names into type and
|
||||
code values. It is searched by binary chop, so must be in collating sequence of
|
||||
name. Originally, the table contained pointers to the name strings in the first
|
||||
field of each entry. However, that leads to a large number of relocations when
|
||||
a shared library is dynamically loaded. A significant reduction is made by
|
||||
putting all the names into a single, large string and then using offsets in the
|
||||
table itself. Maintenance is more error-prone, but frequent changes to this
|
||||
data are unlikely. */
|
||||
|
||||
const char _pcre_ucp_names[] =
|
||||
"Any\0"
|
||||
"Arabic\0"
|
||||
"Armenian\0"
|
||||
"Balinese\0"
|
||||
"Bengali\0"
|
||||
"Bopomofo\0"
|
||||
"Braille\0"
|
||||
"Buginese\0"
|
||||
"Buhid\0"
|
||||
"C\0"
|
||||
"Canadian_Aboriginal\0"
|
||||
"Cc\0"
|
||||
"Cf\0"
|
||||
"Cherokee\0"
|
||||
"Cn\0"
|
||||
"Co\0"
|
||||
"Common\0"
|
||||
"Coptic\0"
|
||||
"Cs\0"
|
||||
"Cuneiform\0"
|
||||
"Cypriot\0"
|
||||
"Cyrillic\0"
|
||||
"Deseret\0"
|
||||
"Devanagari\0"
|
||||
"Ethiopic\0"
|
||||
"Georgian\0"
|
||||
"Glagolitic\0"
|
||||
"Gothic\0"
|
||||
"Greek\0"
|
||||
"Gujarati\0"
|
||||
"Gurmukhi\0"
|
||||
"Han\0"
|
||||
"Hangul\0"
|
||||
"Hanunoo\0"
|
||||
"Hebrew\0"
|
||||
"Hiragana\0"
|
||||
"Inherited\0"
|
||||
"Kannada\0"
|
||||
"Katakana\0"
|
||||
"Kharoshthi\0"
|
||||
"Khmer\0"
|
||||
"L\0"
|
||||
"L&\0"
|
||||
"Lao\0"
|
||||
"Latin\0"
|
||||
"Limbu\0"
|
||||
"Linear_B\0"
|
||||
"Ll\0"
|
||||
"Lm\0"
|
||||
"Lo\0"
|
||||
"Lt\0"
|
||||
"Lu\0"
|
||||
"M\0"
|
||||
"Malayalam\0"
|
||||
"Mc\0"
|
||||
"Me\0"
|
||||
"Mn\0"
|
||||
"Mongolian\0"
|
||||
"Myanmar\0"
|
||||
"N\0"
|
||||
"Nd\0"
|
||||
"New_Tai_Lue\0"
|
||||
"Nko\0"
|
||||
"Nl\0"
|
||||
"No\0"
|
||||
"Ogham\0"
|
||||
"Old_Italic\0"
|
||||
"Old_Persian\0"
|
||||
"Oriya\0"
|
||||
"Osmanya\0"
|
||||
"P\0"
|
||||
"Pc\0"
|
||||
"Pd\0"
|
||||
"Pe\0"
|
||||
"Pf\0"
|
||||
"Phags_Pa\0"
|
||||
"Phoenician\0"
|
||||
"Pi\0"
|
||||
"Po\0"
|
||||
"Ps\0"
|
||||
"Runic\0"
|
||||
"S\0"
|
||||
"Sc\0"
|
||||
"Shavian\0"
|
||||
"Sinhala\0"
|
||||
"Sk\0"
|
||||
"Sm\0"
|
||||
"So\0"
|
||||
"Syloti_Nagri\0"
|
||||
"Syriac\0"
|
||||
"Tagalog\0"
|
||||
"Tagbanwa\0"
|
||||
"Tai_Le\0"
|
||||
"Tamil\0"
|
||||
"Telugu\0"
|
||||
"Thaana\0"
|
||||
"Thai\0"
|
||||
"Tibetan\0"
|
||||
"Tifinagh\0"
|
||||
"Ugaritic\0"
|
||||
"Yi\0"
|
||||
"Z\0"
|
||||
"Zl\0"
|
||||
"Zp\0"
|
||||
const char _pcre_utt_names[] =
|
||||
"Any\0"
|
||||
"Arabic\0"
|
||||
"Armenian\0"
|
||||
"Balinese\0"
|
||||
"Bengali\0"
|
||||
"Bopomofo\0"
|
||||
"Braille\0"
|
||||
"Buginese\0"
|
||||
"Buhid\0"
|
||||
"C\0"
|
||||
"Canadian_Aboriginal\0"
|
||||
"Cc\0"
|
||||
"Cf\0"
|
||||
"Cherokee\0"
|
||||
"Cn\0"
|
||||
"Co\0"
|
||||
"Common\0"
|
||||
"Coptic\0"
|
||||
"Cs\0"
|
||||
"Cuneiform\0"
|
||||
"Cypriot\0"
|
||||
"Cyrillic\0"
|
||||
"Deseret\0"
|
||||
"Devanagari\0"
|
||||
"Ethiopic\0"
|
||||
"Georgian\0"
|
||||
"Glagolitic\0"
|
||||
"Gothic\0"
|
||||
"Greek\0"
|
||||
"Gujarati\0"
|
||||
"Gurmukhi\0"
|
||||
"Han\0"
|
||||
"Hangul\0"
|
||||
"Hanunoo\0"
|
||||
"Hebrew\0"
|
||||
"Hiragana\0"
|
||||
"Inherited\0"
|
||||
"Kannada\0"
|
||||
"Katakana\0"
|
||||
"Kharoshthi\0"
|
||||
"Khmer\0"
|
||||
"L\0"
|
||||
"L&\0"
|
||||
"Lao\0"
|
||||
"Latin\0"
|
||||
"Limbu\0"
|
||||
"Linear_B\0"
|
||||
"Ll\0"
|
||||
"Lm\0"
|
||||
"Lo\0"
|
||||
"Lt\0"
|
||||
"Lu\0"
|
||||
"M\0"
|
||||
"Malayalam\0"
|
||||
"Mc\0"
|
||||
"Me\0"
|
||||
"Mn\0"
|
||||
"Mongolian\0"
|
||||
"Myanmar\0"
|
||||
"N\0"
|
||||
"Nd\0"
|
||||
"New_Tai_Lue\0"
|
||||
"Nko\0"
|
||||
"Nl\0"
|
||||
"No\0"
|
||||
"Ogham\0"
|
||||
"Old_Italic\0"
|
||||
"Old_Persian\0"
|
||||
"Oriya\0"
|
||||
"Osmanya\0"
|
||||
"P\0"
|
||||
"Pc\0"
|
||||
"Pd\0"
|
||||
"Pe\0"
|
||||
"Pf\0"
|
||||
"Phags_Pa\0"
|
||||
"Phoenician\0"
|
||||
"Pi\0"
|
||||
"Po\0"
|
||||
"Ps\0"
|
||||
"Runic\0"
|
||||
"S\0"
|
||||
"Sc\0"
|
||||
"Shavian\0"
|
||||
"Sinhala\0"
|
||||
"Sk\0"
|
||||
"Sm\0"
|
||||
"So\0"
|
||||
"Syloti_Nagri\0"
|
||||
"Syriac\0"
|
||||
"Tagalog\0"
|
||||
"Tagbanwa\0"
|
||||
"Tai_Le\0"
|
||||
"Tamil\0"
|
||||
"Telugu\0"
|
||||
"Thaana\0"
|
||||
"Thai\0"
|
||||
"Tibetan\0"
|
||||
"Tifinagh\0"
|
||||
"Ugaritic\0"
|
||||
"Yi\0"
|
||||
"Z\0"
|
||||
"Zl\0"
|
||||
"Zp\0"
|
||||
"Zs\0";
|
||||
|
||||
const ucp_type_table _pcre_utt[] = {
|
||||
{ 0, PT_ANY, 0 },
|
||||
{ 4, PT_SC, ucp_Arabic },
|
||||
{ 11, PT_SC, ucp_Armenian },
|
||||
{ 20, PT_SC, ucp_Balinese },
|
||||
{ 29, PT_SC, ucp_Bengali },
|
||||
{ 37, PT_SC, ucp_Bopomofo },
|
||||
{ 46, PT_SC, ucp_Braille },
|
||||
{ 54, PT_SC, ucp_Buginese },
|
||||
{ 63, PT_SC, ucp_Buhid },
|
||||
{ 69, PT_GC, ucp_C },
|
||||
{ 71, PT_SC, ucp_Canadian_Aboriginal },
|
||||
{ 91, PT_PC, ucp_Cc },
|
||||
{ 94, PT_PC, ucp_Cf },
|
||||
{ 97, PT_SC, ucp_Cherokee },
|
||||
{ 0, PT_ANY, 0 },
|
||||
{ 4, PT_SC, ucp_Arabic },
|
||||
{ 11, PT_SC, ucp_Armenian },
|
||||
{ 20, PT_SC, ucp_Balinese },
|
||||
{ 29, PT_SC, ucp_Bengali },
|
||||
{ 37, PT_SC, ucp_Bopomofo },
|
||||
{ 46, PT_SC, ucp_Braille },
|
||||
{ 54, PT_SC, ucp_Buginese },
|
||||
{ 63, PT_SC, ucp_Buhid },
|
||||
{ 69, PT_GC, ucp_C },
|
||||
{ 71, PT_SC, ucp_Canadian_Aboriginal },
|
||||
{ 91, PT_PC, ucp_Cc },
|
||||
{ 94, PT_PC, ucp_Cf },
|
||||
{ 97, PT_SC, ucp_Cherokee },
|
||||
{ 106, PT_PC, ucp_Cn },
|
||||
{ 109, PT_PC, ucp_Co },
|
||||
{ 112, PT_SC, ucp_Common },
|
||||
|
@ -43,6 +43,10 @@ see if it was compiled with the opposite endianness. If so, it uses an
|
||||
auxiliary local function to flip the appropriate bytes. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
@ -104,6 +108,7 @@ if (byteflip(re->magic_number, sizeof(re->magic_number)) != MAGIC_NUMBER)
|
||||
*internal_re = *re; /* To copy other fields */
|
||||
internal_re->size = byteflip(re->size, sizeof(re->size));
|
||||
internal_re->options = byteflip(re->options, sizeof(re->options));
|
||||
internal_re->flags = (pcre_uint16)byteflip(re->flags, sizeof(re->flags));
|
||||
internal_re->top_bracket =
|
||||
(pcre_uint16)byteflip(re->top_bracket, sizeof(re->top_bracket));
|
||||
internal_re->top_backref =
|
||||
|
@ -42,6 +42,10 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
string that identifies the PCRE version that is in use. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
|
@ -43,6 +43,10 @@ class (one that contains characters whose values are > 255). It is used by both
|
||||
pcre_exec() and pcre_dfa_exec(). */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user