mirror of
https://gitlab.gnome.org/GNOME/glib.git
synced 2025-01-24 21:16:15 +01:00
PCRE 7.7
svn path=/trunk/; revision=6938
This commit is contained in:
parent
c9db84f9f2
commit
adae23350a
@ -1,3 +1,7 @@
|
||||
2008-05-27 Matthias Clasen <mclasen@redhat.com>
|
||||
|
||||
* glib/pcre/*: Update to PCRE 7.7
|
||||
|
||||
2008-05-26 Matthias Clasen <mclasen@redhat.com>
|
||||
|
||||
* glib/gchecksum.c: Add Since: tag to g_checksum_reset
|
||||
|
@ -1,35 +1,30 @@
|
||||
TOP = ..\..\..
|
||||
!INCLUDE ..\..\build\win32\make.msc
|
||||
|
||||
INCLUDES = \\
|
||||
-I ..\.. \\
|
||||
INCLUDES = \
|
||||
-I ..\.. \
|
||||
-I ..
|
||||
|
||||
DEFINES = \\
|
||||
-DPCRE_STATIC \\
|
||||
-DHAVE_CONFIG_H \\
|
||||
-DHAVE_LONG_LONG_FORMAT \\
|
||||
-DSUPPORT_UCP \\
|
||||
-DSUPPORT_UTF8 \\
|
||||
-DNEWLINE=-1 \\
|
||||
-DMATCH_LIMIT=10000000 \\
|
||||
-DMATCH_LIMIT_RECURSION=10000000 \\
|
||||
-DMAX_NAME_SIZE=32 \\
|
||||
-DMAX_NAME_COUNT=10000 \\
|
||||
-DMAX_DUPLENGTH=30000 \\
|
||||
-DLINK_SIZE=2 \\
|
||||
-DEBCDIC=0 \\
|
||||
DEFINES = \
|
||||
-DPCRE_STATIC \
|
||||
-DHAVE_CONFIG_H \
|
||||
-DHAVE_LONG_LONG_FORMAT \
|
||||
-DSUPPORT_UCP \
|
||||
-DSUPPORT_UTF8 \
|
||||
-DNEWLINE=-1 \
|
||||
-DMATCH_LIMIT=10000000 \
|
||||
-DMATCH_LIMIT_RECURSION=10000000 \
|
||||
-DMAX_NAME_SIZE=32 \
|
||||
-DMAX_NAME_COUNT=10000 \
|
||||
-DMAX_DUPLENGTH=30000 \
|
||||
-DLINK_SIZE=2 \
|
||||
-DEBCDIC=0 \
|
||||
-DPOSIX_MALLOC_THRESHOLD=10
|
||||
|
||||
OBJECTS = \\
|
||||
`
|
||||
for f in $all_files; do
|
||||
echo " $f.obj \\\\"
|
||||
done
|
||||
`
|
||||
OBJECTS = \
|
||||
|
||||
|
||||
all : pcre.lib
|
||||
|
||||
pcre.lib : \$(OBJECTS)
|
||||
lib -out:pcre.lib \$(OBJECTS)
|
||||
|
||||
pcre.lib : $(OBJECTS)
|
||||
lib -out:pcre.lib $(OBJECTS)
|
||||
|
@ -42,10 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
/* The current PCRE version information. */
|
||||
|
||||
#define PCRE_MAJOR 7
|
||||
|
||||
#define PCRE_MINOR 6
|
||||
#define PCRE_MINOR 7
|
||||
#define PCRE_PRERELEASE
|
||||
#define PCRE_DATE 2008-01-28
|
||||
#define PCRE_DATE 2008-05-07
|
||||
|
||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||
imported have to be identified as such. When building PCRE, the appropriate
|
||||
@ -125,6 +124,7 @@ extern "C" {
|
||||
#define PCRE_NEWLINE_ANYCRLF 0x00500000
|
||||
#define PCRE_BSR_ANYCRLF 0x00800000
|
||||
#define PCRE_BSR_UNICODE 0x01000000
|
||||
#define PCRE_JAVASCRIPT_COMPAT 0x02000000
|
||||
|
||||
/* Exec-time and get/set-time error codes */
|
||||
|
||||
|
@ -1,3 +1,6 @@
|
||||
/* This file is autogenerated by ../update-pcre/update.sh during
|
||||
* the update of the local copy of PCRE.
|
||||
*/
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
@ -158,7 +158,7 @@ static const char verbnames[] =
|
||||
"SKIP\0"
|
||||
"THEN";
|
||||
|
||||
static verbitem verbs[] = {
|
||||
static const verbitem verbs[] = {
|
||||
{ 6, OP_ACCEPT },
|
||||
{ 6, OP_COMMIT },
|
||||
{ 1, OP_FAIL },
|
||||
@ -168,7 +168,7 @@ static verbitem verbs[] = {
|
||||
{ 4, OP_THEN }
|
||||
};
|
||||
|
||||
static int verbcount = sizeof(verbs)/sizeof(verbitem);
|
||||
static const int verbcount = sizeof(verbs)/sizeof(verbitem);
|
||||
|
||||
|
||||
/* Tables of names of POSIX character classes and their lengths. The names are
|
||||
@ -295,14 +295,15 @@ static const char error_texts[] =
|
||||
/* 55 */
|
||||
"repeating a DEFINE group is not allowed\0"
|
||||
"inconsistent NEWLINE options\0"
|
||||
"\\g is not followed by a braced name or an optionally braced non-zero number\0"
|
||||
"(?+ or (?- or (?(+ or (?(- must be followed by a non-zero number\0"
|
||||
"\\g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number\0"
|
||||
"a numbered reference must not be zero\0"
|
||||
"(*VERB) with an argument is not supported\0"
|
||||
/* 60 */
|
||||
"(*VERB) not recognized\0"
|
||||
"number is too big\0"
|
||||
"subpattern name expected\0"
|
||||
"digit expected after (?+";
|
||||
"digit expected after (?+\0"
|
||||
"] is an invalid data character in JavaScript compatibility mode";
|
||||
|
||||
|
||||
/* Definition to allow mutual recursion */
|
||||
@ -378,9 +379,15 @@ if (c == 0) *errorcodeptr = ERR1;
|
||||
in a table. A non-zero result is something that can be returned immediately.
|
||||
Otherwise further processing may be required. */
|
||||
|
||||
#ifndef EBCDIC /* ASCII coding */
|
||||
else if (c < '0' || c > 'z') {} /* Not alphanumeric */
|
||||
else if ((i = escapes[c - '0']) != 0) c = i;
|
||||
|
||||
#else /* EBCDIC coding */
|
||||
else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {} /* Not alphanumeric */
|
||||
else if ((i = escapes[c - 0x48]) != 0) c = i;
|
||||
#endif
|
||||
|
||||
/* Escapes that need further processing, or are illegal. */
|
||||
|
||||
else
|
||||
@ -401,14 +408,31 @@ else
|
||||
*errorcodeptr = ERR37;
|
||||
break;
|
||||
|
||||
/* \g must be followed by a number, either plain or braced. If positive, it
|
||||
is an absolute backreference. If negative, it is a relative backreference.
|
||||
This is a Perl 5.10 feature. Perl 5.10 also supports \g{name} as a
|
||||
reference to a named group. This is part of Perl's movement towards a
|
||||
unified syntax for back references. As this is synonymous with \k{name}, we
|
||||
fudge it up by pretending it really was \k. */
|
||||
/* \g must be followed by one of a number of specific things:
|
||||
|
||||
(1) A number, either plain or braced. If positive, it is an absolute
|
||||
backreference. If negative, it is a relative backreference. This is a Perl
|
||||
5.10 feature.
|
||||
|
||||
(2) Perl 5.10 also supports \g{name} as a reference to a named group. This
|
||||
is part of Perl's movement towards a unified syntax for back references. As
|
||||
this is synonymous with \k{name}, we fudge it up by pretending it really
|
||||
was \k.
|
||||
|
||||
(3) For Oniguruma compatibility we also support \g followed by a name or a
|
||||
number either in angle brackets or in single quotes. However, these are
|
||||
(possibly recursive) subroutine calls, _not_ backreferences. Just return
|
||||
the -ESC_g code (cf \k). */
|
||||
|
||||
case 'g':
|
||||
if (ptr[1] == '<' || ptr[1] == '\'')
|
||||
{
|
||||
c = -ESC_g;
|
||||
break;
|
||||
}
|
||||
|
||||
/* Handle the Perl-compatible cases */
|
||||
|
||||
if (ptr[1] == '{')
|
||||
{
|
||||
const uschar *p;
|
||||
@ -435,18 +459,24 @@ else
|
||||
while (g_ascii_isdigit(ptr[1]) != 0)
|
||||
c = c * 10 + *(++ptr) - '0';
|
||||
|
||||
if (c < 0)
|
||||
if (c < 0) /* Integer overflow */
|
||||
{
|
||||
*errorcodeptr = ERR61;
|
||||
break;
|
||||
}
|
||||
|
||||
if (c == 0 || (braced && *(++ptr) != '}'))
|
||||
if (braced && *(++ptr) != '}')
|
||||
{
|
||||
*errorcodeptr = ERR57;
|
||||
break;
|
||||
}
|
||||
|
||||
if (c == 0)
|
||||
{
|
||||
*errorcodeptr = ERR58;
|
||||
break;
|
||||
}
|
||||
|
||||
if (negated)
|
||||
{
|
||||
if (c > bracount)
|
||||
@ -481,7 +511,7 @@ else
|
||||
c -= '0';
|
||||
while (g_ascii_isdigit(ptr[1]) != 0)
|
||||
c = c * 10 + *(++ptr) - '0';
|
||||
if (c < 0)
|
||||
if (c < 0) /* Integer overflow */
|
||||
{
|
||||
*errorcodeptr = ERR61;
|
||||
break;
|
||||
@ -822,7 +852,7 @@ be terminated by '>' because that is checked in the first pass.
|
||||
|
||||
Arguments:
|
||||
ptr current position in the pattern
|
||||
count current count of capturing parens so far encountered
|
||||
cd compile background data
|
||||
name name to seek, or NULL if seeking a numbered subpattern
|
||||
lorn name length, or subpattern number if name is NULL
|
||||
xmode TRUE if we are in /x mode
|
||||
@ -831,10 +861,11 @@ Returns: the number of the named subpattern, or -1 if not found
|
||||
*/
|
||||
|
||||
static int
|
||||
find_parens(const uschar *ptr, int count, const uschar *name, int lorn,
|
||||
find_parens(const uschar *ptr, compile_data *cd, const uschar *name, int lorn,
|
||||
BOOL xmode)
|
||||
{
|
||||
const uschar *thisname;
|
||||
int count = cd->bracount;
|
||||
|
||||
for (; *ptr != 0; ptr++)
|
||||
{
|
||||
@ -854,10 +885,34 @@ for (; *ptr != 0; ptr++)
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Skip over character classes */
|
||||
/* Skip over character classes; this logic must be similar to the way they
|
||||
are handled for real. If the first character is '^', skip it. Also, if the
|
||||
first few characters (either before or after ^) are \Q\E or \E we skip them
|
||||
too. This makes for compatibility with Perl. */
|
||||
|
||||
if (*ptr == '[')
|
||||
{
|
||||
BOOL negate_class = FALSE;
|
||||
for (;;)
|
||||
{
|
||||
int c = *(++ptr);
|
||||
if (c == '\\')
|
||||
{
|
||||
if (ptr[1] == 'E') ptr++;
|
||||
else if (strncmp((const char *)ptr+1, "Q\\E", 3) == 0) ptr += 3;
|
||||
else break;
|
||||
}
|
||||
else if (!negate_class && c == '^')
|
||||
negate_class = TRUE;
|
||||
else break;
|
||||
}
|
||||
|
||||
/* If the next character is ']', it is a data character that must be
|
||||
skipped, except in JavaScript compatibility mode. */
|
||||
|
||||
if (ptr[1] == ']' && (cd->external_options & PCRE_JAVASCRIPT_COMPAT) == 0)
|
||||
ptr++;
|
||||
|
||||
while (*(++ptr) != ']')
|
||||
{
|
||||
if (*ptr == 0) return -1;
|
||||
@ -1122,6 +1177,7 @@ for (;;)
|
||||
case OP_NOT_WORDCHAR:
|
||||
case OP_WORDCHAR:
|
||||
case OP_ANY:
|
||||
case OP_ALLANY:
|
||||
branchlength++;
|
||||
cc++;
|
||||
break;
|
||||
@ -1414,7 +1470,7 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE
|
||||
|
||||
/* Groups with zero repeats can of course be empty; skip them. */
|
||||
|
||||
if (c == OP_BRAZERO || c == OP_BRAMINZERO)
|
||||
if (c == OP_BRAZERO || c == OP_BRAMINZERO || c == OP_SKIPZERO)
|
||||
{
|
||||
code += _pcre_OP_lengths[c];
|
||||
do code += GET(code, 1); while (*code == OP_ALT);
|
||||
@ -1500,6 +1556,7 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE
|
||||
case OP_NOT_WORDCHAR:
|
||||
case OP_WORDCHAR:
|
||||
case OP_ANY:
|
||||
case OP_ALLANY:
|
||||
case OP_ANYBYTE:
|
||||
case OP_CHAR:
|
||||
case OP_CHARNC:
|
||||
@ -1694,11 +1751,12 @@ return -1;
|
||||
that is referenced. This means that groups can be replicated for fixed
|
||||
repetition simply by copying (because the recursion is allowed to refer to
|
||||
earlier groups that are outside the current group). However, when a group is
|
||||
optional (i.e. the minimum quantifier is zero), OP_BRAZERO is inserted before
|
||||
it, after it has been compiled. This means that any OP_RECURSE items within it
|
||||
that refer to the group itself or any contained groups have to have their
|
||||
offsets adjusted. That one of the jobs of this function. Before it is called,
|
||||
the partially compiled regex must be temporarily terminated with OP_END.
|
||||
optional (i.e. the minimum quantifier is zero), OP_BRAZERO or OP_SKIPZERO is
|
||||
inserted before it, after it has been compiled. This means that any OP_RECURSE
|
||||
items within it that refer to the group itself or any contained groups have to
|
||||
have their offsets adjusted. That is one of the jobs of this function. Before it
|
||||
is called, the partially compiled regex must be temporarily terminated with
|
||||
OP_END.
|
||||
|
||||
This function has been extended with the possibility of forward references for
|
||||
recursions and subroutine calls. It must also check the list of such references
|
||||
@ -1983,7 +2041,6 @@ if (next >= 0) switch(op_code)
|
||||
/* For OP_NOT, "item" must be a single-byte character. */
|
||||
|
||||
case OP_NOT:
|
||||
if (next < 0) return FALSE; /* Not a character */
|
||||
if (item == next) return TRUE;
|
||||
if ((options & PCRE_CASELESS) == 0) return FALSE;
|
||||
#ifdef SUPPORT_UTF8
|
||||
@ -2486,7 +2543,7 @@ for (;; ptr++)
|
||||
zerofirstbyte = firstbyte;
|
||||
zeroreqbyte = reqbyte;
|
||||
previous = code;
|
||||
*code++ = OP_ANY;
|
||||
*code++ = ((options & PCRE_DOTALL) != 0)? OP_ALLANY: OP_ANY;
|
||||
break;
|
||||
|
||||
|
||||
@ -2501,7 +2558,17 @@ for (;; ptr++)
|
||||
opcode is compiled. It may optionally have a bit map for characters < 256,
|
||||
but those above are explicitly listed afterwards. A flag byte tells
|
||||
whether the bitmap is present, and whether this is a negated class or not.
|
||||
*/
|
||||
|
||||
In JavaScript compatibility mode, an isolated ']' causes an error. In
|
||||
default (Perl) mode, it is treated as a data character. */
|
||||
|
||||
case ']':
|
||||
if ((cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0)
|
||||
{
|
||||
*errorcodeptr = ERR64;
|
||||
goto FAILED;
|
||||
}
|
||||
goto NORMAL_CHAR;
|
||||
|
||||
case '[':
|
||||
previous = code;
|
||||
@ -2535,6 +2602,19 @@ for (;; ptr++)
|
||||
else break;
|
||||
}
|
||||
|
||||
/* Empty classes are allowed in JavaScript compatibility mode. Otherwise,
|
||||
an initial ']' is taken as a data character -- the code below handles
|
||||
that. In JS mode, [] must always fail, so generate OP_FAIL, whereas
|
||||
[^] must match any character, so generate OP_ALLANY. */
|
||||
|
||||
if (c ==']' && (cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0)
|
||||
{
|
||||
*code++ = negate_class? OP_ALLANY : OP_FAIL;
|
||||
if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
|
||||
zerofirstbyte = firstbyte;
|
||||
break;
|
||||
}
|
||||
|
||||
/* If a class contains a negative special such as \S, we need to flip the
|
||||
negation flag at the end, so that support for characters > 255 works
|
||||
correctly (they are all included in the class). */
|
||||
@ -3690,28 +3770,38 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
|
||||
|
||||
if (repeat_min == 0)
|
||||
{
|
||||
/* If the maximum is also zero, we just omit the group from the output
|
||||
altogether. */
|
||||
/* If the maximum is also zero, we used to just omit the group from the
|
||||
output altogether, like this:
|
||||
|
||||
if (repeat_max == 0)
|
||||
{
|
||||
code = previous;
|
||||
goto END_REPEAT;
|
||||
}
|
||||
** if (repeat_max == 0)
|
||||
** {
|
||||
** code = previous;
|
||||
** goto END_REPEAT;
|
||||
** }
|
||||
|
||||
/* If the maximum is 1 or unlimited, we just have to stick in the
|
||||
BRAZERO and do no more at this point. However, we do need to adjust
|
||||
any OP_RECURSE calls inside the group that refer to the group itself or
|
||||
any internal or forward referenced group, because the offset is from
|
||||
the start of the whole regex. Temporarily terminate the pattern while
|
||||
doing this. */
|
||||
However, that fails when a group is referenced as a subroutine from
|
||||
elsewhere in the pattern, so now we stick in OP_SKIPZERO in front of it
|
||||
so that it is skipped on execution. As we don't have a list of which
|
||||
groups are referenced, we cannot do this selectively.
|
||||
|
||||
if (repeat_max <= 1)
|
||||
If the maximum is 1 or unlimited, we just have to stick in the BRAZERO
|
||||
and do no more at this point. However, we do need to adjust any
|
||||
OP_RECURSE calls inside the group that refer to the group itself or any
|
||||
internal or forward referenced group, because the offset is from the
|
||||
start of the whole regex. Temporarily terminate the pattern while doing
|
||||
this. */
|
||||
|
||||
if (repeat_max <= 1) /* Covers 0, 1, and unlimited */
|
||||
{
|
||||
*code = OP_END;
|
||||
adjust_recurse(previous, 1, utf8, cd, save_hwm);
|
||||
memmove(previous+1, previous, len);
|
||||
code++;
|
||||
if (repeat_max == 0)
|
||||
{
|
||||
*previous++ = OP_SKIPZERO;
|
||||
goto END_REPEAT;
|
||||
}
|
||||
*previous++ = OP_BRAZERO + repeat_type;
|
||||
}
|
||||
|
||||
@ -3906,6 +3996,13 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
|
||||
}
|
||||
}
|
||||
|
||||
/* If previous is OP_FAIL, it was generated by an empty class [] in
|
||||
JavaScript mode. The other ways in which OP_FAIL can be generated, that is
|
||||
by (*FAIL) or (?!) set previous to NULL, which gives a "nothing to repeat"
|
||||
error above. We can just ignore the repeat in the JS case. */
|
||||
|
||||
else if (*previous == OP_FAIL) goto END_REPEAT;
|
||||
|
||||
/* Else there's some kind of shambles */
|
||||
|
||||
else
|
||||
@ -4192,7 +4289,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
|
||||
|
||||
/* Search the pattern for a forward reference */
|
||||
|
||||
else if ((i = find_parens(ptr, cd->bracount, name, namelen,
|
||||
else if ((i = find_parens(ptr, cd, name, namelen,
|
||||
(options & PCRE_EXTENDED) != 0)) > 0)
|
||||
{
|
||||
PUT2(code, 2+LINK_SIZE, i);
|
||||
@ -4438,7 +4535,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
|
||||
references (?P=name) and recursion (?P>name), as well as falling
|
||||
through from the Perl recursion syntax (?&name). We also come here from
|
||||
the Perl \k<name> or \k'name' back reference syntax and the \k{name}
|
||||
.NET syntax. */
|
||||
.NET syntax, and the Oniguruma \g<...> and \g'...' subroutine syntax. */
|
||||
|
||||
NAMED_REF_OR_RECURSE:
|
||||
name = ++ptr;
|
||||
@ -4489,7 +4586,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
|
||||
recno = GET2(slot, 0);
|
||||
}
|
||||
else if ((recno = /* Forward back reference */
|
||||
find_parens(ptr, cd->bracount, name, namelen,
|
||||
find_parens(ptr, cd, name, namelen,
|
||||
(options & PCRE_EXTENDED) != 0)) <= 0)
|
||||
{
|
||||
*errorcodeptr = ERR15;
|
||||
@ -4516,6 +4613,15 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
|
||||
case '5': case '6': case '7': case '8': case '9': /* subroutine */
|
||||
{
|
||||
const uschar *called;
|
||||
terminator = ')';
|
||||
|
||||
/* Come here from the \g<...> and \g'...' code (Oniguruma
|
||||
compatibility). However, the syntax has been checked to ensure that
|
||||
the ... are a (signed) number, so that neither ERR63 nor ERR29 will
|
||||
be called on this path, nor will the jump to OTHER_CHAR_AFTER_QUERY
|
||||
ever be taken. */
|
||||
|
||||
HANDLE_NUMERICAL_RECURSION:
|
||||
|
||||
if ((refsign = *ptr) == '+')
|
||||
{
|
||||
@ -4537,7 +4643,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
|
||||
while(g_ascii_isdigit(*ptr) != 0)
|
||||
recno = recno * 10 + *ptr++ - '0';
|
||||
|
||||
if (*ptr != ')')
|
||||
if (*ptr != terminator)
|
||||
{
|
||||
*errorcodeptr = ERR29;
|
||||
goto FAILED;
|
||||
@ -4590,8 +4696,8 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
|
||||
|
||||
if (called == NULL)
|
||||
{
|
||||
if (find_parens(ptr, cd->bracount, NULL, recno,
|
||||
(options & PCRE_EXTENDED) != 0) < 0)
|
||||
if (find_parens(ptr, cd, NULL, recno,
|
||||
(options & PCRE_EXTENDED) != 0) < 0)
|
||||
{
|
||||
*errorcodeptr = ERR15;
|
||||
goto FAILED;
|
||||
@ -4961,6 +5067,64 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
|
||||
zerofirstbyte = firstbyte;
|
||||
zeroreqbyte = reqbyte;
|
||||
|
||||
/* \g<name> or \g'name' is a subroutine call by name and \g<n> or \g'n'
|
||||
is a subroutine call by number (Oniguruma syntax). In fact, the value
|
||||
-ESC_g is returned only for these cases. So we don't need to check for <
|
||||
or ' if the value is -ESC_g. For the Perl syntax \g{n} the value is
|
||||
-ESC_REF+n, and for the Perl syntax \g{name} the result is -ESC_k (as
|
||||
that is a synonym for a named back reference). */
|
||||
|
||||
if (-c == ESC_g)
|
||||
{
|
||||
const uschar *p;
|
||||
save_hwm = cd->hwm; /* Normally this is set when '(' is read */
|
||||
terminator = (*(++ptr) == '<')? '>' : '\'';
|
||||
|
||||
/* These two statements stop the compiler from warning about possibly
|
||||
unset variables caused by the jump to HANDLE_NUMERICAL_RECURSION. In
|
||||
fact, because we actually check for a number below, the paths that
|
||||
would actually be in error are never taken. */
|
||||
|
||||
skipbytes = 0;
|
||||
reset_bracount = FALSE;
|
||||
|
||||
/* Test for a name */
|
||||
|
||||
if (ptr[1] != '+' && ptr[1] != '-')
|
||||
{
|
||||
BOOL isnumber = TRUE;
|
||||
for (p = ptr + 1; *p != 0 && *p != terminator; p++)
|
||||
{
|
||||
if ((cd->ctypes[*p] & ctype_digit) == 0) isnumber = FALSE;
|
||||
if ((cd->ctypes[*p] & ctype_word) == 0) break;
|
||||
}
|
||||
if (*p != terminator)
|
||||
{
|
||||
*errorcodeptr = ERR57;
|
||||
break;
|
||||
}
|
||||
if (isnumber)
|
||||
{
|
||||
ptr++;
|
||||
goto HANDLE_NUMERICAL_RECURSION;
|
||||
}
|
||||
is_recurse = TRUE;
|
||||
goto NAMED_REF_OR_RECURSE;
|
||||
}
|
||||
|
||||
/* Test a signed number in angle brackets or quotes. */
|
||||
|
||||
p = ptr + 2;
|
||||
while (g_ascii_isdigit(*p) != 0) p++;
|
||||
if (*p != terminator)
|
||||
{
|
||||
*errorcodeptr = ERR57;
|
||||
break;
|
||||
}
|
||||
ptr++;
|
||||
goto HANDLE_NUMERICAL_RECURSION;
|
||||
}
|
||||
|
||||
/* \k<name> or \k'name' is a back reference by name (Perl syntax).
|
||||
We also support \k{name} (.NET syntax) */
|
||||
|
||||
@ -5467,14 +5631,14 @@ do {
|
||||
if (!is_anchored(scode, options, bracket_map, backref_map)) return FALSE;
|
||||
}
|
||||
|
||||
/* .* is not anchored unless DOTALL is set and it isn't in brackets that
|
||||
are or may be referenced. */
|
||||
/* .* is not anchored unless DOTALL is set (which generates OP_ALLANY) and
|
||||
it isn't in brackets that are or may be referenced. */
|
||||
|
||||
else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR ||
|
||||
op == OP_TYPEPOSSTAR) &&
|
||||
(*options & PCRE_DOTALL) != 0)
|
||||
op == OP_TYPEPOSSTAR))
|
||||
{
|
||||
if (scode[1] != OP_ANY || (bracket_map & backref_map) != 0) return FALSE;
|
||||
if (scode[1] != OP_ALLANY || (bracket_map & backref_map) != 0)
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* Check for explicit anchoring */
|
||||
|
@ -84,11 +84,11 @@ centralize the loading of these characters. In the case of Type * etc, the
|
||||
small value. ***NOTE*** If the start of this table is modified, the two tables
|
||||
that follow must also be modified. */
|
||||
|
||||
static uschar coptable[] = {
|
||||
static const uschar coptable[] = {
|
||||
0, /* End */
|
||||
0, 0, 0, 0, 0, /* \A, \G, \K, \B, \b */
|
||||
0, 0, 0, 0, 0, 0, /* \D, \d, \S, \s, \W, \w */
|
||||
0, 0, /* Any, Anybyte */
|
||||
0, 0, 0, /* Any, AllAny, Anybyte */
|
||||
0, 0, 0, /* NOTPROP, PROP, EXTUNI */
|
||||
0, 0, 0, 0, 0, /* \R, \H, \h, \V, \v */
|
||||
0, 0, 0, 0, 0, /* \Z, \z, Opt, ^, $ */
|
||||
@ -132,26 +132,26 @@ static uschar coptable[] = {
|
||||
0, /* DEF */
|
||||
0, 0, /* BRAZERO, BRAMINZERO */
|
||||
0, 0, 0, 0, /* PRUNE, SKIP, THEN, COMMIT */
|
||||
0, 0 /* FAIL, ACCEPT */
|
||||
0, 0, 0 /* FAIL, ACCEPT, SKIPZERO */
|
||||
};
|
||||
|
||||
/* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
|
||||
and \w */
|
||||
|
||||
static uschar toptable1[] = {
|
||||
static const uschar toptable1[] = {
|
||||
0, 0, 0, 0, 0, 0,
|
||||
ctype_digit, ctype_digit,
|
||||
ctype_space, ctype_space,
|
||||
ctype_word, ctype_word,
|
||||
0 /* OP_ANY */
|
||||
0, 0 /* OP_ANY, OP_ALLANY */
|
||||
};
|
||||
|
||||
static uschar toptable2[] = {
|
||||
static const uschar toptable2[] = {
|
||||
0, 0, 0, 0, 0, 0,
|
||||
ctype_digit, 0,
|
||||
ctype_space, 0,
|
||||
ctype_word, 0,
|
||||
1 /* OP_ANY */
|
||||
1, 1 /* OP_ANY, OP_ALLANY */
|
||||
};
|
||||
|
||||
|
||||
@ -223,8 +223,8 @@ Arguments:
|
||||
rlevel function call recursion level
|
||||
recursing regex recursive call level
|
||||
|
||||
Returns: > 0 =>
|
||||
= 0 =>
|
||||
Returns: > 0 => number of match offset pairs placed in offsets
|
||||
= 0 => offsets overflowed; longest matches are present
|
||||
-1 => failed to match
|
||||
< -1 => some kind of unexpected problem
|
||||
|
||||
@ -693,6 +693,13 @@ for (;;)
|
||||
ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_SKIPZERO:
|
||||
code += 1 + GET(code, 2);
|
||||
while (*code == OP_ALT) code += GET(code, 1);
|
||||
ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_CIRC:
|
||||
if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
|
||||
@ -732,7 +739,13 @@ for (;;)
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_ANY:
|
||||
if (clen > 0 && ((ims & PCRE_DOTALL) != 0 || !IS_NEWLINE(ptr)))
|
||||
if (clen > 0 && !IS_NEWLINE(ptr))
|
||||
{ ADD_NEW(state_offset + 1, 0); }
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_ALLANY:
|
||||
if (clen > 0)
|
||||
{ ADD_NEW(state_offset + 1, 0); }
|
||||
break;
|
||||
|
||||
@ -852,8 +865,8 @@ for (;;)
|
||||
/* ========================================================================== */
|
||||
/* These opcodes likewise inspect the subject character, but have an
|
||||
argument that is not a data character. It is one of these opcodes:
|
||||
OP_ANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_SPACE, OP_WORDCHAR,
|
||||
OP_NOT_WORDCHAR. The value is loaded into d. */
|
||||
OP_ANY, OP_ALLANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_SPACE,
|
||||
OP_WORDCHAR, OP_NOT_WORDCHAR. The value is loaded into d. */
|
||||
|
||||
case OP_TYPEPLUS:
|
||||
case OP_TYPEMINPLUS:
|
||||
@ -864,10 +877,7 @@ for (;;)
|
||||
{
|
||||
if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
|
||||
(c < 256 &&
|
||||
(d != OP_ANY ||
|
||||
(ims & PCRE_DOTALL) != 0 ||
|
||||
!IS_NEWLINE(ptr)
|
||||
) &&
|
||||
(d != OP_ANY || !IS_NEWLINE(ptr)) &&
|
||||
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
|
||||
{
|
||||
if (count > 0 && codevalue == OP_TYPEPOSPLUS)
|
||||
@ -890,10 +900,7 @@ for (;;)
|
||||
{
|
||||
if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
|
||||
(c < 256 &&
|
||||
(d != OP_ANY ||
|
||||
(ims & PCRE_DOTALL) != 0 ||
|
||||
!IS_NEWLINE(ptr)
|
||||
) &&
|
||||
(d != OP_ANY || !IS_NEWLINE(ptr)) &&
|
||||
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
|
||||
{
|
||||
if (codevalue == OP_TYPEPOSQUERY)
|
||||
@ -915,10 +922,7 @@ for (;;)
|
||||
{
|
||||
if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
|
||||
(c < 256 &&
|
||||
(d != OP_ANY ||
|
||||
(ims & PCRE_DOTALL) != 0 ||
|
||||
!IS_NEWLINE(ptr)
|
||||
) &&
|
||||
(d != OP_ANY || !IS_NEWLINE(ptr)) &&
|
||||
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
|
||||
{
|
||||
if (codevalue == OP_TYPEPOSSTAR)
|
||||
@ -938,10 +942,7 @@ for (;;)
|
||||
{
|
||||
if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
|
||||
(c < 256 &&
|
||||
(d != OP_ANY ||
|
||||
(ims & PCRE_DOTALL) != 0 ||
|
||||
!IS_NEWLINE(ptr)
|
||||
) &&
|
||||
(d != OP_ANY || !IS_NEWLINE(ptr)) &&
|
||||
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
|
||||
{
|
||||
if (++count >= GET2(code, 1))
|
||||
@ -962,10 +963,7 @@ for (;;)
|
||||
{
|
||||
if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
|
||||
(c < 256 &&
|
||||
(d != OP_ANY ||
|
||||
(ims & PCRE_DOTALL) != 0 ||
|
||||
!IS_NEWLINE(ptr)
|
||||
) &&
|
||||
(d != OP_ANY || !IS_NEWLINE(ptr)) &&
|
||||
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
|
||||
{
|
||||
if (codevalue == OP_TYPEPOSUPTO)
|
||||
@ -2162,7 +2160,12 @@ for (;;)
|
||||
|
||||
/* ========================================================================== */
|
||||
/* These are the opcodes for fancy brackets of various kinds. We have
|
||||
to use recursion in order to handle them. */
|
||||
to use recursion in order to handle them. The "always failing" assertion
|
||||
(?!) is optimised when compiling to OP_FAIL, so we have to support that,
|
||||
though the other "backtracking verbs" are not supported. */
|
||||
|
||||
case OP_FAIL:
|
||||
break;
|
||||
|
||||
case OP_ASSERT:
|
||||
case OP_ASSERT_NOT:
|
||||
|
@ -1148,11 +1148,11 @@ for (;;)
|
||||
do ecode += GET(ecode,1); while (*ecode == OP_ALT);
|
||||
break;
|
||||
|
||||
/* BRAZERO and BRAMINZERO occur just before a bracket group, indicating
|
||||
that it may occur zero times. It may repeat infinitely, or not at all -
|
||||
i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper
|
||||
repeat limits are compiled as a number of copies, with the optional ones
|
||||
preceded by BRAZERO or BRAMINZERO. */
|
||||
/* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
|
||||
indicating that it may occur zero times. It may repeat infinitely, or not
|
||||
at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
|
||||
with fixed upper repeat limits are compiled as a number of copies, with the
|
||||
optional ones preceded by BRAZERO or BRAMINZERO. */
|
||||
|
||||
case OP_BRAZERO:
|
||||
{
|
||||
@ -1174,6 +1174,14 @@ for (;;)
|
||||
}
|
||||
break;
|
||||
|
||||
case OP_SKIPZERO:
|
||||
{
|
||||
next = ecode+1;
|
||||
do next += GET(next,1); while (*next == OP_ALT);
|
||||
ecode = next + 1 + LINK_SIZE;
|
||||
}
|
||||
break;
|
||||
|
||||
/* End of a group, repeated or non-repeating. */
|
||||
|
||||
case OP_KET:
|
||||
@ -1421,13 +1429,12 @@ for (;;)
|
||||
/* Match a single character type; inline for speed */
|
||||
|
||||
case OP_ANY:
|
||||
if ((ims & PCRE_DOTALL) == 0)
|
||||
{
|
||||
if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
|
||||
/* Fall through */
|
||||
|
||||
case OP_ALLANY:
|
||||
if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
|
||||
if (utf8)
|
||||
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
|
||||
if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
|
||||
ecode++;
|
||||
break;
|
||||
|
||||
@ -1723,16 +1730,25 @@ for (;;)
|
||||
case OP_REF:
|
||||
{
|
||||
offset = GET2(ecode, 1) << 1; /* Doubled ref number */
|
||||
ecode += 3; /* Advance past item */
|
||||
ecode += 3;
|
||||
|
||||
/* If the reference is unset, set the length to be longer than the amount
|
||||
of subject left; this ensures that every attempt at a match fails. We
|
||||
can't just fail here, because of the possibility of quantifiers with zero
|
||||
minima. */
|
||||
/* If the reference is unset, there are two possibilities:
|
||||
|
||||
length = (offset >= offset_top || md->offset_vector[offset] < 0)?
|
||||
md->end_subject - eptr + 1 :
|
||||
md->offset_vector[offset+1] - md->offset_vector[offset];
|
||||
(a) In the default, Perl-compatible state, set the length to be longer
|
||||
than the amount of subject left; this ensures that every attempt at a
|
||||
match fails. We can't just fail here, because of the possibility of
|
||||
quantifiers with zero minima.
|
||||
|
||||
(b) If the JavaScript compatibility flag is set, set the length to zero
|
||||
so that the back reference matches an empty string.
|
||||
|
||||
Otherwise, set the length to the length of what was matched by the
|
||||
referenced subpattern. */
|
||||
|
||||
if (offset >= offset_top || md->offset_vector[offset] < 0)
|
||||
length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;
|
||||
else
|
||||
length = md->offset_vector[offset+1] - md->offset_vector[offset];
|
||||
|
||||
/* Set up for repetition, or handle the non-repeated case */
|
||||
|
||||
@ -2935,14 +2951,22 @@ for (;;)
|
||||
case OP_ANY:
|
||||
for (i = 1; i <= min; i++)
|
||||
{
|
||||
if (eptr >= md->end_subject ||
|
||||
((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
|
||||
if (eptr >= md->end_subject || IS_NEWLINE(eptr))
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
eptr++;
|
||||
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
|
||||
}
|
||||
break;
|
||||
|
||||
case OP_ALLANY:
|
||||
for (i = 1; i <= min; i++)
|
||||
{
|
||||
if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
|
||||
eptr++;
|
||||
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
|
||||
}
|
||||
break;
|
||||
|
||||
case OP_ANYBYTE:
|
||||
eptr += min;
|
||||
break;
|
||||
@ -3151,15 +3175,15 @@ for (;;)
|
||||
switch(ctype)
|
||||
{
|
||||
case OP_ANY:
|
||||
if ((ims & PCRE_DOTALL) == 0)
|
||||
for (i = 1; i <= min; i++)
|
||||
{
|
||||
for (i = 1; i <= min; i++)
|
||||
{
|
||||
if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
|
||||
eptr++;
|
||||
}
|
||||
if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
|
||||
eptr++;
|
||||
}
|
||||
else eptr += min;
|
||||
break;
|
||||
|
||||
case OP_ALLANY:
|
||||
eptr += min;
|
||||
break;
|
||||
|
||||
case OP_ANYBYTE:
|
||||
@ -3416,16 +3440,14 @@ for (;;)
|
||||
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
if (fi >= max || eptr >= md->end_subject ||
|
||||
(ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&
|
||||
IS_NEWLINE(eptr)))
|
||||
(ctype == OP_ANY && IS_NEWLINE(eptr)))
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
|
||||
GETCHARINC(c, eptr);
|
||||
switch(ctype)
|
||||
{
|
||||
case OP_ANY: /* This is the DOTALL case */
|
||||
break;
|
||||
|
||||
case OP_ANY: /* This is the non-NL case */
|
||||
case OP_ALLANY:
|
||||
case OP_ANYBYTE:
|
||||
break;
|
||||
|
||||
@ -3577,15 +3599,14 @@ for (;;)
|
||||
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
if (fi >= max || eptr >= md->end_subject ||
|
||||
((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
|
||||
(ctype == OP_ANY && IS_NEWLINE(eptr)))
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
|
||||
c = *eptr++;
|
||||
switch(ctype)
|
||||
{
|
||||
case OP_ANY: /* This is the DOTALL case */
|
||||
break;
|
||||
|
||||
case OP_ANY: /* This is the non-NL case */
|
||||
case OP_ALLANY:
|
||||
case OP_ANYBYTE:
|
||||
break;
|
||||
|
||||
@ -3839,23 +3860,11 @@ for (;;)
|
||||
case OP_ANY:
|
||||
if (max < INT_MAX)
|
||||
{
|
||||
if ((ims & PCRE_DOTALL) == 0)
|
||||
for (i = min; i < max; i++)
|
||||
{
|
||||
for (i = min; i < max; i++)
|
||||
{
|
||||
if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
|
||||
eptr++;
|
||||
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (i = min; i < max; i++)
|
||||
{
|
||||
if (eptr >= md->end_subject) break;
|
||||
eptr++;
|
||||
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
|
||||
}
|
||||
if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
|
||||
eptr++;
|
||||
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
|
||||
}
|
||||
}
|
||||
|
||||
@ -3863,22 +3872,28 @@ for (;;)
|
||||
|
||||
else
|
||||
{
|
||||
if ((ims & PCRE_DOTALL) == 0)
|
||||
for (i = min; i < max; i++)
|
||||
{
|
||||
for (i = min; i < max; i++)
|
||||
{
|
||||
if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
|
||||
eptr++;
|
||||
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
eptr = md->end_subject;
|
||||
if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
|
||||
eptr++;
|
||||
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case OP_ALLANY:
|
||||
if (max < INT_MAX)
|
||||
{
|
||||
for (i = min; i < max; i++)
|
||||
{
|
||||
if (eptr >= md->end_subject) break;
|
||||
eptr++;
|
||||
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
|
||||
}
|
||||
}
|
||||
else eptr = md->end_subject; /* Unlimited UTF-8 repeat */
|
||||
break;
|
||||
|
||||
/* The byte case is the same as non-UTF8 */
|
||||
|
||||
case OP_ANYBYTE:
|
||||
@ -4064,17 +4079,14 @@ for (;;)
|
||||
switch(ctype)
|
||||
{
|
||||
case OP_ANY:
|
||||
if ((ims & PCRE_DOTALL) == 0)
|
||||
for (i = min; i < max; i++)
|
||||
{
|
||||
for (i = min; i < max; i++)
|
||||
{
|
||||
if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
|
||||
eptr++;
|
||||
}
|
||||
break;
|
||||
if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
|
||||
eptr++;
|
||||
}
|
||||
/* For DOTALL case, fall through and treat as \C */
|
||||
break;
|
||||
|
||||
case OP_ALLANY:
|
||||
case OP_ANYBYTE:
|
||||
c = max - min;
|
||||
if (c > (unsigned int)(md->end_subject - eptr))
|
||||
@ -4450,6 +4462,7 @@ end_subject = md->end_subject;
|
||||
|
||||
md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
|
||||
utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
|
||||
md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
|
||||
|
||||
md->notbol = (options & PCRE_NOTBOL) != 0;
|
||||
md->noteol = (options & PCRE_NOTEOL) != 0;
|
||||
|
@ -52,6 +52,8 @@ differently, and global variables are not used (see pcre.in). */
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
#ifndef VPCOMPAT
|
||||
PCRE_EXP_DATA_DEFN int (*pcre_callout)(pcre_callout_block *) = NULL;
|
||||
#endif
|
||||
|
||||
/* End of pcre_globals.c */
|
||||
|
@ -514,7 +514,8 @@ time, run time, or study time, respectively. */
|
||||
(PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \
|
||||
PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \
|
||||
PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT|PCRE_FIRSTLINE| \
|
||||
PCRE_DUPNAMES|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)
|
||||
PCRE_DUPNAMES|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE| \
|
||||
PCRE_JAVASCRIPT_COMPAT)
|
||||
|
||||
#define PUBLIC_EXEC_OPTIONS \
|
||||
(PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK| \
|
||||
@ -601,16 +602,20 @@ contain UTF-8 characters with values greater than 255. */
|
||||
value such as \n. They must have non-zero values, as check_escape() returns
|
||||
their negation. Also, they must appear in the same order as in the opcode
|
||||
definitions below, up to ESC_z. There's a dummy for OP_ANY because it
|
||||
corresponds to "." rather than an escape sequence. The final one must be
|
||||
ESC_REF as subsequent values are used for backreferences (\1, \2, \3, etc).
|
||||
There are two tests in the code for an escape greater than ESC_b and less than
|
||||
ESC_Z to detect the types that may be repeated. These are the types that
|
||||
consume characters. If any new escapes are put in between that don't consume a
|
||||
character, that code will have to change. */
|
||||
corresponds to "." rather than an escape sequence, and another for OP_ALLANY
|
||||
(which is used for [^] in JavaScript compatibility mode).
|
||||
|
||||
The final escape must be ESC_REF as subsequent values are used for
|
||||
backreferences (\1, \2, \3, etc). There are two tests in the code for an escape
|
||||
greater than ESC_b and less than ESC_Z to detect the types that may be
|
||||
repeated. These are the types that consume characters. If any new escapes are
|
||||
put in between that don't consume a character, that code will have to change.
|
||||
*/
|
||||
|
||||
enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s,
|
||||
ESC_W, ESC_w, ESC_dum1, ESC_C, ESC_P, ESC_p, ESC_R, ESC_H, ESC_h,
|
||||
ESC_V, ESC_v, ESC_X, ESC_Z, ESC_z, ESC_E, ESC_Q, ESC_k, ESC_REF };
|
||||
ESC_W, ESC_w, ESC_dum1, ESC_dum2, ESC_C, ESC_P, ESC_p, ESC_R, ESC_H,
|
||||
ESC_h, ESC_V, ESC_v, ESC_X, ESC_Z, ESC_z, ESC_E, ESC_Q, ESC_g, ESC_k,
|
||||
ESC_REF };
|
||||
|
||||
|
||||
/* Opcode table: Starting from 1 (i.e. after OP_END), the values up to
|
||||
@ -636,141 +641,146 @@ enum {
|
||||
OP_WHITESPACE, /* 9 \s */
|
||||
OP_NOT_WORDCHAR, /* 10 \W */
|
||||
OP_WORDCHAR, /* 11 \w */
|
||||
OP_ANY, /* 12 Match any character */
|
||||
OP_ANYBYTE, /* 13 Match any byte (\C); different to OP_ANY for UTF-8 */
|
||||
OP_NOTPROP, /* 14 \P (not Unicode property) */
|
||||
OP_PROP, /* 15 \p (Unicode property) */
|
||||
OP_ANYNL, /* 16 \R (any newline sequence) */
|
||||
OP_NOT_HSPACE, /* 17 \H (not horizontal whitespace) */
|
||||
OP_HSPACE, /* 18 \h (horizontal whitespace) */
|
||||
OP_NOT_VSPACE, /* 19 \V (not vertical whitespace) */
|
||||
OP_VSPACE, /* 20 \v (vertical whitespace) */
|
||||
OP_EXTUNI, /* 21 \X (extended Unicode sequence) */
|
||||
OP_EODN, /* 22 End of data or \n at end of data: \Z. */
|
||||
OP_EOD, /* 23 End of data: \z */
|
||||
OP_ANY, /* 12 Match any character (subject to DOTALL) */
|
||||
OP_ALLANY, /* 13 Match any character (not subject to DOTALL) */
|
||||
OP_ANYBYTE, /* 14 Match any byte (\C); different to OP_ANY for UTF-8 */
|
||||
OP_NOTPROP, /* 15 \P (not Unicode property) */
|
||||
OP_PROP, /* 16 \p (Unicode property) */
|
||||
OP_ANYNL, /* 17 \R (any newline sequence) */
|
||||
OP_NOT_HSPACE, /* 18 \H (not horizontal whitespace) */
|
||||
OP_HSPACE, /* 19 \h (horizontal whitespace) */
|
||||
OP_NOT_VSPACE, /* 20 \V (not vertical whitespace) */
|
||||
OP_VSPACE, /* 21 \v (vertical whitespace) */
|
||||
OP_EXTUNI, /* 22 \X (extended Unicode sequence) */
|
||||
OP_EODN, /* 23 End of data or \n at end of data: \Z. */
|
||||
OP_EOD, /* 24 End of data: \z */
|
||||
|
||||
OP_OPT, /* 24 Set runtime options */
|
||||
OP_CIRC, /* 25 Start of line - varies with multiline switch */
|
||||
OP_DOLL, /* 26 End of line - varies with multiline switch */
|
||||
OP_CHAR, /* 27 Match one character, casefully */
|
||||
OP_CHARNC, /* 28 Match one character, caselessly */
|
||||
OP_NOT, /* 29 Match one character, not the following one */
|
||||
OP_OPT, /* 25 Set runtime options */
|
||||
OP_CIRC, /* 26 Start of line - varies with multiline switch */
|
||||
OP_DOLL, /* 27 End of line - varies with multiline switch */
|
||||
OP_CHAR, /* 28 Match one character, casefully */
|
||||
OP_CHARNC, /* 29 Match one character, caselessly */
|
||||
OP_NOT, /* 30 Match one character, not the following one */
|
||||
|
||||
OP_STAR, /* 30 The maximizing and minimizing versions of */
|
||||
OP_MINSTAR, /* 31 these six opcodes must come in pairs, with */
|
||||
OP_PLUS, /* 32 the minimizing one second. */
|
||||
OP_MINPLUS, /* 33 This first set applies to single characters.*/
|
||||
OP_QUERY, /* 34 */
|
||||
OP_MINQUERY, /* 35 */
|
||||
OP_STAR, /* 31 The maximizing and minimizing versions of */
|
||||
OP_MINSTAR, /* 32 these six opcodes must come in pairs, with */
|
||||
OP_PLUS, /* 33 the minimizing one second. */
|
||||
OP_MINPLUS, /* 34 This first set applies to single characters.*/
|
||||
OP_QUERY, /* 35 */
|
||||
OP_MINQUERY, /* 36 */
|
||||
|
||||
OP_UPTO, /* 36 From 0 to n matches */
|
||||
OP_MINUPTO, /* 37 */
|
||||
OP_EXACT, /* 38 Exactly n matches */
|
||||
OP_UPTO, /* 37 From 0 to n matches */
|
||||
OP_MINUPTO, /* 38 */
|
||||
OP_EXACT, /* 39 Exactly n matches */
|
||||
|
||||
OP_POSSTAR, /* 39 Possessified star */
|
||||
OP_POSPLUS, /* 40 Possessified plus */
|
||||
OP_POSQUERY, /* 41 Possessified query */
|
||||
OP_POSUPTO, /* 42 Possessified upto */
|
||||
OP_POSSTAR, /* 40 Possessified star */
|
||||
OP_POSPLUS, /* 41 Possessified plus */
|
||||
OP_POSQUERY, /* 42 Possessified query */
|
||||
OP_POSUPTO, /* 43 Possessified upto */
|
||||
|
||||
OP_NOTSTAR, /* 43 The maximizing and minimizing versions of */
|
||||
OP_NOTMINSTAR, /* 44 these six opcodes must come in pairs, with */
|
||||
OP_NOTPLUS, /* 45 the minimizing one second. They must be in */
|
||||
OP_NOTMINPLUS, /* 46 exactly the same order as those above. */
|
||||
OP_NOTQUERY, /* 47 This set applies to "not" single characters. */
|
||||
OP_NOTMINQUERY, /* 48 */
|
||||
OP_NOTSTAR, /* 44 The maximizing and minimizing versions of */
|
||||
OP_NOTMINSTAR, /* 45 these six opcodes must come in pairs, with */
|
||||
OP_NOTPLUS, /* 46 the minimizing one second. They must be in */
|
||||
OP_NOTMINPLUS, /* 47 exactly the same order as those above. */
|
||||
OP_NOTQUERY, /* 48 This set applies to "not" single characters. */
|
||||
OP_NOTMINQUERY, /* 49 */
|
||||
|
||||
OP_NOTUPTO, /* 49 From 0 to n matches */
|
||||
OP_NOTMINUPTO, /* 50 */
|
||||
OP_NOTEXACT, /* 51 Exactly n matches */
|
||||
OP_NOTUPTO, /* 50 From 0 to n matches */
|
||||
OP_NOTMINUPTO, /* 51 */
|
||||
OP_NOTEXACT, /* 52 Exactly n matches */
|
||||
|
||||
OP_NOTPOSSTAR, /* 52 Possessified versions */
|
||||
OP_NOTPOSPLUS, /* 53 */
|
||||
OP_NOTPOSQUERY, /* 54 */
|
||||
OP_NOTPOSUPTO, /* 55 */
|
||||
OP_NOTPOSSTAR, /* 53 Possessified versions */
|
||||
OP_NOTPOSPLUS, /* 54 */
|
||||
OP_NOTPOSQUERY, /* 55 */
|
||||
OP_NOTPOSUPTO, /* 56 */
|
||||
|
||||
OP_TYPESTAR, /* 56 The maximizing and minimizing versions of */
|
||||
OP_TYPEMINSTAR, /* 57 these six opcodes must come in pairs, with */
|
||||
OP_TYPEPLUS, /* 58 the minimizing one second. These codes must */
|
||||
OP_TYPEMINPLUS, /* 59 be in exactly the same order as those above. */
|
||||
OP_TYPEQUERY, /* 60 This set applies to character types such as \d */
|
||||
OP_TYPEMINQUERY, /* 61 */
|
||||
OP_TYPESTAR, /* 57 The maximizing and minimizing versions of */
|
||||
OP_TYPEMINSTAR, /* 58 these six opcodes must come in pairs, with */
|
||||
OP_TYPEPLUS, /* 59 the minimizing one second. These codes must */
|
||||
OP_TYPEMINPLUS, /* 60 be in exactly the same order as those above. */
|
||||
OP_TYPEQUERY, /* 61 This set applies to character types such as \d */
|
||||
OP_TYPEMINQUERY, /* 62 */
|
||||
|
||||
OP_TYPEUPTO, /* 62 From 0 to n matches */
|
||||
OP_TYPEMINUPTO, /* 63 */
|
||||
OP_TYPEEXACT, /* 64 Exactly n matches */
|
||||
OP_TYPEUPTO, /* 63 From 0 to n matches */
|
||||
OP_TYPEMINUPTO, /* 64 */
|
||||
OP_TYPEEXACT, /* 65 Exactly n matches */
|
||||
|
||||
OP_TYPEPOSSTAR, /* 65 Possessified versions */
|
||||
OP_TYPEPOSPLUS, /* 66 */
|
||||
OP_TYPEPOSQUERY, /* 67 */
|
||||
OP_TYPEPOSUPTO, /* 68 */
|
||||
OP_TYPEPOSSTAR, /* 66 Possessified versions */
|
||||
OP_TYPEPOSPLUS, /* 67 */
|
||||
OP_TYPEPOSQUERY, /* 68 */
|
||||
OP_TYPEPOSUPTO, /* 69 */
|
||||
|
||||
OP_CRSTAR, /* 69 The maximizing and minimizing versions of */
|
||||
OP_CRMINSTAR, /* 70 all these opcodes must come in pairs, with */
|
||||
OP_CRPLUS, /* 71 the minimizing one second. These codes must */
|
||||
OP_CRMINPLUS, /* 72 be in exactly the same order as those above. */
|
||||
OP_CRQUERY, /* 73 These are for character classes and back refs */
|
||||
OP_CRMINQUERY, /* 74 */
|
||||
OP_CRRANGE, /* 75 These are different to the three sets above. */
|
||||
OP_CRMINRANGE, /* 76 */
|
||||
OP_CRSTAR, /* 70 The maximizing and minimizing versions of */
|
||||
OP_CRMINSTAR, /* 71 all these opcodes must come in pairs, with */
|
||||
OP_CRPLUS, /* 72 the minimizing one second. These codes must */
|
||||
OP_CRMINPLUS, /* 73 be in exactly the same order as those above. */
|
||||
OP_CRQUERY, /* 74 These are for character classes and back refs */
|
||||
OP_CRMINQUERY, /* 75 */
|
||||
OP_CRRANGE, /* 76 These are different to the three sets above. */
|
||||
OP_CRMINRANGE, /* 77 */
|
||||
|
||||
OP_CLASS, /* 77 Match a character class, chars < 256 only */
|
||||
OP_NCLASS, /* 78 Same, but the bitmap was created from a negative
|
||||
OP_CLASS, /* 78 Match a character class, chars < 256 only */
|
||||
OP_NCLASS, /* 79 Same, but the bitmap was created from a negative
|
||||
class - the difference is relevant only when a UTF-8
|
||||
character > 255 is encountered. */
|
||||
|
||||
OP_XCLASS, /* 79 Extended class for handling UTF-8 chars within the
|
||||
OP_XCLASS, /* 80 Extended class for handling UTF-8 chars within the
|
||||
class. This does both positive and negative. */
|
||||
|
||||
OP_REF, /* 80 Match a back reference */
|
||||
OP_RECURSE, /* 81 Match a numbered subpattern (possibly recursive) */
|
||||
OP_CALLOUT, /* 82 Call out to external function if provided */
|
||||
OP_REF, /* 81 Match a back reference */
|
||||
OP_RECURSE, /* 82 Match a numbered subpattern (possibly recursive) */
|
||||
OP_CALLOUT, /* 83 Call out to external function if provided */
|
||||
|
||||
OP_ALT, /* 83 Start of alternation */
|
||||
OP_KET, /* 84 End of group that doesn't have an unbounded repeat */
|
||||
OP_KETRMAX, /* 85 These two must remain together and in this */
|
||||
OP_KETRMIN, /* 86 order. They are for groups that repeat for ever. */
|
||||
OP_ALT, /* 84 Start of alternation */
|
||||
OP_KET, /* 85 End of group that doesn't have an unbounded repeat */
|
||||
OP_KETRMAX, /* 86 These two must remain together and in this */
|
||||
OP_KETRMIN, /* 87 order. They are for groups that repeat for ever. */
|
||||
|
||||
/* The assertions must come before BRA, CBRA, ONCE, and COND.*/
|
||||
|
||||
OP_ASSERT, /* 87 Positive lookahead */
|
||||
OP_ASSERT_NOT, /* 88 Negative lookahead */
|
||||
OP_ASSERTBACK, /* 89 Positive lookbehind */
|
||||
OP_ASSERTBACK_NOT, /* 90 Negative lookbehind */
|
||||
OP_REVERSE, /* 91 Move pointer back - used in lookbehind assertions */
|
||||
OP_ASSERT, /* 88 Positive lookahead */
|
||||
OP_ASSERT_NOT, /* 89 Negative lookahead */
|
||||
OP_ASSERTBACK, /* 90 Positive lookbehind */
|
||||
OP_ASSERTBACK_NOT, /* 91 Negative lookbehind */
|
||||
OP_REVERSE, /* 92 Move pointer back - used in lookbehind assertions */
|
||||
|
||||
/* ONCE, BRA, CBRA, and COND must come after the assertions, with ONCE first,
|
||||
as there's a test for >= ONCE for a subpattern that isn't an assertion. */
|
||||
|
||||
OP_ONCE, /* 92 Atomic group */
|
||||
OP_BRA, /* 93 Start of non-capturing bracket */
|
||||
OP_CBRA, /* 94 Start of capturing bracket */
|
||||
OP_COND, /* 95 Conditional group */
|
||||
OP_ONCE, /* 93 Atomic group */
|
||||
OP_BRA, /* 94 Start of non-capturing bracket */
|
||||
OP_CBRA, /* 95 Start of capturing bracket */
|
||||
OP_COND, /* 96 Conditional group */
|
||||
|
||||
/* These three must follow the previous three, in the same order. There's a
|
||||
check for >= SBRA to distinguish the two sets. */
|
||||
|
||||
OP_SBRA, /* 96 Start of non-capturing bracket, check empty */
|
||||
OP_SCBRA, /* 97 Start of capturing bracket, check empty */
|
||||
OP_SCOND, /* 98 Conditional group, check empty */
|
||||
OP_SBRA, /* 97 Start of non-capturing bracket, check empty */
|
||||
OP_SCBRA, /* 98 Start of capturing bracket, check empty */
|
||||
OP_SCOND, /* 99 Conditional group, check empty */
|
||||
|
||||
OP_CREF, /* 99 Used to hold a capture number as condition */
|
||||
OP_RREF, /* 100 Used to hold a recursion number as condition */
|
||||
OP_DEF, /* 101 The DEFINE condition */
|
||||
OP_CREF, /* 100 Used to hold a capture number as condition */
|
||||
OP_RREF, /* 101 Used to hold a recursion number as condition */
|
||||
OP_DEF, /* 102 The DEFINE condition */
|
||||
|
||||
OP_BRAZERO, /* 102 These two must remain together and in this */
|
||||
OP_BRAMINZERO, /* 103 order. */
|
||||
OP_BRAZERO, /* 103 These two must remain together and in this */
|
||||
OP_BRAMINZERO, /* 104 order. */
|
||||
|
||||
/* These are backtracking control verbs */
|
||||
|
||||
OP_PRUNE, /* 104 */
|
||||
OP_SKIP, /* 105 */
|
||||
OP_THEN, /* 106 */
|
||||
OP_COMMIT, /* 107 */
|
||||
OP_PRUNE, /* 105 */
|
||||
OP_SKIP, /* 106 */
|
||||
OP_THEN, /* 107 */
|
||||
OP_COMMIT, /* 108 */
|
||||
|
||||
/* These are forced failure and success verbs */
|
||||
|
||||
OP_FAIL, /* 108 */
|
||||
OP_ACCEPT /* 109 */
|
||||
OP_FAIL, /* 109 */
|
||||
OP_ACCEPT, /* 110 */
|
||||
|
||||
/* This is used to skip a subpattern with a {0} quantifier */
|
||||
|
||||
OP_SKIPZERO /* 111 */
|
||||
};
|
||||
|
||||
|
||||
@ -779,7 +789,7 @@ for debugging. The macro is referenced only in pcre_printint.c. */
|
||||
|
||||
#define OP_NAME_LIST \
|
||||
"End", "\\A", "\\G", "\\K", "\\B", "\\b", "\\D", "\\d", \
|
||||
"\\S", "\\s", "\\W", "\\w", "Any", "Anybyte", \
|
||||
"\\S", "\\s", "\\W", "\\w", "Any", "AllAny", "Anybyte", \
|
||||
"notprop", "prop", "\\R", "\\H", "\\h", "\\V", "\\v", \
|
||||
"extuni", "\\Z", "\\z", \
|
||||
"Opt", "^", "$", "char", "charnc", "not", \
|
||||
@ -795,7 +805,8 @@ for debugging. The macro is referenced only in pcre_printint.c. */
|
||||
"AssertB", "AssertB not", "Reverse", \
|
||||
"Once", "Bra", "CBra", "Cond", "SBra", "SCBra", "SCond", \
|
||||
"Cond ref", "Cond rec", "Cond def", "Brazero", "Braminzero", \
|
||||
"*PRUNE", "*SKIP", "*THEN", "*COMMIT", "*FAIL", "*ACCEPT"
|
||||
"*PRUNE", "*SKIP", "*THEN", "*COMMIT", "*FAIL", "*ACCEPT", \
|
||||
"Skip zero"
|
||||
|
||||
|
||||
/* This macro defines the length of fixed length operations in the compiled
|
||||
@ -811,7 +822,7 @@ in UTF-8 mode. The code that uses this table must know about such things. */
|
||||
1, /* End */ \
|
||||
1, 1, 1, 1, 1, /* \A, \G, \K, \B, \b */ \
|
||||
1, 1, 1, 1, 1, 1, /* \D, \d, \S, \s, \W, \w */ \
|
||||
1, 1, /* Any, Anybyte */ \
|
||||
1, 1, 1, /* Any, AllAny, Anybyte */ \
|
||||
3, 3, 1, /* NOTPROP, PROP, EXTUNI */ \
|
||||
1, 1, 1, 1, 1, /* \R, \H, \h, \V, \v */ \
|
||||
1, 1, 2, 1, 1, /* \Z, \z, Opt, ^, $ */ \
|
||||
@ -860,7 +871,7 @@ in UTF-8 mode. The code that uses this table must know about such things. */
|
||||
1, /* DEF */ \
|
||||
1, 1, /* BRAZERO, BRAMINZERO */ \
|
||||
1, 1, 1, 1, /* PRUNE, SKIP, THEN, COMMIT, */ \
|
||||
1, 1 /* FAIL, ACCEPT */
|
||||
1, 1, 1 /* FAIL, ACCEPT, SKIPZERO */
|
||||
|
||||
|
||||
/* A magic value for OP_RREF to indicate the "any recursion" condition. */
|
||||
@ -876,7 +887,7 @@ enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9,
|
||||
ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39,
|
||||
ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
|
||||
ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
|
||||
ERR60, ERR61, ERR62, ERR63 };
|
||||
ERR60, ERR61, ERR62, ERR63, ERR64 };
|
||||
|
||||
/* The real format of the start of the pcre block; the index of names and the
|
||||
code vector run on as long as necessary after the end. We store an explicit
|
||||
@ -1001,6 +1012,7 @@ typedef struct match_data {
|
||||
BOOL notbol; /* NOTBOL flag */
|
||||
BOOL noteol; /* NOTEOL flag */
|
||||
BOOL utf8; /* UTF8 flag */
|
||||
BOOL jscript_compat; /* JAVASCRIPT_COMPAT flag */
|
||||
BOOL endonly; /* Dollar not before final \n */
|
||||
BOOL notempty; /* Empty string match not wanted */
|
||||
BOOL partial; /* PARTIAL flag */
|
||||
|
@ -217,6 +217,13 @@ do
|
||||
tcode += 1 + LINK_SIZE;
|
||||
break;
|
||||
|
||||
/* SKIPZERO skips the bracket. */
|
||||
|
||||
case OP_SKIPZERO:
|
||||
do tcode += GET(tcode,1); while (*tcode == OP_ALT);
|
||||
tcode += 1 + LINK_SIZE;
|
||||
break;
|
||||
|
||||
/* Single-char * or ? sets the bit and tries the next item */
|
||||
|
||||
case OP_STAR:
|
||||
@ -341,6 +348,7 @@ do
|
||||
switch(tcode[1])
|
||||
{
|
||||
case OP_ANY:
|
||||
case OP_ALLANY:
|
||||
return SSB_FAIL;
|
||||
|
||||
case OP_NOT_DIGIT:
|
||||
|
@ -17,7 +17,7 @@ typedef struct cnode {
|
||||
|
||||
#define f0_scriptmask 0xff000000 /* Mask for script field */
|
||||
#define f0_scriptshift 24 /* Shift for script value */
|
||||
#define f0_rangeflag 0x00f00000 /* Flag for a range item */
|
||||
#define f0_rangeflag 0x00800000 /* Flag for a range item */
|
||||
#define f0_charmask 0x001fffff /* Mask for code point value */
|
||||
|
||||
/* Things for the f1 field */
|
||||
|
Loading…
Reference in New Issue
Block a user