mirror of
https://gitlab.gnome.org/GNOME/glib.git
synced 2025-01-26 05:56:14 +01:00
PCRE 7.7
svn path=/trunk/; revision=6938
This commit is contained in:
parent
c9db84f9f2
commit
adae23350a
@ -1,3 +1,7 @@
|
|||||||
|
2008-05-27 Matthias Clasen <mclasen@redhat.com>
|
||||||
|
|
||||||
|
* glib/pcre/*: Update to PCRE 7.7
|
||||||
|
|
||||||
2008-05-26 Matthias Clasen <mclasen@redhat.com>
|
2008-05-26 Matthias Clasen <mclasen@redhat.com>
|
||||||
|
|
||||||
* glib/gchecksum.c: Add Since: tag to g_checksum_reset
|
* glib/gchecksum.c: Add Since: tag to g_checksum_reset
|
||||||
|
@ -1,35 +1,30 @@
|
|||||||
TOP = ..\..\..
|
TOP = ..\..\..
|
||||||
!INCLUDE ..\..\build\win32\make.msc
|
!INCLUDE ..\..\build\win32\make.msc
|
||||||
|
|
||||||
INCLUDES = \\
|
INCLUDES = \
|
||||||
-I ..\.. \\
|
-I ..\.. \
|
||||||
-I ..
|
-I ..
|
||||||
|
|
||||||
DEFINES = \\
|
DEFINES = \
|
||||||
-DPCRE_STATIC \\
|
-DPCRE_STATIC \
|
||||||
-DHAVE_CONFIG_H \\
|
-DHAVE_CONFIG_H \
|
||||||
-DHAVE_LONG_LONG_FORMAT \\
|
-DHAVE_LONG_LONG_FORMAT \
|
||||||
-DSUPPORT_UCP \\
|
-DSUPPORT_UCP \
|
||||||
-DSUPPORT_UTF8 \\
|
-DSUPPORT_UTF8 \
|
||||||
-DNEWLINE=-1 \\
|
-DNEWLINE=-1 \
|
||||||
-DMATCH_LIMIT=10000000 \\
|
-DMATCH_LIMIT=10000000 \
|
||||||
-DMATCH_LIMIT_RECURSION=10000000 \\
|
-DMATCH_LIMIT_RECURSION=10000000 \
|
||||||
-DMAX_NAME_SIZE=32 \\
|
-DMAX_NAME_SIZE=32 \
|
||||||
-DMAX_NAME_COUNT=10000 \\
|
-DMAX_NAME_COUNT=10000 \
|
||||||
-DMAX_DUPLENGTH=30000 \\
|
-DMAX_DUPLENGTH=30000 \
|
||||||
-DLINK_SIZE=2 \\
|
-DLINK_SIZE=2 \
|
||||||
-DEBCDIC=0 \\
|
-DEBCDIC=0 \
|
||||||
-DPOSIX_MALLOC_THRESHOLD=10
|
-DPOSIX_MALLOC_THRESHOLD=10
|
||||||
|
|
||||||
OBJECTS = \\
|
OBJECTS = \
|
||||||
`
|
|
||||||
for f in $all_files; do
|
|
||||||
echo " $f.obj \\\\"
|
|
||||||
done
|
|
||||||
`
|
|
||||||
|
|
||||||
all : pcre.lib
|
all : pcre.lib
|
||||||
|
|
||||||
pcre.lib : \$(OBJECTS)
|
pcre.lib : $(OBJECTS)
|
||||||
lib -out:pcre.lib \$(OBJECTS)
|
lib -out:pcre.lib $(OBJECTS)
|
||||||
|
|
||||||
|
@ -42,10 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||||||
/* The current PCRE version information. */
|
/* The current PCRE version information. */
|
||||||
|
|
||||||
#define PCRE_MAJOR 7
|
#define PCRE_MAJOR 7
|
||||||
|
#define PCRE_MINOR 7
|
||||||
#define PCRE_MINOR 6
|
|
||||||
#define PCRE_PRERELEASE
|
#define PCRE_PRERELEASE
|
||||||
#define PCRE_DATE 2008-01-28
|
#define PCRE_DATE 2008-05-07
|
||||||
|
|
||||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||||
imported have to be identified as such. When building PCRE, the appropriate
|
imported have to be identified as such. When building PCRE, the appropriate
|
||||||
@ -125,6 +124,7 @@ extern "C" {
|
|||||||
#define PCRE_NEWLINE_ANYCRLF 0x00500000
|
#define PCRE_NEWLINE_ANYCRLF 0x00500000
|
||||||
#define PCRE_BSR_ANYCRLF 0x00800000
|
#define PCRE_BSR_ANYCRLF 0x00800000
|
||||||
#define PCRE_BSR_UNICODE 0x01000000
|
#define PCRE_BSR_UNICODE 0x01000000
|
||||||
|
#define PCRE_JAVASCRIPT_COMPAT 0x02000000
|
||||||
|
|
||||||
/* Exec-time and get/set-time error codes */
|
/* Exec-time and get/set-time error codes */
|
||||||
|
|
||||||
|
@ -1,3 +1,6 @@
|
|||||||
|
/* This file is autogenerated by ../update-pcre/update.sh during
|
||||||
|
* the update of the local copy of PCRE.
|
||||||
|
*/
|
||||||
/*************************************************
|
/*************************************************
|
||||||
* Perl-Compatible Regular Expressions *
|
* Perl-Compatible Regular Expressions *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
@ -158,7 +158,7 @@ static const char verbnames[] =
|
|||||||
"SKIP\0"
|
"SKIP\0"
|
||||||
"THEN";
|
"THEN";
|
||||||
|
|
||||||
static verbitem verbs[] = {
|
static const verbitem verbs[] = {
|
||||||
{ 6, OP_ACCEPT },
|
{ 6, OP_ACCEPT },
|
||||||
{ 6, OP_COMMIT },
|
{ 6, OP_COMMIT },
|
||||||
{ 1, OP_FAIL },
|
{ 1, OP_FAIL },
|
||||||
@ -168,7 +168,7 @@ static verbitem verbs[] = {
|
|||||||
{ 4, OP_THEN }
|
{ 4, OP_THEN }
|
||||||
};
|
};
|
||||||
|
|
||||||
static int verbcount = sizeof(verbs)/sizeof(verbitem);
|
static const int verbcount = sizeof(verbs)/sizeof(verbitem);
|
||||||
|
|
||||||
|
|
||||||
/* Tables of names of POSIX character classes and their lengths. The names are
|
/* Tables of names of POSIX character classes and their lengths. The names are
|
||||||
@ -295,14 +295,15 @@ static const char error_texts[] =
|
|||||||
/* 55 */
|
/* 55 */
|
||||||
"repeating a DEFINE group is not allowed\0"
|
"repeating a DEFINE group is not allowed\0"
|
||||||
"inconsistent NEWLINE options\0"
|
"inconsistent NEWLINE options\0"
|
||||||
"\\g is not followed by a braced name or an optionally braced non-zero number\0"
|
"\\g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number\0"
|
||||||
"(?+ or (?- or (?(+ or (?(- must be followed by a non-zero number\0"
|
"a numbered reference must not be zero\0"
|
||||||
"(*VERB) with an argument is not supported\0"
|
"(*VERB) with an argument is not supported\0"
|
||||||
/* 60 */
|
/* 60 */
|
||||||
"(*VERB) not recognized\0"
|
"(*VERB) not recognized\0"
|
||||||
"number is too big\0"
|
"number is too big\0"
|
||||||
"subpattern name expected\0"
|
"subpattern name expected\0"
|
||||||
"digit expected after (?+";
|
"digit expected after (?+\0"
|
||||||
|
"] is an invalid data character in JavaScript compatibility mode";
|
||||||
|
|
||||||
|
|
||||||
/* Definition to allow mutual recursion */
|
/* Definition to allow mutual recursion */
|
||||||
@ -378,9 +379,15 @@ if (c == 0) *errorcodeptr = ERR1;
|
|||||||
in a table. A non-zero result is something that can be returned immediately.
|
in a table. A non-zero result is something that can be returned immediately.
|
||||||
Otherwise further processing may be required. */
|
Otherwise further processing may be required. */
|
||||||
|
|
||||||
|
#ifndef EBCDIC /* ASCII coding */
|
||||||
else if (c < '0' || c > 'z') {} /* Not alphanumeric */
|
else if (c < '0' || c > 'z') {} /* Not alphanumeric */
|
||||||
else if ((i = escapes[c - '0']) != 0) c = i;
|
else if ((i = escapes[c - '0']) != 0) c = i;
|
||||||
|
|
||||||
|
#else /* EBCDIC coding */
|
||||||
|
else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {} /* Not alphanumeric */
|
||||||
|
else if ((i = escapes[c - 0x48]) != 0) c = i;
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Escapes that need further processing, or are illegal. */
|
/* Escapes that need further processing, or are illegal. */
|
||||||
|
|
||||||
else
|
else
|
||||||
@ -401,14 +408,31 @@ else
|
|||||||
*errorcodeptr = ERR37;
|
*errorcodeptr = ERR37;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* \g must be followed by a number, either plain or braced. If positive, it
|
/* \g must be followed by one of a number of specific things:
|
||||||
is an absolute backreference. If negative, it is a relative backreference.
|
|
||||||
This is a Perl 5.10 feature. Perl 5.10 also supports \g{name} as a
|
(1) A number, either plain or braced. If positive, it is an absolute
|
||||||
reference to a named group. This is part of Perl's movement towards a
|
backreference. If negative, it is a relative backreference. This is a Perl
|
||||||
unified syntax for back references. As this is synonymous with \k{name}, we
|
5.10 feature.
|
||||||
fudge it up by pretending it really was \k. */
|
|
||||||
|
(2) Perl 5.10 also supports \g{name} as a reference to a named group. This
|
||||||
|
is part of Perl's movement towards a unified syntax for back references. As
|
||||||
|
this is synonymous with \k{name}, we fudge it up by pretending it really
|
||||||
|
was \k.
|
||||||
|
|
||||||
|
(3) For Oniguruma compatibility we also support \g followed by a name or a
|
||||||
|
number either in angle brackets or in single quotes. However, these are
|
||||||
|
(possibly recursive) subroutine calls, _not_ backreferences. Just return
|
||||||
|
the -ESC_g code (cf \k). */
|
||||||
|
|
||||||
case 'g':
|
case 'g':
|
||||||
|
if (ptr[1] == '<' || ptr[1] == '\'')
|
||||||
|
{
|
||||||
|
c = -ESC_g;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Handle the Perl-compatible cases */
|
||||||
|
|
||||||
if (ptr[1] == '{')
|
if (ptr[1] == '{')
|
||||||
{
|
{
|
||||||
const uschar *p;
|
const uschar *p;
|
||||||
@ -435,18 +459,24 @@ else
|
|||||||
while (g_ascii_isdigit(ptr[1]) != 0)
|
while (g_ascii_isdigit(ptr[1]) != 0)
|
||||||
c = c * 10 + *(++ptr) - '0';
|
c = c * 10 + *(++ptr) - '0';
|
||||||
|
|
||||||
if (c < 0)
|
if (c < 0) /* Integer overflow */
|
||||||
{
|
{
|
||||||
*errorcodeptr = ERR61;
|
*errorcodeptr = ERR61;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (c == 0 || (braced && *(++ptr) != '}'))
|
if (braced && *(++ptr) != '}')
|
||||||
{
|
{
|
||||||
*errorcodeptr = ERR57;
|
*errorcodeptr = ERR57;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (c == 0)
|
||||||
|
{
|
||||||
|
*errorcodeptr = ERR58;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
if (negated)
|
if (negated)
|
||||||
{
|
{
|
||||||
if (c > bracount)
|
if (c > bracount)
|
||||||
@ -481,7 +511,7 @@ else
|
|||||||
c -= '0';
|
c -= '0';
|
||||||
while (g_ascii_isdigit(ptr[1]) != 0)
|
while (g_ascii_isdigit(ptr[1]) != 0)
|
||||||
c = c * 10 + *(++ptr) - '0';
|
c = c * 10 + *(++ptr) - '0';
|
||||||
if (c < 0)
|
if (c < 0) /* Integer overflow */
|
||||||
{
|
{
|
||||||
*errorcodeptr = ERR61;
|
*errorcodeptr = ERR61;
|
||||||
break;
|
break;
|
||||||
@ -822,7 +852,7 @@ be terminated by '>' because that is checked in the first pass.
|
|||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
ptr current position in the pattern
|
ptr current position in the pattern
|
||||||
count current count of capturing parens so far encountered
|
cd compile background data
|
||||||
name name to seek, or NULL if seeking a numbered subpattern
|
name name to seek, or NULL if seeking a numbered subpattern
|
||||||
lorn name length, or subpattern number if name is NULL
|
lorn name length, or subpattern number if name is NULL
|
||||||
xmode TRUE if we are in /x mode
|
xmode TRUE if we are in /x mode
|
||||||
@ -831,10 +861,11 @@ Returns: the number of the named subpattern, or -1 if not found
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
static int
|
static int
|
||||||
find_parens(const uschar *ptr, int count, const uschar *name, int lorn,
|
find_parens(const uschar *ptr, compile_data *cd, const uschar *name, int lorn,
|
||||||
BOOL xmode)
|
BOOL xmode)
|
||||||
{
|
{
|
||||||
const uschar *thisname;
|
const uschar *thisname;
|
||||||
|
int count = cd->bracount;
|
||||||
|
|
||||||
for (; *ptr != 0; ptr++)
|
for (; *ptr != 0; ptr++)
|
||||||
{
|
{
|
||||||
@ -854,10 +885,34 @@ for (; *ptr != 0; ptr++)
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Skip over character classes */
|
/* Skip over character classes; this logic must be similar to the way they
|
||||||
|
are handled for real. If the first character is '^', skip it. Also, if the
|
||||||
|
first few characters (either before or after ^) are \Q\E or \E we skip them
|
||||||
|
too. This makes for compatibility with Perl. */
|
||||||
|
|
||||||
if (*ptr == '[')
|
if (*ptr == '[')
|
||||||
{
|
{
|
||||||
|
BOOL negate_class = FALSE;
|
||||||
|
for (;;)
|
||||||
|
{
|
||||||
|
int c = *(++ptr);
|
||||||
|
if (c == '\\')
|
||||||
|
{
|
||||||
|
if (ptr[1] == 'E') ptr++;
|
||||||
|
else if (strncmp((const char *)ptr+1, "Q\\E", 3) == 0) ptr += 3;
|
||||||
|
else break;
|
||||||
|
}
|
||||||
|
else if (!negate_class && c == '^')
|
||||||
|
negate_class = TRUE;
|
||||||
|
else break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If the next character is ']', it is a data character that must be
|
||||||
|
skipped, except in JavaScript compatibility mode. */
|
||||||
|
|
||||||
|
if (ptr[1] == ']' && (cd->external_options & PCRE_JAVASCRIPT_COMPAT) == 0)
|
||||||
|
ptr++;
|
||||||
|
|
||||||
while (*(++ptr) != ']')
|
while (*(++ptr) != ']')
|
||||||
{
|
{
|
||||||
if (*ptr == 0) return -1;
|
if (*ptr == 0) return -1;
|
||||||
@ -1122,6 +1177,7 @@ for (;;)
|
|||||||
case OP_NOT_WORDCHAR:
|
case OP_NOT_WORDCHAR:
|
||||||
case OP_WORDCHAR:
|
case OP_WORDCHAR:
|
||||||
case OP_ANY:
|
case OP_ANY:
|
||||||
|
case OP_ALLANY:
|
||||||
branchlength++;
|
branchlength++;
|
||||||
cc++;
|
cc++;
|
||||||
break;
|
break;
|
||||||
@ -1414,7 +1470,7 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE
|
|||||||
|
|
||||||
/* Groups with zero repeats can of course be empty; skip them. */
|
/* Groups with zero repeats can of course be empty; skip them. */
|
||||||
|
|
||||||
if (c == OP_BRAZERO || c == OP_BRAMINZERO)
|
if (c == OP_BRAZERO || c == OP_BRAMINZERO || c == OP_SKIPZERO)
|
||||||
{
|
{
|
||||||
code += _pcre_OP_lengths[c];
|
code += _pcre_OP_lengths[c];
|
||||||
do code += GET(code, 1); while (*code == OP_ALT);
|
do code += GET(code, 1); while (*code == OP_ALT);
|
||||||
@ -1500,6 +1556,7 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE
|
|||||||
case OP_NOT_WORDCHAR:
|
case OP_NOT_WORDCHAR:
|
||||||
case OP_WORDCHAR:
|
case OP_WORDCHAR:
|
||||||
case OP_ANY:
|
case OP_ANY:
|
||||||
|
case OP_ALLANY:
|
||||||
case OP_ANYBYTE:
|
case OP_ANYBYTE:
|
||||||
case OP_CHAR:
|
case OP_CHAR:
|
||||||
case OP_CHARNC:
|
case OP_CHARNC:
|
||||||
@ -1694,11 +1751,12 @@ return -1;
|
|||||||
that is referenced. This means that groups can be replicated for fixed
|
that is referenced. This means that groups can be replicated for fixed
|
||||||
repetition simply by copying (because the recursion is allowed to refer to
|
repetition simply by copying (because the recursion is allowed to refer to
|
||||||
earlier groups that are outside the current group). However, when a group is
|
earlier groups that are outside the current group). However, when a group is
|
||||||
optional (i.e. the minimum quantifier is zero), OP_BRAZERO is inserted before
|
optional (i.e. the minimum quantifier is zero), OP_BRAZERO or OP_SKIPZERO is
|
||||||
it, after it has been compiled. This means that any OP_RECURSE items within it
|
inserted before it, after it has been compiled. This means that any OP_RECURSE
|
||||||
that refer to the group itself or any contained groups have to have their
|
items within it that refer to the group itself or any contained groups have to
|
||||||
offsets adjusted. That is one of the jobs of this function. Before it is called,
|
have their offsets adjusted. That is one of the jobs of this function. Before it
|
||||||
the partially compiled regex must be temporarily terminated with OP_END.
|
is called, the partially compiled regex must be temporarily terminated with
|
||||||
|
OP_END.
|
||||||
|
|
||||||
This function has been extended with the possibility of forward references for
|
This function has been extended with the possibility of forward references for
|
||||||
recursions and subroutine calls. It must also check the list of such references
|
recursions and subroutine calls. It must also check the list of such references
|
||||||
@ -1983,7 +2041,6 @@ if (next >= 0) switch(op_code)
|
|||||||
/* For OP_NOT, "item" must be a single-byte character. */
|
/* For OP_NOT, "item" must be a single-byte character. */
|
||||||
|
|
||||||
case OP_NOT:
|
case OP_NOT:
|
||||||
if (next < 0) return FALSE; /* Not a character */
|
|
||||||
if (item == next) return TRUE;
|
if (item == next) return TRUE;
|
||||||
if ((options & PCRE_CASELESS) == 0) return FALSE;
|
if ((options & PCRE_CASELESS) == 0) return FALSE;
|
||||||
#ifdef SUPPORT_UTF8
|
#ifdef SUPPORT_UTF8
|
||||||
@ -2486,7 +2543,7 @@ for (;; ptr++)
|
|||||||
zerofirstbyte = firstbyte;
|
zerofirstbyte = firstbyte;
|
||||||
zeroreqbyte = reqbyte;
|
zeroreqbyte = reqbyte;
|
||||||
previous = code;
|
previous = code;
|
||||||
*code++ = OP_ANY;
|
*code++ = ((options & PCRE_DOTALL) != 0)? OP_ALLANY: OP_ANY;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
|
||||||
@ -2501,7 +2558,17 @@ for (;; ptr++)
|
|||||||
opcode is compiled. It may optionally have a bit map for characters < 256,
|
opcode is compiled. It may optionally have a bit map for characters < 256,
|
||||||
but those above are explicitly listed afterwards. A flag byte tells
|
but those above are explicitly listed afterwards. A flag byte tells
|
||||||
whether the bitmap is present, and whether this is a negated class or not.
|
whether the bitmap is present, and whether this is a negated class or not.
|
||||||
*/
|
|
||||||
|
In JavaScript compatibility mode, an isolated ']' causes an error. In
|
||||||
|
default (Perl) mode, it is treated as a data character. */
|
||||||
|
|
||||||
|
case ']':
|
||||||
|
if ((cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0)
|
||||||
|
{
|
||||||
|
*errorcodeptr = ERR64;
|
||||||
|
goto FAILED;
|
||||||
|
}
|
||||||
|
goto NORMAL_CHAR;
|
||||||
|
|
||||||
case '[':
|
case '[':
|
||||||
previous = code;
|
previous = code;
|
||||||
@ -2535,6 +2602,19 @@ for (;; ptr++)
|
|||||||
else break;
|
else break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Empty classes are allowed in JavaScript compatibility mode. Otherwise,
|
||||||
|
an initial ']' is taken as a data character -- the code below handles
|
||||||
|
that. In JS mode, [] must always fail, so generate OP_FAIL, whereas
|
||||||
|
[^] must match any character, so generate OP_ALLANY. */
|
||||||
|
|
||||||
|
if (c ==']' && (cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0)
|
||||||
|
{
|
||||||
|
*code++ = negate_class? OP_ALLANY : OP_FAIL;
|
||||||
|
if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
|
||||||
|
zerofirstbyte = firstbyte;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
/* If a class contains a negative special such as \S, we need to flip the
|
/* If a class contains a negative special such as \S, we need to flip the
|
||||||
negation flag at the end, so that support for characters > 255 works
|
negation flag at the end, so that support for characters > 255 works
|
||||||
correctly (they are all included in the class). */
|
correctly (they are all included in the class). */
|
||||||
@ -3690,28 +3770,38 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
|
|||||||
|
|
||||||
if (repeat_min == 0)
|
if (repeat_min == 0)
|
||||||
{
|
{
|
||||||
/* If the maximum is also zero, we just omit the group from the output
|
/* If the maximum is also zero, we used to just omit the group from the
|
||||||
altogether. */
|
output altogether, like this:
|
||||||
|
|
||||||
if (repeat_max == 0)
|
** if (repeat_max == 0)
|
||||||
{
|
** {
|
||||||
code = previous;
|
** code = previous;
|
||||||
goto END_REPEAT;
|
** goto END_REPEAT;
|
||||||
}
|
** }
|
||||||
|
|
||||||
/* If the maximum is 1 or unlimited, we just have to stick in the
|
However, that fails when a group is referenced as a subroutine from
|
||||||
BRAZERO and do no more at this point. However, we do need to adjust
|
elsewhere in the pattern, so now we stick in OP_SKIPZERO in front of it
|
||||||
any OP_RECURSE calls inside the group that refer to the group itself or
|
so that it is skipped on execution. As we don't have a list of which
|
||||||
any internal or forward referenced group, because the offset is from
|
groups are referenced, we cannot do this selectively.
|
||||||
the start of the whole regex. Temporarily terminate the pattern while
|
|
||||||
doing this. */
|
|
||||||
|
|
||||||
if (repeat_max <= 1)
|
If the maximum is 1 or unlimited, we just have to stick in the BRAZERO
|
||||||
|
and do no more at this point. However, we do need to adjust any
|
||||||
|
OP_RECURSE calls inside the group that refer to the group itself or any
|
||||||
|
internal or forward referenced group, because the offset is from the
|
||||||
|
start of the whole regex. Temporarily terminate the pattern while doing
|
||||||
|
this. */
|
||||||
|
|
||||||
|
if (repeat_max <= 1) /* Covers 0, 1, and unlimited */
|
||||||
{
|
{
|
||||||
*code = OP_END;
|
*code = OP_END;
|
||||||
adjust_recurse(previous, 1, utf8, cd, save_hwm);
|
adjust_recurse(previous, 1, utf8, cd, save_hwm);
|
||||||
memmove(previous+1, previous, len);
|
memmove(previous+1, previous, len);
|
||||||
code++;
|
code++;
|
||||||
|
if (repeat_max == 0)
|
||||||
|
{
|
||||||
|
*previous++ = OP_SKIPZERO;
|
||||||
|
goto END_REPEAT;
|
||||||
|
}
|
||||||
*previous++ = OP_BRAZERO + repeat_type;
|
*previous++ = OP_BRAZERO + repeat_type;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3906,6 +3996,13 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* If previous is OP_FAIL, it was generated by an empty class [] in
|
||||||
|
JavaScript mode. The other ways in which OP_FAIL can be generated, that is
|
||||||
|
by (*FAIL) or (?!) set previous to NULL, which gives a "nothing to repeat"
|
||||||
|
error above. We can just ignore the repeat in the JS case. */
|
||||||
|
|
||||||
|
else if (*previous == OP_FAIL) goto END_REPEAT;
|
||||||
|
|
||||||
/* Else there's some kind of shambles */
|
/* Else there's some kind of shambles */
|
||||||
|
|
||||||
else
|
else
|
||||||
@ -4192,7 +4289,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
|
|||||||
|
|
||||||
/* Search the pattern for a forward reference */
|
/* Search the pattern for a forward reference */
|
||||||
|
|
||||||
else if ((i = find_parens(ptr, cd->bracount, name, namelen,
|
else if ((i = find_parens(ptr, cd, name, namelen,
|
||||||
(options & PCRE_EXTENDED) != 0)) > 0)
|
(options & PCRE_EXTENDED) != 0)) > 0)
|
||||||
{
|
{
|
||||||
PUT2(code, 2+LINK_SIZE, i);
|
PUT2(code, 2+LINK_SIZE, i);
|
||||||
@ -4438,7 +4535,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
|
|||||||
references (?P=name) and recursion (?P>name), as well as falling
|
references (?P=name) and recursion (?P>name), as well as falling
|
||||||
through from the Perl recursion syntax (?&name). We also come here from
|
through from the Perl recursion syntax (?&name). We also come here from
|
||||||
the Perl \k<name> or \k'name' back reference syntax and the \k{name}
|
the Perl \k<name> or \k'name' back reference syntax and the \k{name}
|
||||||
.NET syntax. */
|
.NET syntax, and the Oniguruma \g<...> and \g'...' subroutine syntax. */
|
||||||
|
|
||||||
NAMED_REF_OR_RECURSE:
|
NAMED_REF_OR_RECURSE:
|
||||||
name = ++ptr;
|
name = ++ptr;
|
||||||
@ -4489,7 +4586,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
|
|||||||
recno = GET2(slot, 0);
|
recno = GET2(slot, 0);
|
||||||
}
|
}
|
||||||
else if ((recno = /* Forward back reference */
|
else if ((recno = /* Forward back reference */
|
||||||
find_parens(ptr, cd->bracount, name, namelen,
|
find_parens(ptr, cd, name, namelen,
|
||||||
(options & PCRE_EXTENDED) != 0)) <= 0)
|
(options & PCRE_EXTENDED) != 0)) <= 0)
|
||||||
{
|
{
|
||||||
*errorcodeptr = ERR15;
|
*errorcodeptr = ERR15;
|
||||||
@ -4516,6 +4613,15 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
|
|||||||
case '5': case '6': case '7': case '8': case '9': /* subroutine */
|
case '5': case '6': case '7': case '8': case '9': /* subroutine */
|
||||||
{
|
{
|
||||||
const uschar *called;
|
const uschar *called;
|
||||||
|
terminator = ')';
|
||||||
|
|
||||||
|
/* Come here from the \g<...> and \g'...' code (Oniguruma
|
||||||
|
compatibility). However, the syntax has been checked to ensure that
|
||||||
|
the ... are a (signed) number, so that neither ERR63 nor ERR29 will
|
||||||
|
be called on this path, nor will the jump to OTHER_CHAR_AFTER_QUERY
|
||||||
|
ever be taken. */
|
||||||
|
|
||||||
|
HANDLE_NUMERICAL_RECURSION:
|
||||||
|
|
||||||
if ((refsign = *ptr) == '+')
|
if ((refsign = *ptr) == '+')
|
||||||
{
|
{
|
||||||
@ -4537,7 +4643,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
|
|||||||
while(g_ascii_isdigit(*ptr) != 0)
|
while(g_ascii_isdigit(*ptr) != 0)
|
||||||
recno = recno * 10 + *ptr++ - '0';
|
recno = recno * 10 + *ptr++ - '0';
|
||||||
|
|
||||||
if (*ptr != ')')
|
if (*ptr != terminator)
|
||||||
{
|
{
|
||||||
*errorcodeptr = ERR29;
|
*errorcodeptr = ERR29;
|
||||||
goto FAILED;
|
goto FAILED;
|
||||||
@ -4590,8 +4696,8 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
|
|||||||
|
|
||||||
if (called == NULL)
|
if (called == NULL)
|
||||||
{
|
{
|
||||||
if (find_parens(ptr, cd->bracount, NULL, recno,
|
if (find_parens(ptr, cd, NULL, recno,
|
||||||
(options & PCRE_EXTENDED) != 0) < 0)
|
(options & PCRE_EXTENDED) != 0) < 0)
|
||||||
{
|
{
|
||||||
*errorcodeptr = ERR15;
|
*errorcodeptr = ERR15;
|
||||||
goto FAILED;
|
goto FAILED;
|
||||||
@ -4961,6 +5067,64 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
|
|||||||
zerofirstbyte = firstbyte;
|
zerofirstbyte = firstbyte;
|
||||||
zeroreqbyte = reqbyte;
|
zeroreqbyte = reqbyte;
|
||||||
|
|
||||||
|
/* \g<name> or \g'name' is a subroutine call by name and \g<n> or \g'n'
|
||||||
|
is a subroutine call by number (Oniguruma syntax). In fact, the value
|
||||||
|
-ESC_g is returned only for these cases. So we don't need to check for <
|
||||||
|
or ' if the value is -ESC_g. For the Perl syntax \g{n} the value is
|
||||||
|
-ESC_REF+n, and for the Perl syntax \g{name} the result is -ESC_k (as
|
||||||
|
that is a synonym for a named back reference). */
|
||||||
|
|
||||||
|
if (-c == ESC_g)
|
||||||
|
{
|
||||||
|
const uschar *p;
|
||||||
|
save_hwm = cd->hwm; /* Normally this is set when '(' is read */
|
||||||
|
terminator = (*(++ptr) == '<')? '>' : '\'';
|
||||||
|
|
||||||
|
/* These two statements stop the compiler from warning about possibly
|
||||||
|
unset variables caused by the jump to HANDLE_NUMERICAL_RECURSION. In
|
||||||
|
fact, because we actually check for a number below, the paths that
|
||||||
|
would actually be in error are never taken. */
|
||||||
|
|
||||||
|
skipbytes = 0;
|
||||||
|
reset_bracount = FALSE;
|
||||||
|
|
||||||
|
/* Test for a name */
|
||||||
|
|
||||||
|
if (ptr[1] != '+' && ptr[1] != '-')
|
||||||
|
{
|
||||||
|
BOOL isnumber = TRUE;
|
||||||
|
for (p = ptr + 1; *p != 0 && *p != terminator; p++)
|
||||||
|
{
|
||||||
|
if ((cd->ctypes[*p] & ctype_digit) == 0) isnumber = FALSE;
|
||||||
|
if ((cd->ctypes[*p] & ctype_word) == 0) break;
|
||||||
|
}
|
||||||
|
if (*p != terminator)
|
||||||
|
{
|
||||||
|
*errorcodeptr = ERR57;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (isnumber)
|
||||||
|
{
|
||||||
|
ptr++;
|
||||||
|
goto HANDLE_NUMERICAL_RECURSION;
|
||||||
|
}
|
||||||
|
is_recurse = TRUE;
|
||||||
|
goto NAMED_REF_OR_RECURSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Test a signed number in angle brackets or quotes. */
|
||||||
|
|
||||||
|
p = ptr + 2;
|
||||||
|
while (g_ascii_isdigit(*p) != 0) p++;
|
||||||
|
if (*p != terminator)
|
||||||
|
{
|
||||||
|
*errorcodeptr = ERR57;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
ptr++;
|
||||||
|
goto HANDLE_NUMERICAL_RECURSION;
|
||||||
|
}
|
||||||
|
|
||||||
/* \k<name> or \k'name' is a back reference by name (Perl syntax).
|
/* \k<name> or \k'name' is a back reference by name (Perl syntax).
|
||||||
We also support \k{name} (.NET syntax) */
|
We also support \k{name} (.NET syntax) */
|
||||||
|
|
||||||
@ -5467,14 +5631,14 @@ do {
|
|||||||
if (!is_anchored(scode, options, bracket_map, backref_map)) return FALSE;
|
if (!is_anchored(scode, options, bracket_map, backref_map)) return FALSE;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* .* is not anchored unless DOTALL is set and it isn't in brackets that
|
/* .* is not anchored unless DOTALL is set (which generates OP_ALLANY) and
|
||||||
are or may be referenced. */
|
it isn't in brackets that are or may be referenced. */
|
||||||
|
|
||||||
else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR ||
|
else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR ||
|
||||||
op == OP_TYPEPOSSTAR) &&
|
op == OP_TYPEPOSSTAR))
|
||||||
(*options & PCRE_DOTALL) != 0)
|
|
||||||
{
|
{
|
||||||
if (scode[1] != OP_ANY || (bracket_map & backref_map) != 0) return FALSE;
|
if (scode[1] != OP_ALLANY || (bracket_map & backref_map) != 0)
|
||||||
|
return FALSE;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Check for explicit anchoring */
|
/* Check for explicit anchoring */
|
||||||
|
@ -84,11 +84,11 @@ centralize the loading of these characters. In the case of Type * etc, the
|
|||||||
small value. ***NOTE*** If the start of this table is modified, the two tables
|
small value. ***NOTE*** If the start of this table is modified, the two tables
|
||||||
that follow must also be modified. */
|
that follow must also be modified. */
|
||||||
|
|
||||||
static uschar coptable[] = {
|
static const uschar coptable[] = {
|
||||||
0, /* End */
|
0, /* End */
|
||||||
0, 0, 0, 0, 0, /* \A, \G, \K, \B, \b */
|
0, 0, 0, 0, 0, /* \A, \G, \K, \B, \b */
|
||||||
0, 0, 0, 0, 0, 0, /* \D, \d, \S, \s, \W, \w */
|
0, 0, 0, 0, 0, 0, /* \D, \d, \S, \s, \W, \w */
|
||||||
0, 0, /* Any, Anybyte */
|
0, 0, 0, /* Any, AllAny, Anybyte */
|
||||||
0, 0, 0, /* NOTPROP, PROP, EXTUNI */
|
0, 0, 0, /* NOTPROP, PROP, EXTUNI */
|
||||||
0, 0, 0, 0, 0, /* \R, \H, \h, \V, \v */
|
0, 0, 0, 0, 0, /* \R, \H, \h, \V, \v */
|
||||||
0, 0, 0, 0, 0, /* \Z, \z, Opt, ^, $ */
|
0, 0, 0, 0, 0, /* \Z, \z, Opt, ^, $ */
|
||||||
@ -132,26 +132,26 @@ static uschar coptable[] = {
|
|||||||
0, /* DEF */
|
0, /* DEF */
|
||||||
0, 0, /* BRAZERO, BRAMINZERO */
|
0, 0, /* BRAZERO, BRAMINZERO */
|
||||||
0, 0, 0, 0, /* PRUNE, SKIP, THEN, COMMIT */
|
0, 0, 0, 0, /* PRUNE, SKIP, THEN, COMMIT */
|
||||||
0, 0 /* FAIL, ACCEPT */
|
0, 0, 0 /* FAIL, ACCEPT, SKIPZERO */
|
||||||
};
|
};
|
||||||
|
|
||||||
/* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
|
/* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
|
||||||
and \w */
|
and \w */
|
||||||
|
|
||||||
static uschar toptable1[] = {
|
static const uschar toptable1[] = {
|
||||||
0, 0, 0, 0, 0, 0,
|
0, 0, 0, 0, 0, 0,
|
||||||
ctype_digit, ctype_digit,
|
ctype_digit, ctype_digit,
|
||||||
ctype_space, ctype_space,
|
ctype_space, ctype_space,
|
||||||
ctype_word, ctype_word,
|
ctype_word, ctype_word,
|
||||||
0 /* OP_ANY */
|
0, 0 /* OP_ANY, OP_ALLANY */
|
||||||
};
|
};
|
||||||
|
|
||||||
static uschar toptable2[] = {
|
static const uschar toptable2[] = {
|
||||||
0, 0, 0, 0, 0, 0,
|
0, 0, 0, 0, 0, 0,
|
||||||
ctype_digit, 0,
|
ctype_digit, 0,
|
||||||
ctype_space, 0,
|
ctype_space, 0,
|
||||||
ctype_word, 0,
|
ctype_word, 0,
|
||||||
1 /* OP_ANY */
|
1, 1 /* OP_ANY, OP_ALLANY */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@ -223,8 +223,8 @@ Arguments:
|
|||||||
rlevel function call recursion level
|
rlevel function call recursion level
|
||||||
recursing regex recursive call level
|
recursing regex recursive call level
|
||||||
|
|
||||||
Returns: > 0 =>
|
Returns: > 0 => number of match offset pairs placed in offsets
|
||||||
= 0 =>
|
= 0 => offsets overflowed; longest matches are present
|
||||||
-1 => failed to match
|
-1 => failed to match
|
||||||
< -1 => some kind of unexpected problem
|
< -1 => some kind of unexpected problem
|
||||||
|
|
||||||
@ -693,6 +693,13 @@ for (;;)
|
|||||||
ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
|
ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_SKIPZERO:
|
||||||
|
code += 1 + GET(code, 2);
|
||||||
|
while (*code == OP_ALT) code += GET(code, 1);
|
||||||
|
ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
|
||||||
|
break;
|
||||||
|
|
||||||
/*-----------------------------------------------------------------*/
|
/*-----------------------------------------------------------------*/
|
||||||
case OP_CIRC:
|
case OP_CIRC:
|
||||||
if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
|
if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
|
||||||
@ -732,7 +739,13 @@ for (;;)
|
|||||||
|
|
||||||
/*-----------------------------------------------------------------*/
|
/*-----------------------------------------------------------------*/
|
||||||
case OP_ANY:
|
case OP_ANY:
|
||||||
if (clen > 0 && ((ims & PCRE_DOTALL) != 0 || !IS_NEWLINE(ptr)))
|
if (clen > 0 && !IS_NEWLINE(ptr))
|
||||||
|
{ ADD_NEW(state_offset + 1, 0); }
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_ALLANY:
|
||||||
|
if (clen > 0)
|
||||||
{ ADD_NEW(state_offset + 1, 0); }
|
{ ADD_NEW(state_offset + 1, 0); }
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -852,8 +865,8 @@ for (;;)
|
|||||||
/* ========================================================================== */
|
/* ========================================================================== */
|
||||||
/* These opcodes likewise inspect the subject character, but have an
|
/* These opcodes likewise inspect the subject character, but have an
|
||||||
argument that is not a data character. It is one of these opcodes:
|
argument that is not a data character. It is one of these opcodes:
|
||||||
OP_ANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_SPACE, OP_WORDCHAR,
|
OP_ANY, OP_ALLANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_SPACE,
|
||||||
OP_NOT_WORDCHAR. The value is loaded into d. */
|
OP_WORDCHAR, OP_NOT_WORDCHAR. The value is loaded into d. */
|
||||||
|
|
||||||
case OP_TYPEPLUS:
|
case OP_TYPEPLUS:
|
||||||
case OP_TYPEMINPLUS:
|
case OP_TYPEMINPLUS:
|
||||||
@ -864,10 +877,7 @@ for (;;)
|
|||||||
{
|
{
|
||||||
if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
|
if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
|
||||||
(c < 256 &&
|
(c < 256 &&
|
||||||
(d != OP_ANY ||
|
(d != OP_ANY || !IS_NEWLINE(ptr)) &&
|
||||||
(ims & PCRE_DOTALL) != 0 ||
|
|
||||||
!IS_NEWLINE(ptr)
|
|
||||||
) &&
|
|
||||||
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
|
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
|
||||||
{
|
{
|
||||||
if (count > 0 && codevalue == OP_TYPEPOSPLUS)
|
if (count > 0 && codevalue == OP_TYPEPOSPLUS)
|
||||||
@ -890,10 +900,7 @@ for (;;)
|
|||||||
{
|
{
|
||||||
if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
|
if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
|
||||||
(c < 256 &&
|
(c < 256 &&
|
||||||
(d != OP_ANY ||
|
(d != OP_ANY || !IS_NEWLINE(ptr)) &&
|
||||||
(ims & PCRE_DOTALL) != 0 ||
|
|
||||||
!IS_NEWLINE(ptr)
|
|
||||||
) &&
|
|
||||||
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
|
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
|
||||||
{
|
{
|
||||||
if (codevalue == OP_TYPEPOSQUERY)
|
if (codevalue == OP_TYPEPOSQUERY)
|
||||||
@ -915,10 +922,7 @@ for (;;)
|
|||||||
{
|
{
|
||||||
if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
|
if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
|
||||||
(c < 256 &&
|
(c < 256 &&
|
||||||
(d != OP_ANY ||
|
(d != OP_ANY || !IS_NEWLINE(ptr)) &&
|
||||||
(ims & PCRE_DOTALL) != 0 ||
|
|
||||||
!IS_NEWLINE(ptr)
|
|
||||||
) &&
|
|
||||||
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
|
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
|
||||||
{
|
{
|
||||||
if (codevalue == OP_TYPEPOSSTAR)
|
if (codevalue == OP_TYPEPOSSTAR)
|
||||||
@ -938,10 +942,7 @@ for (;;)
|
|||||||
{
|
{
|
||||||
if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
|
if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
|
||||||
(c < 256 &&
|
(c < 256 &&
|
||||||
(d != OP_ANY ||
|
(d != OP_ANY || !IS_NEWLINE(ptr)) &&
|
||||||
(ims & PCRE_DOTALL) != 0 ||
|
|
||||||
!IS_NEWLINE(ptr)
|
|
||||||
) &&
|
|
||||||
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
|
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
|
||||||
{
|
{
|
||||||
if (++count >= GET2(code, 1))
|
if (++count >= GET2(code, 1))
|
||||||
@ -962,10 +963,7 @@ for (;;)
|
|||||||
{
|
{
|
||||||
if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
|
if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
|
||||||
(c < 256 &&
|
(c < 256 &&
|
||||||
(d != OP_ANY ||
|
(d != OP_ANY || !IS_NEWLINE(ptr)) &&
|
||||||
(ims & PCRE_DOTALL) != 0 ||
|
|
||||||
!IS_NEWLINE(ptr)
|
|
||||||
) &&
|
|
||||||
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
|
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
|
||||||
{
|
{
|
||||||
if (codevalue == OP_TYPEPOSUPTO)
|
if (codevalue == OP_TYPEPOSUPTO)
|
||||||
@ -2162,7 +2160,12 @@ for (;;)
|
|||||||
|
|
||||||
/* ========================================================================== */
|
/* ========================================================================== */
|
||||||
/* These are the opcodes for fancy brackets of various kinds. We have
|
/* These are the opcodes for fancy brackets of various kinds. We have
|
||||||
to use recursion in order to handle them. */
|
to use recursion in order to handle them. The "always failing" assertion
|
||||||
|
(?!) is optimised when compiling to OP_FAIL, so we have to support that,
|
||||||
|
though the other "backtracking verbs" are not supported. */
|
||||||
|
|
||||||
|
case OP_FAIL:
|
||||||
|
break;
|
||||||
|
|
||||||
case OP_ASSERT:
|
case OP_ASSERT:
|
||||||
case OP_ASSERT_NOT:
|
case OP_ASSERT_NOT:
|
||||||
|
@ -1148,11 +1148,11 @@ for (;;)
|
|||||||
do ecode += GET(ecode,1); while (*ecode == OP_ALT);
|
do ecode += GET(ecode,1); while (*ecode == OP_ALT);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* BRAZERO and BRAMINZERO occur just before a bracket group, indicating
|
/* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
|
||||||
that it may occur zero times. It may repeat infinitely, or not at all -
|
indicating that it may occur zero times. It may repeat infinitely, or not
|
||||||
i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper
|
at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
|
||||||
repeat limits are compiled as a number of copies, with the optional ones
|
with fixed upper repeat limits are compiled as a number of copies, with the
|
||||||
preceded by BRAZERO or BRAMINZERO. */
|
optional ones preceded by BRAZERO or BRAMINZERO. */
|
||||||
|
|
||||||
case OP_BRAZERO:
|
case OP_BRAZERO:
|
||||||
{
|
{
|
||||||
@ -1174,6 +1174,14 @@ for (;;)
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case OP_SKIPZERO:
|
||||||
|
{
|
||||||
|
next = ecode+1;
|
||||||
|
do next += GET(next,1); while (*next == OP_ALT);
|
||||||
|
ecode = next + 1 + LINK_SIZE;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
/* End of a group, repeated or non-repeating. */
|
/* End of a group, repeated or non-repeating. */
|
||||||
|
|
||||||
case OP_KET:
|
case OP_KET:
|
||||||
@ -1421,13 +1429,12 @@ for (;;)
|
|||||||
/* Match a single character type; inline for speed */
|
/* Match a single character type; inline for speed */
|
||||||
|
|
||||||
case OP_ANY:
|
case OP_ANY:
|
||||||
if ((ims & PCRE_DOTALL) == 0)
|
if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
|
||||||
{
|
/* Fall through */
|
||||||
if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
|
|
||||||
}
|
case OP_ALLANY:
|
||||||
if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
|
if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
|
||||||
if (utf8)
|
if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
|
||||||
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
|
|
||||||
ecode++;
|
ecode++;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -1723,16 +1730,25 @@ for (;;)
|
|||||||
case OP_REF:
|
case OP_REF:
|
||||||
{
|
{
|
||||||
offset = GET2(ecode, 1) << 1; /* Doubled ref number */
|
offset = GET2(ecode, 1) << 1; /* Doubled ref number */
|
||||||
ecode += 3; /* Advance past item */
|
ecode += 3;
|
||||||
|
|
||||||
/* If the reference is unset, set the length to be longer than the amount
|
/* If the reference is unset, there are two possibilities:
|
||||||
of subject left; this ensures that every attempt at a match fails. We
|
|
||||||
can't just fail here, because of the possibility of quantifiers with zero
|
|
||||||
minima. */
|
|
||||||
|
|
||||||
length = (offset >= offset_top || md->offset_vector[offset] < 0)?
|
(a) In the default, Perl-compatible state, set the length to be longer
|
||||||
md->end_subject - eptr + 1 :
|
than the amount of subject left; this ensures that every attempt at a
|
||||||
md->offset_vector[offset+1] - md->offset_vector[offset];
|
match fails. We can't just fail here, because of the possibility of
|
||||||
|
quantifiers with zero minima.
|
||||||
|
|
||||||
|
(b) If the JavaScript compatibility flag is set, set the length to zero
|
||||||
|
so that the back reference matches an empty string.
|
||||||
|
|
||||||
|
Otherwise, set the length to the length of what was matched by the
|
||||||
|
referenced subpattern. */
|
||||||
|
|
||||||
|
if (offset >= offset_top || md->offset_vector[offset] < 0)
|
||||||
|
length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;
|
||||||
|
else
|
||||||
|
length = md->offset_vector[offset+1] - md->offset_vector[offset];
|
||||||
|
|
||||||
/* Set up for repetition, or handle the non-repeated case */
|
/* Set up for repetition, or handle the non-repeated case */
|
||||||
|
|
||||||
@ -2935,14 +2951,22 @@ for (;;)
|
|||||||
case OP_ANY:
|
case OP_ANY:
|
||||||
for (i = 1; i <= min; i++)
|
for (i = 1; i <= min; i++)
|
||||||
{
|
{
|
||||||
if (eptr >= md->end_subject ||
|
if (eptr >= md->end_subject || IS_NEWLINE(eptr))
|
||||||
((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
|
|
||||||
RRETURN(MATCH_NOMATCH);
|
RRETURN(MATCH_NOMATCH);
|
||||||
eptr++;
|
eptr++;
|
||||||
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
|
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case OP_ALLANY:
|
||||||
|
for (i = 1; i <= min; i++)
|
||||||
|
{
|
||||||
|
if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
|
||||||
|
eptr++;
|
||||||
|
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
case OP_ANYBYTE:
|
case OP_ANYBYTE:
|
||||||
eptr += min;
|
eptr += min;
|
||||||
break;
|
break;
|
||||||
@ -3151,15 +3175,15 @@ for (;;)
|
|||||||
switch(ctype)
|
switch(ctype)
|
||||||
{
|
{
|
||||||
case OP_ANY:
|
case OP_ANY:
|
||||||
if ((ims & PCRE_DOTALL) == 0)
|
for (i = 1; i <= min; i++)
|
||||||
{
|
{
|
||||||
for (i = 1; i <= min; i++)
|
if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
|
||||||
{
|
eptr++;
|
||||||
if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
|
|
||||||
eptr++;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else eptr += min;
|
break;
|
||||||
|
|
||||||
|
case OP_ALLANY:
|
||||||
|
eptr += min;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case OP_ANYBYTE:
|
case OP_ANYBYTE:
|
||||||
@ -3416,16 +3440,14 @@ for (;;)
|
|||||||
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
|
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
|
||||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||||
if (fi >= max || eptr >= md->end_subject ||
|
if (fi >= max || eptr >= md->end_subject ||
|
||||||
(ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&
|
(ctype == OP_ANY && IS_NEWLINE(eptr)))
|
||||||
IS_NEWLINE(eptr)))
|
|
||||||
RRETURN(MATCH_NOMATCH);
|
RRETURN(MATCH_NOMATCH);
|
||||||
|
|
||||||
GETCHARINC(c, eptr);
|
GETCHARINC(c, eptr);
|
||||||
switch(ctype)
|
switch(ctype)
|
||||||
{
|
{
|
||||||
case OP_ANY: /* This is the DOTALL case */
|
case OP_ANY: /* This is the non-NL case */
|
||||||
break;
|
case OP_ALLANY:
|
||||||
|
|
||||||
case OP_ANYBYTE:
|
case OP_ANYBYTE:
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -3577,15 +3599,14 @@ for (;;)
|
|||||||
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
|
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
|
||||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||||
if (fi >= max || eptr >= md->end_subject ||
|
if (fi >= max || eptr >= md->end_subject ||
|
||||||
((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
|
(ctype == OP_ANY && IS_NEWLINE(eptr)))
|
||||||
RRETURN(MATCH_NOMATCH);
|
RRETURN(MATCH_NOMATCH);
|
||||||
|
|
||||||
c = *eptr++;
|
c = *eptr++;
|
||||||
switch(ctype)
|
switch(ctype)
|
||||||
{
|
{
|
||||||
case OP_ANY: /* This is the DOTALL case */
|
case OP_ANY: /* This is the non-NL case */
|
||||||
break;
|
case OP_ALLANY:
|
||||||
|
|
||||||
case OP_ANYBYTE:
|
case OP_ANYBYTE:
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -3839,23 +3860,11 @@ for (;;)
|
|||||||
case OP_ANY:
|
case OP_ANY:
|
||||||
if (max < INT_MAX)
|
if (max < INT_MAX)
|
||||||
{
|
{
|
||||||
if ((ims & PCRE_DOTALL) == 0)
|
for (i = min; i < max; i++)
|
||||||
{
|
{
|
||||||
for (i = min; i < max; i++)
|
if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
|
||||||
{
|
eptr++;
|
||||||
if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
|
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
|
||||||
eptr++;
|
|
||||||
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
for (i = min; i < max; i++)
|
|
||||||
{
|
|
||||||
if (eptr >= md->end_subject) break;
|
|
||||||
eptr++;
|
|
||||||
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3863,22 +3872,28 @@ for (;;)
|
|||||||
|
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if ((ims & PCRE_DOTALL) == 0)
|
for (i = min; i < max; i++)
|
||||||
{
|
{
|
||||||
for (i = min; i < max; i++)
|
if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
|
||||||
{
|
eptr++;
|
||||||
if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
|
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
|
||||||
eptr++;
|
|
||||||
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
eptr = md->end_subject;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case OP_ALLANY:
|
||||||
|
if (max < INT_MAX)
|
||||||
|
{
|
||||||
|
for (i = min; i < max; i++)
|
||||||
|
{
|
||||||
|
if (eptr >= md->end_subject) break;
|
||||||
|
eptr++;
|
||||||
|
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else eptr = md->end_subject; /* Unlimited UTF-8 repeat */
|
||||||
|
break;
|
||||||
|
|
||||||
/* The byte case is the same as non-UTF8 */
|
/* The byte case is the same as non-UTF8 */
|
||||||
|
|
||||||
case OP_ANYBYTE:
|
case OP_ANYBYTE:
|
||||||
@ -4064,17 +4079,14 @@ for (;;)
|
|||||||
switch(ctype)
|
switch(ctype)
|
||||||
{
|
{
|
||||||
case OP_ANY:
|
case OP_ANY:
|
||||||
if ((ims & PCRE_DOTALL) == 0)
|
for (i = min; i < max; i++)
|
||||||
{
|
{
|
||||||
for (i = min; i < max; i++)
|
if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
|
||||||
{
|
eptr++;
|
||||||
if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
|
|
||||||
eptr++;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
/* For DOTALL case, fall through and treat as \C */
|
break;
|
||||||
|
|
||||||
|
case OP_ALLANY:
|
||||||
case OP_ANYBYTE:
|
case OP_ANYBYTE:
|
||||||
c = max - min;
|
c = max - min;
|
||||||
if (c > (unsigned int)(md->end_subject - eptr))
|
if (c > (unsigned int)(md->end_subject - eptr))
|
||||||
@ -4450,6 +4462,7 @@ end_subject = md->end_subject;
|
|||||||
|
|
||||||
md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
|
md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
|
||||||
utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
|
utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
|
||||||
|
md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
|
||||||
|
|
||||||
md->notbol = (options & PCRE_NOTBOL) != 0;
|
md->notbol = (options & PCRE_NOTBOL) != 0;
|
||||||
md->noteol = (options & PCRE_NOTEOL) != 0;
|
md->noteol = (options & PCRE_NOTEOL) != 0;
|
||||||
|
@ -52,6 +52,8 @@ differently, and global variables are not used (see pcre.in). */
|
|||||||
|
|
||||||
#include "pcre_internal.h"
|
#include "pcre_internal.h"
|
||||||
|
|
||||||
|
#ifndef VPCOMPAT
|
||||||
PCRE_EXP_DATA_DEFN int (*pcre_callout)(pcre_callout_block *) = NULL;
|
PCRE_EXP_DATA_DEFN int (*pcre_callout)(pcre_callout_block *) = NULL;
|
||||||
|
#endif
|
||||||
|
|
||||||
/* End of pcre_globals.c */
|
/* End of pcre_globals.c */
|
||||||
|
@ -514,7 +514,8 @@ time, run time, or study time, respectively. */
|
|||||||
(PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \
|
(PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \
|
||||||
PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \
|
PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \
|
||||||
PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT|PCRE_FIRSTLINE| \
|
PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT|PCRE_FIRSTLINE| \
|
||||||
PCRE_DUPNAMES|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)
|
PCRE_DUPNAMES|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE| \
|
||||||
|
PCRE_JAVASCRIPT_COMPAT)
|
||||||
|
|
||||||
#define PUBLIC_EXEC_OPTIONS \
|
#define PUBLIC_EXEC_OPTIONS \
|
||||||
(PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK| \
|
(PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK| \
|
||||||
@ -601,16 +602,20 @@ contain UTF-8 characters with values greater than 255. */
|
|||||||
value such as \n. They must have non-zero values, as check_escape() returns
|
value such as \n. They must have non-zero values, as check_escape() returns
|
||||||
their negation. Also, they must appear in the same order as in the opcode
|
their negation. Also, they must appear in the same order as in the opcode
|
||||||
definitions below, up to ESC_z. There's a dummy for OP_ANY because it
|
definitions below, up to ESC_z. There's a dummy for OP_ANY because it
|
||||||
corresponds to "." rather than an escape sequence. The final one must be
|
corresponds to "." rather than an escape sequence, and another for OP_ALLANY
|
||||||
ESC_REF as subsequent values are used for backreferences (\1, \2, \3, etc).
|
(which is used for [^] in JavaScript compatibility mode).
|
||||||
There are two tests in the code for an escape greater than ESC_b and less than
|
|
||||||
ESC_Z to detect the types that may be repeated. These are the types that
|
The final escape must be ESC_REF as subsequent values are used for
|
||||||
consume characters. If any new escapes are put in between that don't consume a
|
backreferences (\1, \2, \3, etc). There are two tests in the code for an escape
|
||||||
character, that code will have to change. */
|
greater than ESC_b and less than ESC_Z to detect the types that may be
|
||||||
|
repeated. These are the types that consume characters. If any new escapes are
|
||||||
|
put in between that don't consume a character, that code will have to change.
|
||||||
|
*/
|
||||||
|
|
||||||
enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s,
|
enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s,
|
||||||
ESC_W, ESC_w, ESC_dum1, ESC_C, ESC_P, ESC_p, ESC_R, ESC_H, ESC_h,
|
ESC_W, ESC_w, ESC_dum1, ESC_dum2, ESC_C, ESC_P, ESC_p, ESC_R, ESC_H,
|
||||||
ESC_V, ESC_v, ESC_X, ESC_Z, ESC_z, ESC_E, ESC_Q, ESC_k, ESC_REF };
|
ESC_h, ESC_V, ESC_v, ESC_X, ESC_Z, ESC_z, ESC_E, ESC_Q, ESC_g, ESC_k,
|
||||||
|
ESC_REF };
|
||||||
|
|
||||||
|
|
||||||
/* Opcode table: Starting from 1 (i.e. after OP_END), the values up to
|
/* Opcode table: Starting from 1 (i.e. after OP_END), the values up to
|
||||||
@ -636,141 +641,146 @@ enum {
|
|||||||
OP_WHITESPACE, /* 9 \s */
|
OP_WHITESPACE, /* 9 \s */
|
||||||
OP_NOT_WORDCHAR, /* 10 \W */
|
OP_NOT_WORDCHAR, /* 10 \W */
|
||||||
OP_WORDCHAR, /* 11 \w */
|
OP_WORDCHAR, /* 11 \w */
|
||||||
OP_ANY, /* 12 Match any character */
|
OP_ANY, /* 12 Match any character (subject to DOTALL) */
|
||||||
OP_ANYBYTE, /* 13 Match any byte (\C); different to OP_ANY for UTF-8 */
|
OP_ALLANY, /* 13 Match any character (not subject to DOTALL) */
|
||||||
OP_NOTPROP, /* 14 \P (not Unicode property) */
|
OP_ANYBYTE, /* 14 Match any byte (\C); different to OP_ANY for UTF-8 */
|
||||||
OP_PROP, /* 15 \p (Unicode property) */
|
OP_NOTPROP, /* 15 \P (not Unicode property) */
|
||||||
OP_ANYNL, /* 16 \R (any newline sequence) */
|
OP_PROP, /* 16 \p (Unicode property) */
|
||||||
OP_NOT_HSPACE, /* 17 \H (not horizontal whitespace) */
|
OP_ANYNL, /* 17 \R (any newline sequence) */
|
||||||
OP_HSPACE, /* 18 \h (horizontal whitespace) */
|
OP_NOT_HSPACE, /* 18 \H (not horizontal whitespace) */
|
||||||
OP_NOT_VSPACE, /* 19 \V (not vertical whitespace) */
|
OP_HSPACE, /* 19 \h (horizontal whitespace) */
|
||||||
OP_VSPACE, /* 20 \v (vertical whitespace) */
|
OP_NOT_VSPACE, /* 20 \V (not vertical whitespace) */
|
||||||
OP_EXTUNI, /* 21 \X (extended Unicode sequence) */
|
OP_VSPACE, /* 21 \v (vertical whitespace) */
|
||||||
OP_EODN, /* 22 End of data or \n at end of data: \Z. */
|
OP_EXTUNI, /* 22 \X (extended Unicode sequence) */
|
||||||
OP_EOD, /* 23 End of data: \z */
|
OP_EODN, /* 23 End of data or \n at end of data: \Z. */
|
||||||
|
OP_EOD, /* 24 End of data: \z */
|
||||||
|
|
||||||
OP_OPT, /* 24 Set runtime options */
|
OP_OPT, /* 25 Set runtime options */
|
||||||
OP_CIRC, /* 25 Start of line - varies with multiline switch */
|
OP_CIRC, /* 26 Start of line - varies with multiline switch */
|
||||||
OP_DOLL, /* 26 End of line - varies with multiline switch */
|
OP_DOLL, /* 27 End of line - varies with multiline switch */
|
||||||
OP_CHAR, /* 27 Match one character, casefully */
|
OP_CHAR, /* 28 Match one character, casefully */
|
||||||
OP_CHARNC, /* 28 Match one character, caselessly */
|
OP_CHARNC, /* 29 Match one character, caselessly */
|
||||||
OP_NOT, /* 29 Match one character, not the following one */
|
OP_NOT, /* 30 Match one character, not the following one */
|
||||||
|
|
||||||
OP_STAR, /* 30 The maximizing and minimizing versions of */
|
OP_STAR, /* 31 The maximizing and minimizing versions of */
|
||||||
OP_MINSTAR, /* 31 these six opcodes must come in pairs, with */
|
OP_MINSTAR, /* 32 these six opcodes must come in pairs, with */
|
||||||
OP_PLUS, /* 32 the minimizing one second. */
|
OP_PLUS, /* 33 the minimizing one second. */
|
||||||
OP_MINPLUS, /* 33 This first set applies to single characters.*/
|
OP_MINPLUS, /* 34 This first set applies to single characters.*/
|
||||||
OP_QUERY, /* 34 */
|
OP_QUERY, /* 35 */
|
||||||
OP_MINQUERY, /* 35 */
|
OP_MINQUERY, /* 36 */
|
||||||
|
|
||||||
OP_UPTO, /* 36 From 0 to n matches */
|
OP_UPTO, /* 37 From 0 to n matches */
|
||||||
OP_MINUPTO, /* 37 */
|
OP_MINUPTO, /* 38 */
|
||||||
OP_EXACT, /* 38 Exactly n matches */
|
OP_EXACT, /* 39 Exactly n matches */
|
||||||
|
|
||||||
OP_POSSTAR, /* 39 Possessified star */
|
OP_POSSTAR, /* 40 Possessified star */
|
||||||
OP_POSPLUS, /* 40 Possessified plus */
|
OP_POSPLUS, /* 41 Possessified plus */
|
||||||
OP_POSQUERY, /* 41 Possessified query */
|
OP_POSQUERY, /* 42 Possessified query */
|
||||||
OP_POSUPTO, /* 42 Possessified upto */
|
OP_POSUPTO, /* 43 Possessified upto */
|
||||||
|
|
||||||
OP_NOTSTAR, /* 43 The maximizing and minimizing versions of */
|
OP_NOTSTAR, /* 44 The maximizing and minimizing versions of */
|
||||||
OP_NOTMINSTAR, /* 44 these six opcodes must come in pairs, with */
|
OP_NOTMINSTAR, /* 45 these six opcodes must come in pairs, with */
|
||||||
OP_NOTPLUS, /* 45 the minimizing one second. They must be in */
|
OP_NOTPLUS, /* 46 the minimizing one second. They must be in */
|
||||||
OP_NOTMINPLUS, /* 46 exactly the same order as those above. */
|
OP_NOTMINPLUS, /* 47 exactly the same order as those above. */
|
||||||
OP_NOTQUERY, /* 47 This set applies to "not" single characters. */
|
OP_NOTQUERY, /* 48 This set applies to "not" single characters. */
|
||||||
OP_NOTMINQUERY, /* 48 */
|
OP_NOTMINQUERY, /* 49 */
|
||||||
|
|
||||||
OP_NOTUPTO, /* 49 From 0 to n matches */
|
OP_NOTUPTO, /* 50 From 0 to n matches */
|
||||||
OP_NOTMINUPTO, /* 50 */
|
OP_NOTMINUPTO, /* 51 */
|
||||||
OP_NOTEXACT, /* 51 Exactly n matches */
|
OP_NOTEXACT, /* 52 Exactly n matches */
|
||||||
|
|
||||||
OP_NOTPOSSTAR, /* 52 Possessified versions */
|
OP_NOTPOSSTAR, /* 53 Possessified versions */
|
||||||
OP_NOTPOSPLUS, /* 53 */
|
OP_NOTPOSPLUS, /* 54 */
|
||||||
OP_NOTPOSQUERY, /* 54 */
|
OP_NOTPOSQUERY, /* 55 */
|
||||||
OP_NOTPOSUPTO, /* 55 */
|
OP_NOTPOSUPTO, /* 56 */
|
||||||
|
|
||||||
OP_TYPESTAR, /* 56 The maximizing and minimizing versions of */
|
OP_TYPESTAR, /* 57 The maximizing and minimizing versions of */
|
||||||
OP_TYPEMINSTAR, /* 57 these six opcodes must come in pairs, with */
|
OP_TYPEMINSTAR, /* 58 these six opcodes must come in pairs, with */
|
||||||
OP_TYPEPLUS, /* 58 the minimizing one second. These codes must */
|
OP_TYPEPLUS, /* 59 the minimizing one second. These codes must */
|
||||||
OP_TYPEMINPLUS, /* 59 be in exactly the same order as those above. */
|
OP_TYPEMINPLUS, /* 60 be in exactly the same order as those above. */
|
||||||
OP_TYPEQUERY, /* 60 This set applies to character types such as \d */
|
OP_TYPEQUERY, /* 61 This set applies to character types such as \d */
|
||||||
OP_TYPEMINQUERY, /* 61 */
|
OP_TYPEMINQUERY, /* 62 */
|
||||||
|
|
||||||
OP_TYPEUPTO, /* 62 From 0 to n matches */
|
OP_TYPEUPTO, /* 63 From 0 to n matches */
|
||||||
OP_TYPEMINUPTO, /* 63 */
|
OP_TYPEMINUPTO, /* 64 */
|
||||||
OP_TYPEEXACT, /* 64 Exactly n matches */
|
OP_TYPEEXACT, /* 65 Exactly n matches */
|
||||||
|
|
||||||
OP_TYPEPOSSTAR, /* 65 Possessified versions */
|
OP_TYPEPOSSTAR, /* 66 Possessified versions */
|
||||||
OP_TYPEPOSPLUS, /* 66 */
|
OP_TYPEPOSPLUS, /* 67 */
|
||||||
OP_TYPEPOSQUERY, /* 67 */
|
OP_TYPEPOSQUERY, /* 68 */
|
||||||
OP_TYPEPOSUPTO, /* 68 */
|
OP_TYPEPOSUPTO, /* 69 */
|
||||||
|
|
||||||
OP_CRSTAR, /* 69 The maximizing and minimizing versions of */
|
OP_CRSTAR, /* 70 The maximizing and minimizing versions of */
|
||||||
OP_CRMINSTAR, /* 70 all these opcodes must come in pairs, with */
|
OP_CRMINSTAR, /* 71 all these opcodes must come in pairs, with */
|
||||||
OP_CRPLUS, /* 71 the minimizing one second. These codes must */
|
OP_CRPLUS, /* 72 the minimizing one second. These codes must */
|
||||||
OP_CRMINPLUS, /* 72 be in exactly the same order as those above. */
|
OP_CRMINPLUS, /* 73 be in exactly the same order as those above. */
|
||||||
OP_CRQUERY, /* 73 These are for character classes and back refs */
|
OP_CRQUERY, /* 74 These are for character classes and back refs */
|
||||||
OP_CRMINQUERY, /* 74 */
|
OP_CRMINQUERY, /* 75 */
|
||||||
OP_CRRANGE, /* 75 These are different to the three sets above. */
|
OP_CRRANGE, /* 76 These are different to the three sets above. */
|
||||||
OP_CRMINRANGE, /* 76 */
|
OP_CRMINRANGE, /* 77 */
|
||||||
|
|
||||||
OP_CLASS, /* 77 Match a character class, chars < 256 only */
|
OP_CLASS, /* 78 Match a character class, chars < 256 only */
|
||||||
OP_NCLASS, /* 78 Same, but the bitmap was created from a negative
|
OP_NCLASS, /* 79 Same, but the bitmap was created from a negative
|
||||||
class - the difference is relevant only when a UTF-8
|
class - the difference is relevant only when a UTF-8
|
||||||
character > 255 is encountered. */
|
character > 255 is encountered. */
|
||||||
|
|
||||||
OP_XCLASS, /* 79 Extended class for handling UTF-8 chars within the
|
OP_XCLASS, /* 80 Extended class for handling UTF-8 chars within the
|
||||||
class. This does both positive and negative. */
|
class. This does both positive and negative. */
|
||||||
|
|
||||||
OP_REF, /* 80 Match a back reference */
|
OP_REF, /* 81 Match a back reference */
|
||||||
OP_RECURSE, /* 81 Match a numbered subpattern (possibly recursive) */
|
OP_RECURSE, /* 82 Match a numbered subpattern (possibly recursive) */
|
||||||
OP_CALLOUT, /* 82 Call out to external function if provided */
|
OP_CALLOUT, /* 83 Call out to external function if provided */
|
||||||
|
|
||||||
OP_ALT, /* 83 Start of alternation */
|
OP_ALT, /* 84 Start of alternation */
|
||||||
OP_KET, /* 84 End of group that doesn't have an unbounded repeat */
|
OP_KET, /* 85 End of group that doesn't have an unbounded repeat */
|
||||||
OP_KETRMAX, /* 85 These two must remain together and in this */
|
OP_KETRMAX, /* 86 These two must remain together and in this */
|
||||||
OP_KETRMIN, /* 86 order. They are for groups that repeat for ever. */
|
OP_KETRMIN, /* 87 order. They are for groups that repeat for ever. */
|
||||||
|
|
||||||
/* The assertions must come before BRA, CBRA, ONCE, and COND.*/
|
/* The assertions must come before BRA, CBRA, ONCE, and COND.*/
|
||||||
|
|
||||||
OP_ASSERT, /* 87 Positive lookahead */
|
OP_ASSERT, /* 88 Positive lookahead */
|
||||||
OP_ASSERT_NOT, /* 88 Negative lookahead */
|
OP_ASSERT_NOT, /* 89 Negative lookahead */
|
||||||
OP_ASSERTBACK, /* 89 Positive lookbehind */
|
OP_ASSERTBACK, /* 90 Positive lookbehind */
|
||||||
OP_ASSERTBACK_NOT, /* 90 Negative lookbehind */
|
OP_ASSERTBACK_NOT, /* 91 Negative lookbehind */
|
||||||
OP_REVERSE, /* 91 Move pointer back - used in lookbehind assertions */
|
OP_REVERSE, /* 92 Move pointer back - used in lookbehind assertions */
|
||||||
|
|
||||||
/* ONCE, BRA, CBRA, and COND must come after the assertions, with ONCE first,
|
/* ONCE, BRA, CBRA, and COND must come after the assertions, with ONCE first,
|
||||||
as there's a test for >= ONCE for a subpattern that isn't an assertion. */
|
as there's a test for >= ONCE for a subpattern that isn't an assertion. */
|
||||||
|
|
||||||
OP_ONCE, /* 92 Atomic group */
|
OP_ONCE, /* 93 Atomic group */
|
||||||
OP_BRA, /* 93 Start of non-capturing bracket */
|
OP_BRA, /* 94 Start of non-capturing bracket */
|
||||||
OP_CBRA, /* 94 Start of capturing bracket */
|
OP_CBRA, /* 95 Start of capturing bracket */
|
||||||
OP_COND, /* 95 Conditional group */
|
OP_COND, /* 96 Conditional group */
|
||||||
|
|
||||||
/* These three must follow the previous three, in the same order. There's a
|
/* These three must follow the previous three, in the same order. There's a
|
||||||
check for >= SBRA to distinguish the two sets. */
|
check for >= SBRA to distinguish the two sets. */
|
||||||
|
|
||||||
OP_SBRA, /* 96 Start of non-capturing bracket, check empty */
|
OP_SBRA, /* 97 Start of non-capturing bracket, check empty */
|
||||||
OP_SCBRA, /* 97 Start of capturing bracket, check empty */
|
OP_SCBRA, /* 98 Start of capturing bracket, check empty */
|
||||||
OP_SCOND, /* 98 Conditional group, check empty */
|
OP_SCOND, /* 99 Conditional group, check empty */
|
||||||
|
|
||||||
OP_CREF, /* 99 Used to hold a capture number as condition */
|
OP_CREF, /* 100 Used to hold a capture number as condition */
|
||||||
OP_RREF, /* 100 Used to hold a recursion number as condition */
|
OP_RREF, /* 101 Used to hold a recursion number as condition */
|
||||||
OP_DEF, /* 101 The DEFINE condition */
|
OP_DEF, /* 102 The DEFINE condition */
|
||||||
|
|
||||||
OP_BRAZERO, /* 102 These two must remain together and in this */
|
OP_BRAZERO, /* 103 These two must remain together and in this */
|
||||||
OP_BRAMINZERO, /* 103 order. */
|
OP_BRAMINZERO, /* 104 order. */
|
||||||
|
|
||||||
/* These are backtracking control verbs */
|
/* These are backtracking control verbs */
|
||||||
|
|
||||||
OP_PRUNE, /* 104 */
|
OP_PRUNE, /* 105 */
|
||||||
OP_SKIP, /* 105 */
|
OP_SKIP, /* 106 */
|
||||||
OP_THEN, /* 106 */
|
OP_THEN, /* 107 */
|
||||||
OP_COMMIT, /* 107 */
|
OP_COMMIT, /* 108 */
|
||||||
|
|
||||||
/* These are forced failure and success verbs */
|
/* These are forced failure and success verbs */
|
||||||
|
|
||||||
OP_FAIL, /* 108 */
|
OP_FAIL, /* 109 */
|
||||||
OP_ACCEPT /* 109 */
|
OP_ACCEPT, /* 110 */
|
||||||
|
|
||||||
|
/* This is used to skip a subpattern with a {0} quantifier */
|
||||||
|
|
||||||
|
OP_SKIPZERO /* 111 */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@ -779,7 +789,7 @@ for debugging. The macro is referenced only in pcre_printint.c. */
|
|||||||
|
|
||||||
#define OP_NAME_LIST \
|
#define OP_NAME_LIST \
|
||||||
"End", "\\A", "\\G", "\\K", "\\B", "\\b", "\\D", "\\d", \
|
"End", "\\A", "\\G", "\\K", "\\B", "\\b", "\\D", "\\d", \
|
||||||
"\\S", "\\s", "\\W", "\\w", "Any", "Anybyte", \
|
"\\S", "\\s", "\\W", "\\w", "Any", "AllAny", "Anybyte", \
|
||||||
"notprop", "prop", "\\R", "\\H", "\\h", "\\V", "\\v", \
|
"notprop", "prop", "\\R", "\\H", "\\h", "\\V", "\\v", \
|
||||||
"extuni", "\\Z", "\\z", \
|
"extuni", "\\Z", "\\z", \
|
||||||
"Opt", "^", "$", "char", "charnc", "not", \
|
"Opt", "^", "$", "char", "charnc", "not", \
|
||||||
@ -795,7 +805,8 @@ for debugging. The macro is referenced only in pcre_printint.c. */
|
|||||||
"AssertB", "AssertB not", "Reverse", \
|
"AssertB", "AssertB not", "Reverse", \
|
||||||
"Once", "Bra", "CBra", "Cond", "SBra", "SCBra", "SCond", \
|
"Once", "Bra", "CBra", "Cond", "SBra", "SCBra", "SCond", \
|
||||||
"Cond ref", "Cond rec", "Cond def", "Brazero", "Braminzero", \
|
"Cond ref", "Cond rec", "Cond def", "Brazero", "Braminzero", \
|
||||||
"*PRUNE", "*SKIP", "*THEN", "*COMMIT", "*FAIL", "*ACCEPT"
|
"*PRUNE", "*SKIP", "*THEN", "*COMMIT", "*FAIL", "*ACCEPT", \
|
||||||
|
"Skip zero"
|
||||||
|
|
||||||
|
|
||||||
/* This macro defines the length of fixed length operations in the compiled
|
/* This macro defines the length of fixed length operations in the compiled
|
||||||
@ -811,7 +822,7 @@ in UTF-8 mode. The code that uses this table must know about such things. */
|
|||||||
1, /* End */ \
|
1, /* End */ \
|
||||||
1, 1, 1, 1, 1, /* \A, \G, \K, \B, \b */ \
|
1, 1, 1, 1, 1, /* \A, \G, \K, \B, \b */ \
|
||||||
1, 1, 1, 1, 1, 1, /* \D, \d, \S, \s, \W, \w */ \
|
1, 1, 1, 1, 1, 1, /* \D, \d, \S, \s, \W, \w */ \
|
||||||
1, 1, /* Any, Anybyte */ \
|
1, 1, 1, /* Any, AllAny, Anybyte */ \
|
||||||
3, 3, 1, /* NOTPROP, PROP, EXTUNI */ \
|
3, 3, 1, /* NOTPROP, PROP, EXTUNI */ \
|
||||||
1, 1, 1, 1, 1, /* \R, \H, \h, \V, \v */ \
|
1, 1, 1, 1, 1, /* \R, \H, \h, \V, \v */ \
|
||||||
1, 1, 2, 1, 1, /* \Z, \z, Opt, ^, $ */ \
|
1, 1, 2, 1, 1, /* \Z, \z, Opt, ^, $ */ \
|
||||||
@ -860,7 +871,7 @@ in UTF-8 mode. The code that uses this table must know about such things. */
|
|||||||
1, /* DEF */ \
|
1, /* DEF */ \
|
||||||
1, 1, /* BRAZERO, BRAMINZERO */ \
|
1, 1, /* BRAZERO, BRAMINZERO */ \
|
||||||
1, 1, 1, 1, /* PRUNE, SKIP, THEN, COMMIT, */ \
|
1, 1, 1, 1, /* PRUNE, SKIP, THEN, COMMIT, */ \
|
||||||
1, 1 /* FAIL, ACCEPT */
|
1, 1, 1 /* FAIL, ACCEPT, SKIPZERO */
|
||||||
|
|
||||||
|
|
||||||
/* A magic value for OP_RREF to indicate the "any recursion" condition. */
|
/* A magic value for OP_RREF to indicate the "any recursion" condition. */
|
||||||
@ -876,7 +887,7 @@ enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9,
|
|||||||
ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39,
|
ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39,
|
||||||
ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
|
ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
|
||||||
ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
|
ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
|
||||||
ERR60, ERR61, ERR62, ERR63 };
|
ERR60, ERR61, ERR62, ERR63, ERR64 };
|
||||||
|
|
||||||
/* The real format of the start of the pcre block; the index of names and the
|
/* The real format of the start of the pcre block; the index of names and the
|
||||||
code vector run on as long as necessary after the end. We store an explicit
|
code vector run on as long as necessary after the end. We store an explicit
|
||||||
@ -1001,6 +1012,7 @@ typedef struct match_data {
|
|||||||
BOOL notbol; /* NOTBOL flag */
|
BOOL notbol; /* NOTBOL flag */
|
||||||
BOOL noteol; /* NOTEOL flag */
|
BOOL noteol; /* NOTEOL flag */
|
||||||
BOOL utf8; /* UTF8 flag */
|
BOOL utf8; /* UTF8 flag */
|
||||||
|
BOOL jscript_compat; /* JAVASCRIPT_COMPAT flag */
|
||||||
BOOL endonly; /* Dollar not before final \n */
|
BOOL endonly; /* Dollar not before final \n */
|
||||||
BOOL notempty; /* Empty string match not wanted */
|
BOOL notempty; /* Empty string match not wanted */
|
||||||
BOOL partial; /* PARTIAL flag */
|
BOOL partial; /* PARTIAL flag */
|
||||||
|
@ -217,6 +217,13 @@ do
|
|||||||
tcode += 1 + LINK_SIZE;
|
tcode += 1 + LINK_SIZE;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
/* SKIPZERO skips the bracket. */
|
||||||
|
|
||||||
|
case OP_SKIPZERO:
|
||||||
|
do tcode += GET(tcode,1); while (*tcode == OP_ALT);
|
||||||
|
tcode += 1 + LINK_SIZE;
|
||||||
|
break;
|
||||||
|
|
||||||
/* Single-char * or ? sets the bit and tries the next item */
|
/* Single-char * or ? sets the bit and tries the next item */
|
||||||
|
|
||||||
case OP_STAR:
|
case OP_STAR:
|
||||||
@ -341,6 +348,7 @@ do
|
|||||||
switch(tcode[1])
|
switch(tcode[1])
|
||||||
{
|
{
|
||||||
case OP_ANY:
|
case OP_ANY:
|
||||||
|
case OP_ALLANY:
|
||||||
return SSB_FAIL;
|
return SSB_FAIL;
|
||||||
|
|
||||||
case OP_NOT_DIGIT:
|
case OP_NOT_DIGIT:
|
||||||
|
@ -17,7 +17,7 @@ typedef struct cnode {
|
|||||||
|
|
||||||
#define f0_scriptmask 0xff000000 /* Mask for script field */
|
#define f0_scriptmask 0xff000000 /* Mask for script field */
|
||||||
#define f0_scriptshift 24 /* Shift for script value */
|
#define f0_scriptshift 24 /* Shift for script value */
|
||||||
#define f0_rangeflag 0x00f00000 /* Flag for a range item */
|
#define f0_rangeflag 0x00800000 /* Flag for a range item */
|
||||||
#define f0_charmask 0x001fffff /* Mask for code point value */
|
#define f0_charmask 0x001fffff /* Mask for code point value */
|
||||||
|
|
||||||
/* Things for the f1 field */
|
/* Things for the f1 field */
|
||||||
|
Loading…
Reference in New Issue
Block a user