mirror of
https://gitlab.gnome.org/GNOME/glib.git
synced 2025-01-27 06:26:15 +01:00
Update to 7.6, for real
svn path=/trunk/; revision=6669
This commit is contained in:
parent
b6ab5c133d
commit
a134d9f42b
@ -1,5 +1,3 @@
|
||||
include $(top_srcdir)/Makefile.decl
|
||||
|
||||
INCLUDES = \
|
||||
-DG_LOG_DOMAIN=\"GLib-GRegex\" \
|
||||
-DSUPPORT_UCP \
|
||||
@ -60,7 +58,7 @@ libpcre_la_LIBADD = $(DEP_LIBS)
|
||||
|
||||
libpcre_la_LDFLAGS = -no-undefined
|
||||
|
||||
EXTRA_DIST += \
|
||||
EXTRA_DIST = \
|
||||
COPYING \
|
||||
makefile.msc
|
||||
|
||||
|
@ -1,49 +1,35 @@
|
||||
TOP = ..\..\..
|
||||
!INCLUDE ..\..\build\win32\make.msc
|
||||
|
||||
INCLUDES = \
|
||||
-I ..\.. \
|
||||
-I ..
|
||||
|
||||
DEFINES = \
|
||||
-DPCRE_STATIC \
|
||||
-DHAVE_CONFIG_H \
|
||||
-DHAVE_LONG_LONG_FORMAT \
|
||||
-DSUPPORT_UCP \
|
||||
-DSUPPORT_UTF8 \
|
||||
-DNEWLINE=-1 \
|
||||
-DMATCH_LIMIT=10000000 \
|
||||
-DMATCH_LIMIT_RECURSION=10000000 \
|
||||
-DMAX_NAME_SIZE=32 \
|
||||
-DMAX_NAME_COUNT=10000 \
|
||||
-DMAX_DUPLENGTH=30000 \
|
||||
-DLINK_SIZE=2 \
|
||||
-UEBCDIC \
|
||||
-DPOSIX_MALLOC_THRESHOLD=10
|
||||
INCLUDES = \\
|
||||
-I ..\.. \\
|
||||
-I ..
|
||||
|
||||
DEFINES = \\
|
||||
-DPCRE_STATIC \\
|
||||
-DHAVE_CONFIG_H \\
|
||||
-DHAVE_LONG_LONG_FORMAT \\
|
||||
-DSUPPORT_UCP \\
|
||||
-DSUPPORT_UTF8 \\
|
||||
-DNEWLINE=-1 \\
|
||||
-DMATCH_LIMIT=10000000 \\
|
||||
-DMATCH_LIMIT_RECURSION=10000000 \\
|
||||
-DMAX_NAME_SIZE=32 \\
|
||||
-DMAX_NAME_COUNT=10000 \\
|
||||
-DMAX_DUPLENGTH=30000 \\
|
||||
-DLINK_SIZE=2 \\
|
||||
-DEBCDIC=0 \\
|
||||
-DPOSIX_MALLOC_THRESHOLD=10
|
||||
|
||||
OBJECTS = \
|
||||
pcre_chartables.obj \
|
||||
pcre_compile.obj \
|
||||
pcre_config.obj \
|
||||
pcre_dfa_exec.obj \
|
||||
pcre_exec.obj \
|
||||
pcre_fullinfo.obj \
|
||||
pcre_get.obj \
|
||||
pcre_globals.obj \
|
||||
pcre_info.obj \
|
||||
pcre_maketables.obj \
|
||||
pcre_newline.obj \
|
||||
pcre_ord2utf8.obj \
|
||||
pcre_refcount.obj \
|
||||
pcre_study.obj \
|
||||
pcre_tables.obj \
|
||||
pcre_try_flipped.obj \
|
||||
pcre_ucp_searchfuncs.obj \
|
||||
pcre_valid_utf8.obj \
|
||||
pcre_version.obj \
|
||||
pcre_xclass.obj \
|
||||
OBJECTS = \\
|
||||
`
|
||||
for f in $all_files; do
|
||||
echo " $f.obj \\\\"
|
||||
done
|
||||
`
|
||||
|
||||
all : pcre.lib
|
||||
|
||||
pcre.lib : $(OBJECTS)
|
||||
lib -out:pcre.lib $(OBJECTS)
|
||||
pcre.lib : \$(OBJECTS)
|
||||
lib -out:pcre.lib \$(OBJECTS)
|
||||
|
||||
|
@ -5,7 +5,7 @@
|
||||
/* This is the public header file for the PCRE library, to be #included by
|
||||
applications that call the PCRE functions.
|
||||
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -42,9 +42,10 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
/* The current PCRE version information. */
|
||||
|
||||
#define PCRE_MAJOR 7
|
||||
#define PCRE_MINOR 4
|
||||
|
||||
#define PCRE_MINOR 6
|
||||
#define PCRE_PRERELEASE
|
||||
#define PCRE_DATE 2007-09-21
|
||||
#define PCRE_DATE 2008-01-28
|
||||
|
||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||
imported have to be identified as such. When building PCRE, the appropriate
|
||||
@ -242,13 +243,6 @@ typedef struct pcre_callout_block {
|
||||
/* ------------------------------------------------------------------ */
|
||||
} pcre_callout_block;
|
||||
|
||||
|
||||
/* Indirection for store get and free functions. These can be set to
|
||||
alternative malloc/free functions if required. Special ones are used in the
|
||||
non-recursive case for "frames". There is also an optional callout function
|
||||
that is triggered by the (?) regex item. For Virtual Pascal, these definitions
|
||||
have to take another form. */
|
||||
|
||||
#include "glib.h"
|
||||
#include "galias.h"
|
||||
|
||||
|
@ -1,6 +1,3 @@
|
||||
/* This file is autogenerated by ../update-pcre/update.sh during
|
||||
* the update of the local copy of PCRE.
|
||||
*/
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -241,7 +241,7 @@ static const char error_texts[] =
|
||||
/* 10 */
|
||||
"operand of unlimited repeat could match the empty string\0" /** DEAD **/
|
||||
"internal error: unexpected repeat\0"
|
||||
"unrecognized character after (?\0"
|
||||
"unrecognized character after (? or (?-\0"
|
||||
"POSIX named classes are supported only within a class\0"
|
||||
"missing )\0"
|
||||
/* 15 */
|
||||
@ -300,7 +300,9 @@ static const char error_texts[] =
|
||||
"(*VERB) with an argument is not supported\0"
|
||||
/* 60 */
|
||||
"(*VERB) not recognized\0"
|
||||
"number is too big";
|
||||
"number is too big\0"
|
||||
"subpattern name expected\0"
|
||||
"digit expected after (?+";
|
||||
|
||||
|
||||
/* Definition to allow mutual recursion */
|
||||
@ -372,19 +374,13 @@ ptr--; /* Set pointer back to the last byte */
|
||||
|
||||
if (c == 0) *errorcodeptr = ERR1;
|
||||
|
||||
/* Non-alphamerics are literals. For digits or letters, do an initial lookup in
|
||||
a table. A non-zero result is something that can be returned immediately.
|
||||
/* Non-alphanumerics are literals. For digits or letters, do an initial lookup
|
||||
in a table. A non-zero result is something that can be returned immediately.
|
||||
Otherwise further processing may be required. */
|
||||
|
||||
#ifndef EBCDIC /* ASCII coding */
|
||||
else if (c < '0' || c > 'z') {} /* Not alphameric */
|
||||
else if (c < '0' || c > 'z') {} /* Not alphanumeric */
|
||||
else if ((i = escapes[c - '0']) != 0) c = i;
|
||||
|
||||
#else /* EBCDIC coding */
|
||||
else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {} /* Not alphameric */
|
||||
else if ((i = escapes[c - 0x48]) != 0) c = i;
|
||||
#endif
|
||||
|
||||
/* Escapes that need further processing, or are illegal. */
|
||||
|
||||
else
|
||||
@ -598,10 +594,10 @@ else
|
||||
break;
|
||||
|
||||
/* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any
|
||||
other alphameric following \ is an error if PCRE_EXTRA was set; otherwise,
|
||||
for Perl compatibility, it is a literal. This code looks a bit odd, but
|
||||
there used to be some cases other than the default, and there may be again
|
||||
in future, so I haven't "optimized" it. */
|
||||
other alphanumeric following \ is an error if PCRE_EXTRA was set;
|
||||
otherwise, for Perl compatibility, it is a literal. This code looks a bit
|
||||
odd, but there used to be some cases other than the default, and there may
|
||||
be again in future, so I haven't "optimized" it. */
|
||||
|
||||
default:
|
||||
if ((options & PCRE_EXTRA) != 0) switch(c)
|
||||
@ -1382,8 +1378,9 @@ for (;;)
|
||||
can match the empty string or not. It is called from could_be_empty()
|
||||
below and from compile_branch() when checking for an unlimited repeat of a
|
||||
group that can match nothing. Note that first_significant_code() skips over
|
||||
assertions. If we hit an unclosed bracket, we return "empty" - this means we've
|
||||
struck an inner bracket whose current branch will already have been scanned.
|
||||
backward and negative forward assertions when its final argument is TRUE. If we
|
||||
hit an unclosed bracket, we return "empty" - this means we've struck an inner
|
||||
bracket whose current branch will already have been scanned.
|
||||
|
||||
Arguments:
|
||||
code points to start of search
|
||||
@ -1405,6 +1402,16 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE
|
||||
|
||||
c = *code;
|
||||
|
||||
/* Skip over forward assertions; the other assertions are skipped by
|
||||
first_significant_code() with a TRUE final argument. */
|
||||
|
||||
if (c == OP_ASSERT)
|
||||
{
|
||||
do code += GET(code, 1); while (*code == OP_ALT);
|
||||
c = *code;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Groups with zero repeats can of course be empty; skip them. */
|
||||
|
||||
if (c == OP_BRAZERO || c == OP_BRAMINZERO)
|
||||
@ -1600,29 +1607,48 @@ return TRUE;
|
||||
*************************************************/
|
||||
|
||||
/* This function is called when the sequence "[:" or "[." or "[=" is
|
||||
encountered in a character class. It checks whether this is followed by an
|
||||
optional ^ and then a sequence of letters, terminated by a matching ":]" or
|
||||
".]" or "=]".
|
||||
encountered in a character class. It checks whether this is followed by a
|
||||
sequence of characters terminated by a matching ":]" or ".]" or "=]". If we
|
||||
reach an unescaped ']' without the special preceding character, return FALSE.
|
||||
|
||||
Argument:
|
||||
Originally, this function only recognized a sequence of letters between the
|
||||
terminators, but it seems that Perl recognizes any sequence of characters,
|
||||
though of course unknown POSIX names are subsequently rejected. Perl gives an
|
||||
"Unknown POSIX class" error for [:f\oo:] for example, where previously PCRE
|
||||
didn't consider this to be a POSIX class. Likewise for [:1234:].
|
||||
|
||||
The problem in trying to be exactly like Perl is in the handling of escapes. We
|
||||
have to be sure that [abc[:x\]pqr] is *not* treated as containing a POSIX
|
||||
class, but [abc[:x\]pqr:]] is (so that an error can be generated). The code
|
||||
below handles the special case of \], but does not try to do any other escape
|
||||
processing. This makes it different from Perl for cases such as [:l\ower:]
|
||||
where Perl recognizes it as the POSIX class "lower" but PCRE does not recognize
|
||||
"l\ower". This is a lesser evil than not diagnosing bad classes when Perl does,
|
||||
I think.
|
||||
|
||||
Arguments:
|
||||
ptr pointer to the initial [
|
||||
endptr where to return the end pointer
|
||||
cd pointer to compile data
|
||||
|
||||
Returns: TRUE or FALSE
|
||||
*/
|
||||
|
||||
static BOOL
|
||||
check_posix_syntax(const uschar *ptr, const uschar **endptr, compile_data *cd)
|
||||
check_posix_syntax(const uschar *ptr, const uschar **endptr)
|
||||
{
|
||||
int terminator; /* Don't combine these lines; the Solaris cc */
|
||||
terminator = *(++ptr); /* compiler warns about "non-constant" initializer. */
|
||||
if (*(++ptr) == '^') ptr++;
|
||||
while ((cd->ctypes[*ptr] & ctype_letter) != 0) ptr++;
|
||||
if (*ptr == terminator && ptr[1] == ']')
|
||||
for (++ptr; *ptr != 0; ptr++)
|
||||
{
|
||||
*endptr = ptr;
|
||||
return TRUE;
|
||||
if (*ptr == '\\' && ptr[1] == ']') ptr++; else
|
||||
{
|
||||
if (*ptr == ']') return FALSE;
|
||||
if (*ptr == terminator && ptr[1] == ']')
|
||||
{
|
||||
*endptr = ptr;
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
}
|
||||
return FALSE;
|
||||
}
|
||||
@ -2220,6 +2246,7 @@ uschar classbits[32];
|
||||
BOOL class_utf8;
|
||||
BOOL utf8 = (options & PCRE_UTF8) != 0;
|
||||
uschar *class_utf8data;
|
||||
uschar *class_utf8data_base;
|
||||
uschar utf8_char[6];
|
||||
#else
|
||||
BOOL utf8 = FALSE;
|
||||
@ -2259,6 +2286,7 @@ req_caseopt = ((options & PCRE_CASELESS) != 0)? REQ_CASELESS : 0;
|
||||
for (;; ptr++)
|
||||
{
|
||||
BOOL negate_class;
|
||||
BOOL should_flip_negation;
|
||||
BOOL possessive_quantifier;
|
||||
BOOL is_quantifier;
|
||||
BOOL is_recurse;
|
||||
@ -2482,7 +2510,7 @@ for (;; ptr++)
|
||||
they are encountered at the top level, so we'll do that too. */
|
||||
|
||||
if ((ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&
|
||||
check_posix_syntax(ptr, &tempptr, cd))
|
||||
check_posix_syntax(ptr, &tempptr))
|
||||
{
|
||||
*errorcodeptr = (ptr[1] == ':')? ERR13 : ERR31;
|
||||
goto FAILED;
|
||||
@ -2507,6 +2535,12 @@ for (;; ptr++)
|
||||
else break;
|
||||
}
|
||||
|
||||
/* If a class contains a negative special such as \S, we need to flip the
|
||||
negation flag at the end, so that support for characters > 255 works
|
||||
correctly (they are all included in the class). */
|
||||
|
||||
should_flip_negation = FALSE;
|
||||
|
||||
/* Keep a count of chars with values < 256 so that we can optimize the case
|
||||
of just a single character (as long as it's < 256). However, for higher
|
||||
valued UTF-8 characters, we don't yet do any optimization. */
|
||||
@ -2524,6 +2558,7 @@ for (;; ptr++)
|
||||
#ifdef SUPPORT_UTF8
|
||||
class_utf8 = FALSE; /* No chars >= 256 */
|
||||
class_utf8data = code + LINK_SIZE + 2; /* For UTF-8 items */
|
||||
class_utf8data_base = class_utf8data; /* For resetting in pass 1 */
|
||||
#endif
|
||||
|
||||
/* Process characters until ] is reached. By writing this as a "do" it
|
||||
@ -2539,6 +2574,18 @@ for (;; ptr++)
|
||||
{ /* Braces are required because the */
|
||||
GETCHARLEN(c, ptr, ptr); /* macro generates multiple statements */
|
||||
}
|
||||
|
||||
/* In the pre-compile phase, accumulate the length of any UTF-8 extra
|
||||
data and reset the pointer. This is so that very large classes that
|
||||
contain a zillion UTF-8 characters no longer overwrite the work space
|
||||
(which is on the stack). */
|
||||
|
||||
if (lengthptr != NULL)
|
||||
{
|
||||
*lengthptr += class_utf8data - class_utf8data_base;
|
||||
class_utf8data = class_utf8data_base;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/* Inside \Q...\E everything is literal except \E */
|
||||
@ -2562,7 +2609,7 @@ for (;; ptr++)
|
||||
|
||||
if (c == '[' &&
|
||||
(ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&
|
||||
check_posix_syntax(ptr, &tempptr, cd))
|
||||
check_posix_syntax(ptr, &tempptr))
|
||||
{
|
||||
BOOL local_negate = FALSE;
|
||||
int posix_class, taboffset, tabopt;
|
||||
@ -2579,6 +2626,7 @@ for (;; ptr++)
|
||||
if (*ptr == '^')
|
||||
{
|
||||
local_negate = TRUE;
|
||||
should_flip_negation = TRUE; /* Note negative special */
|
||||
ptr++;
|
||||
}
|
||||
|
||||
@ -2653,7 +2701,7 @@ for (;; ptr++)
|
||||
c = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);
|
||||
if (*errorcodeptr != 0) goto FAILED;
|
||||
|
||||
if (-c == ESC_b) c = '\b'; /* \b is backslash in a class */
|
||||
if (-c == ESC_b) c = '\b'; /* \b is backspace in a class */
|
||||
else if (-c == ESC_X) c = 'X'; /* \X is literal X in a class */
|
||||
else if (-c == ESC_R) c = 'R'; /* \R is literal R in a class */
|
||||
else if (-c == ESC_Q) /* Handle start of quoted string */
|
||||
@ -2681,6 +2729,7 @@ for (;; ptr++)
|
||||
continue;
|
||||
|
||||
case ESC_D:
|
||||
should_flip_negation = TRUE;
|
||||
for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_digit];
|
||||
continue;
|
||||
|
||||
@ -2689,6 +2738,7 @@ for (;; ptr++)
|
||||
continue;
|
||||
|
||||
case ESC_W:
|
||||
should_flip_negation = TRUE;
|
||||
for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_word];
|
||||
continue;
|
||||
|
||||
@ -2698,13 +2748,11 @@ for (;; ptr++)
|
||||
continue;
|
||||
|
||||
case ESC_S:
|
||||
should_flip_negation = TRUE;
|
||||
for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_space];
|
||||
classbits[1] |= 0x08; /* Perl 5.004 onwards omits VT from \s */
|
||||
continue;
|
||||
|
||||
case ESC_E: /* Perl ignores an orphan \E */
|
||||
continue;
|
||||
|
||||
default: /* Not recognized; fall through */
|
||||
break; /* Need "default" setting to stop compiler warning. */
|
||||
}
|
||||
@ -2939,7 +2987,7 @@ for (;; ptr++)
|
||||
d = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);
|
||||
if (*errorcodeptr != 0) goto FAILED;
|
||||
|
||||
/* \b is backslash; \X is literal X; \R is literal R; any other
|
||||
/* \b is backspace; \X is literal X; \R is literal R; any other
|
||||
special means the '-' was literal */
|
||||
|
||||
if (d < 0)
|
||||
@ -3203,11 +3251,14 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
|
||||
zeroreqbyte = reqbyte;
|
||||
|
||||
/* If there are characters with values > 255, we have to compile an
|
||||
extended class, with its own opcode. If there are no characters < 256,
|
||||
we can omit the bitmap in the actual compiled code. */
|
||||
extended class, with its own opcode, unless there was a negated special
|
||||
such as \S in the class, because in that case all characters > 255 are in
|
||||
the class, so any that were explicitly given as well can be ignored. If
|
||||
(when there are explicit characters > 255 that must be listed) there are no
|
||||
characters < 256, we can omit the bitmap in the actual compiled code. */
|
||||
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (class_utf8)
|
||||
if (class_utf8 && !should_flip_negation)
|
||||
{
|
||||
*class_utf8data++ = XCL_END; /* Marks the end of extra data */
|
||||
*code++ = OP_XCLASS;
|
||||
@ -3233,20 +3284,19 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
|
||||
}
|
||||
#endif
|
||||
|
||||
/* If there are no characters > 255, negate the 32-byte map if necessary,
|
||||
and copy it into the code vector. If this is the first thing in the branch,
|
||||
there can be no first char setting, whatever the repeat count. Any reqbyte
|
||||
setting must remain unchanged after any kind of repeat. */
|
||||
/* If there are no characters > 255, set the opcode to OP_CLASS or
|
||||
OP_NCLASS, depending on whether the whole class was negated and whether
|
||||
there were negative specials such as \S in the class. Then copy the 32-byte
|
||||
map into the code vector, negating it if necessary. */
|
||||
|
||||
*code++ = (negate_class == should_flip_negation) ? OP_CLASS : OP_NCLASS;
|
||||
if (negate_class)
|
||||
{
|
||||
*code++ = OP_NCLASS;
|
||||
if (lengthptr == NULL) /* Save time in the pre-compile phase */
|
||||
for (c = 0; c < 32; c++) code[c] = ~classbits[c];
|
||||
}
|
||||
else
|
||||
{
|
||||
*code++ = OP_CLASS;
|
||||
memcpy(code, classbits, 32);
|
||||
}
|
||||
code += 32;
|
||||
@ -3882,7 +3932,9 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
|
||||
int len;
|
||||
if (*tempcode == OP_EXACT || *tempcode == OP_TYPEEXACT ||
|
||||
*tempcode == OP_NOTEXACT)
|
||||
tempcode += _pcre_OP_lengths[*tempcode];
|
||||
tempcode += _pcre_OP_lengths[*tempcode] +
|
||||
((*tempcode == OP_TYPEEXACT &&
|
||||
(tempcode[3] == OP_PROP || tempcode[3] == OP_NOTPROP))? 2:0);
|
||||
len = code - tempcode;
|
||||
if (len > 0) switch (*tempcode)
|
||||
{
|
||||
@ -4109,16 +4161,13 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
|
||||
*errorcodeptr = ERR58;
|
||||
goto FAILED;
|
||||
}
|
||||
if (refsign == '-')
|
||||
recno = (refsign == '-')?
|
||||
cd->bracount - recno + 1 : recno +cd->bracount;
|
||||
if (recno <= 0 || recno > cd->final_bracount)
|
||||
{
|
||||
recno = cd->bracount - recno + 1;
|
||||
if (recno <= 0)
|
||||
{
|
||||
*errorcodeptr = ERR15;
|
||||
goto FAILED;
|
||||
}
|
||||
*errorcodeptr = ERR15;
|
||||
goto FAILED;
|
||||
}
|
||||
else recno += cd->bracount;
|
||||
PUT2(code, 2+LINK_SIZE, recno);
|
||||
break;
|
||||
}
|
||||
@ -4190,9 +4239,10 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
|
||||
skipbytes = 1;
|
||||
}
|
||||
|
||||
/* Check for the "name" actually being a subpattern number. */
|
||||
/* Check for the "name" actually being a subpattern number. We are
|
||||
in the second pass here, so final_bracount is set. */
|
||||
|
||||
else if (recno > 0)
|
||||
else if (recno > 0 && recno <= cd->final_bracount)
|
||||
{
|
||||
PUT2(code, 2+LINK_SIZE, recno);
|
||||
}
|
||||
@ -4386,7 +4436,9 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
|
||||
|
||||
/* We come here from the Python syntax above that handles both
|
||||
references (?P=name) and recursion (?P>name), as well as falling
|
||||
through from the Perl recursion syntax (?&name). */
|
||||
through from the Perl recursion syntax (?&name). We also come here from
|
||||
the Perl \k<name> or \k'name' back reference syntax and the \k{name}
|
||||
.NET syntax. */
|
||||
|
||||
NAMED_REF_OR_RECURSE:
|
||||
name = ++ptr;
|
||||
@ -4398,6 +4450,11 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
|
||||
|
||||
if (lengthptr != NULL)
|
||||
{
|
||||
if (namelen == 0)
|
||||
{
|
||||
*errorcodeptr = ERR62;
|
||||
goto FAILED;
|
||||
}
|
||||
if (*ptr != terminator)
|
||||
{
|
||||
*errorcodeptr = ERR42;
|
||||
@ -4411,14 +4468,19 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
|
||||
recno = 0;
|
||||
}
|
||||
|
||||
/* In the real compile, seek the name in the table */
|
||||
/* In the real compile, seek the name in the table. We check the name
|
||||
first, and then check that we have reached the end of the name in the
|
||||
table. That way, if the name is longer than any in the table,
|
||||
the comparison will fail without reading beyond the table entry. */
|
||||
|
||||
else
|
||||
{
|
||||
slot = cd->name_table;
|
||||
for (i = 0; i < cd->names_found; i++)
|
||||
{
|
||||
if (strncmp((char *)name, (char *)slot+2, namelen) == 0) break;
|
||||
if (strncmp((char *)name, (char *)slot+2, namelen) == 0 &&
|
||||
slot[2+namelen] == 0)
|
||||
break;
|
||||
slot += cd->name_entry_size;
|
||||
}
|
||||
|
||||
@ -4455,7 +4517,15 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
|
||||
{
|
||||
const uschar *called;
|
||||
|
||||
if ((refsign = *ptr) == '+') ptr++;
|
||||
if ((refsign = *ptr) == '+')
|
||||
{
|
||||
ptr++;
|
||||
if (g_ascii_isdigit(*ptr) == 0)
|
||||
{
|
||||
*errorcodeptr = ERR63;
|
||||
goto FAILED;
|
||||
}
|
||||
}
|
||||
else if (refsign == '-')
|
||||
{
|
||||
if (g_ascii_isdigit(ptr[1]) == 0)
|
||||
@ -5621,7 +5691,6 @@ to fill in forward references to subpatterns. */
|
||||
|
||||
uschar cworkspace[COMPILE_WORK_SIZE];
|
||||
|
||||
|
||||
/* Set this early so that early errors get offset 0. */
|
||||
|
||||
ptr = (const uschar *)pattern;
|
||||
@ -5782,7 +5851,7 @@ to compile parts of the pattern into; the compiled code is discarded when it is
|
||||
no longer needed, so hopefully this workspace will never overflow, though there
|
||||
is a test for its doing so. */
|
||||
|
||||
cd->bracount = 0;
|
||||
cd->bracount = cd->final_bracount = 0;
|
||||
cd->names_found = 0;
|
||||
cd->name_entry_size = 0;
|
||||
cd->name_table = NULL;
|
||||
@ -5859,6 +5928,7 @@ field. Reset the bracket count and the names_found field. Also reset the hwm
|
||||
field; this time it's used for remembering forward references to subpatterns.
|
||||
*/
|
||||
|
||||
cd->final_bracount = cd->bracount; /* Save for checking forward references */
|
||||
cd->bracount = 0;
|
||||
cd->names_found = 0;
|
||||
cd->name_table = (uschar *)re + re->name_table_offset;
|
||||
|
@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -4670,10 +4670,10 @@ for(;;)
|
||||
if (first_byte_caseless)
|
||||
while (start_match < end_subject &&
|
||||
md->lcc[*start_match] != first_byte)
|
||||
start_match++;
|
||||
{ NEXTCHAR(start_match); }
|
||||
else
|
||||
while (start_match < end_subject && *start_match != first_byte)
|
||||
start_match++;
|
||||
{ NEXTCHAR(start_match); }
|
||||
}
|
||||
|
||||
/* Or to just after a linebreak for a multiline match if possible */
|
||||
@ -4683,7 +4683,7 @@ for(;;)
|
||||
if (start_match > md->start_subject + start_offset)
|
||||
{
|
||||
while (start_match <= end_subject && !WAS_NEWLINE(start_match))
|
||||
start_match++;
|
||||
{ NEXTCHAR(start_match); }
|
||||
|
||||
/* If we have just passed a CR and the newline option is ANY or ANYCRLF,
|
||||
and we are now at a LF, advance the match position by one more character.
|
||||
@ -4704,7 +4704,9 @@ for(;;)
|
||||
while (start_match < end_subject)
|
||||
{
|
||||
register unsigned int c = *start_match;
|
||||
if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++; else break;
|
||||
if ((start_bits[c/8] & (1 << (c&7))) == 0)
|
||||
{ NEXTCHAR(start_match); }
|
||||
else break;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2,11 +2,11 @@
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/*PCRE is a library of functions to support regular expressions whose syntax
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -7,7 +7,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -363,6 +363,7 @@ never be called in byte mode. To make sure it can never even appear when UTF-8
|
||||
support is omitted, we don't even define it. */
|
||||
|
||||
#ifndef SUPPORT_UTF8
|
||||
#define NEXTCHAR(p) p++;
|
||||
#define GETCHAR(c, eptr) c = *eptr;
|
||||
#define GETCHARTEST(c, eptr) c = *eptr;
|
||||
#define GETCHARINC(c, eptr) c = *eptr++;
|
||||
@ -372,6 +373,13 @@ support is omitted, we don't even define it. */
|
||||
|
||||
#else /* SUPPORT_UTF8 */
|
||||
|
||||
/* Advance a character pointer one byte in non-UTF-8 mode and by one character
|
||||
in UTF-8 mode. */
|
||||
|
||||
#define NEXTCHAR(p) \
|
||||
p++; \
|
||||
if (utf8) { while((*p & 0xc0) == 0x80) p++; }
|
||||
|
||||
/* Get the next UTF-8 character, not advancing the pointer. This is called when
|
||||
we know we are in UTF-8 mode. */
|
||||
|
||||
@ -535,7 +543,7 @@ req_byte match. */
|
||||
#define REQ_BYTE_MAX 1000
|
||||
|
||||
/* Flags added to firstbyte or reqbyte; a "non-literal" item is either a
|
||||
variable-length repeat, or anything other than literal characters. */
|
||||
variable-length repeat, or anything other than literal characters. */
|
||||
|
||||
#define REQ_CASELESS 0x0100 /* indicates caselessness */
|
||||
#define REQ_VARY 0x0200 /* reqbyte followed non-literal item */
|
||||
@ -868,7 +876,7 @@ enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9,
|
||||
ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39,
|
||||
ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
|
||||
ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
|
||||
ERR60, ERR61 };
|
||||
ERR60, ERR61, ERR62, ERR63 };
|
||||
|
||||
/* The real format of the start of the pcre block; the index of names and the
|
||||
code vector run on as long as necessary after the end. We store an explicit
|
||||
@ -931,7 +939,8 @@ typedef struct compile_data {
|
||||
uschar *name_table; /* The name/number table */
|
||||
int names_found; /* Number of entries so far */
|
||||
int name_entry_size; /* Size of each entry */
|
||||
int bracount; /* Count of capturing parens */
|
||||
int bracount; /* Count of capturing parens as we compile */
|
||||
int final_bracount; /* Saved value after first pass */
|
||||
int top_backref; /* Maximum back reference */
|
||||
unsigned int backref_map; /* Bitmap of low back refs */
|
||||
int external_options; /* External (initial) options */
|
||||
@ -1033,7 +1042,7 @@ typedef struct dfa_match_data {
|
||||
#define ctype_letter 0x02
|
||||
#define ctype_digit 0x04
|
||||
#define ctype_xdigit 0x08
|
||||
#define ctype_word 0x10 /* alphameric or '_' */
|
||||
#define ctype_word 0x10 /* alphanumeric or '_' */
|
||||
#define ctype_meta 0x80 /* regexp meta char or zero (end pattern) */
|
||||
|
||||
/* Offsets for the bitmap tables in pcre_cbits. Each table contains a set
|
||||
|
@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
Loading…
Reference in New Issue
Block a user