Update to 7.6, for real

svn path=/trunk/; revision=6669
This commit is contained in:
Matthias Clasen 2008-03-11 01:51:07 +00:00
parent b6ab5c133d
commit a134d9f42b
22 changed files with 202 additions and 146 deletions

View File

@ -1,5 +1,3 @@
include $(top_srcdir)/Makefile.decl
INCLUDES = \ INCLUDES = \
-DG_LOG_DOMAIN=\"GLib-GRegex\" \ -DG_LOG_DOMAIN=\"GLib-GRegex\" \
-DSUPPORT_UCP \ -DSUPPORT_UCP \
@ -60,7 +58,7 @@ libpcre_la_LIBADD = $(DEP_LIBS)
libpcre_la_LDFLAGS = -no-undefined libpcre_la_LDFLAGS = -no-undefined
EXTRA_DIST += \ EXTRA_DIST = \
COPYING \ COPYING \
makefile.msc makefile.msc

View File

@ -1,49 +1,35 @@
TOP = ..\..\.. TOP = ..\..\..
!INCLUDE ..\..\build\win32\make.msc !INCLUDE ..\..\build\win32\make.msc
INCLUDES = \ INCLUDES = \\
-I ..\.. \ -I ..\.. \\
-I .. -I ..
DEFINES = \ DEFINES = \\
-DPCRE_STATIC \ -DPCRE_STATIC \\
-DHAVE_CONFIG_H \ -DHAVE_CONFIG_H \\
-DHAVE_LONG_LONG_FORMAT \ -DHAVE_LONG_LONG_FORMAT \\
-DSUPPORT_UCP \ -DSUPPORT_UCP \\
-DSUPPORT_UTF8 \ -DSUPPORT_UTF8 \\
-DNEWLINE=-1 \ -DNEWLINE=-1 \\
-DMATCH_LIMIT=10000000 \ -DMATCH_LIMIT=10000000 \\
-DMATCH_LIMIT_RECURSION=10000000 \ -DMATCH_LIMIT_RECURSION=10000000 \\
-DMAX_NAME_SIZE=32 \ -DMAX_NAME_SIZE=32 \\
-DMAX_NAME_COUNT=10000 \ -DMAX_NAME_COUNT=10000 \\
-DMAX_DUPLENGTH=30000 \ -DMAX_DUPLENGTH=30000 \\
-DLINK_SIZE=2 \ -DLINK_SIZE=2 \\
-UEBCDIC \ -DEBCDIC=0 \\
-DPOSIX_MALLOC_THRESHOLD=10 -DPOSIX_MALLOC_THRESHOLD=10
OBJECTS = \ OBJECTS = \\
pcre_chartables.obj \ `
pcre_compile.obj \ for f in $all_files; do
pcre_config.obj \ echo " $f.obj \\\\"
pcre_dfa_exec.obj \ done
pcre_exec.obj \ `
pcre_fullinfo.obj \
pcre_get.obj \
pcre_globals.obj \
pcre_info.obj \
pcre_maketables.obj \
pcre_newline.obj \
pcre_ord2utf8.obj \
pcre_refcount.obj \
pcre_study.obj \
pcre_tables.obj \
pcre_try_flipped.obj \
pcre_ucp_searchfuncs.obj \
pcre_valid_utf8.obj \
pcre_version.obj \
pcre_xclass.obj \
all : pcre.lib all : pcre.lib
pcre.lib : $(OBJECTS) pcre.lib : \$(OBJECTS)
lib -out:pcre.lib $(OBJECTS) lib -out:pcre.lib \$(OBJECTS)

View File

@ -5,7 +5,7 @@
/* This is the public header file for the PCRE library, to be #included by /* This is the public header file for the PCRE library, to be #included by
applications that call the PCRE functions. applications that call the PCRE functions.
Copyright (c) 1997-2007 University of Cambridge Copyright (c) 1997-2008 University of Cambridge
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -42,9 +42,10 @@ POSSIBILITY OF SUCH DAMAGE.
/* The current PCRE version information. */ /* The current PCRE version information. */
#define PCRE_MAJOR 7 #define PCRE_MAJOR 7
#define PCRE_MINOR 4
#define PCRE_MINOR 6
#define PCRE_PRERELEASE #define PCRE_PRERELEASE
#define PCRE_DATE 2007-09-21 #define PCRE_DATE 2008-01-28
/* When an application links to a PCRE DLL in Windows, the symbols that are /* When an application links to a PCRE DLL in Windows, the symbols that are
imported have to be identified as such. When building PCRE, the appropriate imported have to be identified as such. When building PCRE, the appropriate
@ -242,13 +243,6 @@ typedef struct pcre_callout_block {
/* ------------------------------------------------------------------ */ /* ------------------------------------------------------------------ */
} pcre_callout_block; } pcre_callout_block;
/* Indirection for store get and free functions. These can be set to
alternative malloc/free functions if required. Special ones are used in the
non-recursive case for "frames". There is also an optional callout function
that is triggered by the (?) regex item. For Virtual Pascal, these definitions
have to take another form. */
#include "glib.h" #include "glib.h"
#include "galias.h" #include "galias.h"

View File

@ -1,6 +1,3 @@
/* This file is autogenerated by ../update-pcre/update.sh during
* the update of the local copy of PCRE.
*/
/************************************************* /*************************************************
* Perl-Compatible Regular Expressions * * Perl-Compatible Regular Expressions *
*************************************************/ *************************************************/

View File

@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language. and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel Written by Philip Hazel
Copyright (c) 1997-2007 University of Cambridge Copyright (c) 1997-2008 University of Cambridge
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -241,7 +241,7 @@ static const char error_texts[] =
/* 10 */ /* 10 */
"operand of unlimited repeat could match the empty string\0" /** DEAD **/ "operand of unlimited repeat could match the empty string\0" /** DEAD **/
"internal error: unexpected repeat\0" "internal error: unexpected repeat\0"
"unrecognized character after (?\0" "unrecognized character after (? or (?-\0"
"POSIX named classes are supported only within a class\0" "POSIX named classes are supported only within a class\0"
"missing )\0" "missing )\0"
/* 15 */ /* 15 */
@ -300,7 +300,9 @@ static const char error_texts[] =
"(*VERB) with an argument is not supported\0" "(*VERB) with an argument is not supported\0"
/* 60 */ /* 60 */
"(*VERB) not recognized\0" "(*VERB) not recognized\0"
"number is too big"; "number is too big\0"
"subpattern name expected\0"
"digit expected after (?+";
/* Definition to allow mutual recursion */ /* Definition to allow mutual recursion */
@ -372,19 +374,13 @@ ptr--; /* Set pointer back to the last byte */
if (c == 0) *errorcodeptr = ERR1; if (c == 0) *errorcodeptr = ERR1;
/* Non-alphamerics are literals. For digits or letters, do an initial lookup in /* Non-alphanumerics are literals. For digits or letters, do an initial lookup
a table. A non-zero result is something that can be returned immediately. in a table. A non-zero result is something that can be returned immediately.
Otherwise further processing may be required. */ Otherwise further processing may be required. */
#ifndef EBCDIC /* ASCII coding */ else if (c < '0' || c > 'z') {} /* Not alphanumeric */
else if (c < '0' || c > 'z') {} /* Not alphameric */
else if ((i = escapes[c - '0']) != 0) c = i; else if ((i = escapes[c - '0']) != 0) c = i;
#else /* EBCDIC coding */
else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {} /* Not alphameric */
else if ((i = escapes[c - 0x48]) != 0) c = i;
#endif
/* Escapes that need further processing, or are illegal. */ /* Escapes that need further processing, or are illegal. */
else else
@ -598,10 +594,10 @@ else
break; break;
/* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any /* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any
other alphameric following \ is an error if PCRE_EXTRA was set; otherwise, other alphanumeric following \ is an error if PCRE_EXTRA was set;
for Perl compatibility, it is a literal. This code looks a bit odd, but otherwise, for Perl compatibility, it is a literal. This code looks a bit
there used to be some cases other than the default, and there may be again odd, but there used to be some cases other than the default, and there may
in future, so I haven't "optimized" it. */ be again in future, so I haven't "optimized" it. */
default: default:
if ((options & PCRE_EXTRA) != 0) switch(c) if ((options & PCRE_EXTRA) != 0) switch(c)
@ -1382,8 +1378,9 @@ for (;;)
can match the empty string or not. It is called from could_be_empty() can match the empty string or not. It is called from could_be_empty()
below and from compile_branch() when checking for an unlimited repeat of a below and from compile_branch() when checking for an unlimited repeat of a
group that can match nothing. Note that first_significant_code() skips over group that can match nothing. Note that first_significant_code() skips over
assertions. If we hit an unclosed bracket, we return "empty" - this means we've backward and negative forward assertions when its final argument is TRUE. If we
struck an inner bracket whose current branch will already have been scanned. hit an unclosed bracket, we return "empty" - this means we've struck an inner
bracket whose current branch will already have been scanned.
Arguments: Arguments:
code points to start of search code points to start of search
@ -1405,6 +1402,16 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE
c = *code; c = *code;
/* Skip over forward assertions; the other assertions are skipped by
first_significant_code() with a TRUE final argument. */
if (c == OP_ASSERT)
{
do code += GET(code, 1); while (*code == OP_ALT);
c = *code;
continue;
}
/* Groups with zero repeats can of course be empty; skip them. */ /* Groups with zero repeats can of course be empty; skip them. */
if (c == OP_BRAZERO || c == OP_BRAMINZERO) if (c == OP_BRAZERO || c == OP_BRAMINZERO)
@ -1600,30 +1607,49 @@ return TRUE;
*************************************************/ *************************************************/
/* This function is called when the sequence "[:" or "[." or "[=" is /* This function is called when the sequence "[:" or "[." or "[=" is
encountered in a character class. It checks whether this is followed by an encountered in a character class. It checks whether this is followed by a
optional ^ and then a sequence of letters, terminated by a matching ":]" or sequence of characters terminated by a matching ":]" or ".]" or "=]". If we
".]" or "=]". reach an unescaped ']' without the special preceding character, return FALSE.
Argument: Originally, this function only recognized a sequence of letters between the
terminators, but it seems that Perl recognizes any sequence of characters,
though of course unknown POSIX names are subsequently rejected. Perl gives an
"Unknown POSIX class" error for [:f\oo:] for example, where previously PCRE
didn't consider this to be a POSIX class. Likewise for [:1234:].
The problem in trying to be exactly like Perl is in the handling of escapes. We
have to be sure that [abc[:x\]pqr] is *not* treated as containing a POSIX
class, but [abc[:x\]pqr:]] is (so that an error can be generated). The code
below handles the special case of \], but does not try to do any other escape
processing. This makes it different from Perl for cases such as [:l\ower:]
where Perl recognizes it as the POSIX class "lower" but PCRE does not recognize
"l\ower". This is a lesser evil than not diagnosing bad classes when Perl does,
I think.
Arguments:
ptr pointer to the initial [ ptr pointer to the initial [
endptr where to return the end pointer endptr where to return the end pointer
cd pointer to compile data
Returns: TRUE or FALSE Returns: TRUE or FALSE
*/ */
static BOOL static BOOL
check_posix_syntax(const uschar *ptr, const uschar **endptr, compile_data *cd) check_posix_syntax(const uschar *ptr, const uschar **endptr)
{ {
int terminator; /* Don't combine these lines; the Solaris cc */ int terminator; /* Don't combine these lines; the Solaris cc */
terminator = *(++ptr); /* compiler warns about "non-constant" initializer. */ terminator = *(++ptr); /* compiler warns about "non-constant" initializer. */
if (*(++ptr) == '^') ptr++; for (++ptr; *ptr != 0; ptr++)
while ((cd->ctypes[*ptr] & ctype_letter) != 0) ptr++; {
if (*ptr == terminator && ptr[1] == ']') if (*ptr == '\\' && ptr[1] == ']') ptr++; else
{
if (*ptr == ']') return FALSE;
if (*ptr == terminator && ptr[1] == ']')
{ {
*endptr = ptr; *endptr = ptr;
return TRUE; return TRUE;
} }
}
}
return FALSE; return FALSE;
} }
@ -2220,6 +2246,7 @@ uschar classbits[32];
BOOL class_utf8; BOOL class_utf8;
BOOL utf8 = (options & PCRE_UTF8) != 0; BOOL utf8 = (options & PCRE_UTF8) != 0;
uschar *class_utf8data; uschar *class_utf8data;
uschar *class_utf8data_base;
uschar utf8_char[6]; uschar utf8_char[6];
#else #else
BOOL utf8 = FALSE; BOOL utf8 = FALSE;
@ -2259,6 +2286,7 @@ req_caseopt = ((options & PCRE_CASELESS) != 0)? REQ_CASELESS : 0;
for (;; ptr++) for (;; ptr++)
{ {
BOOL negate_class; BOOL negate_class;
BOOL should_flip_negation;
BOOL possessive_quantifier; BOOL possessive_quantifier;
BOOL is_quantifier; BOOL is_quantifier;
BOOL is_recurse; BOOL is_recurse;
@ -2482,7 +2510,7 @@ for (;; ptr++)
they are encountered at the top level, so we'll do that too. */ they are encountered at the top level, so we'll do that too. */
if ((ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') && if ((ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&
check_posix_syntax(ptr, &tempptr, cd)) check_posix_syntax(ptr, &tempptr))
{ {
*errorcodeptr = (ptr[1] == ':')? ERR13 : ERR31; *errorcodeptr = (ptr[1] == ':')? ERR13 : ERR31;
goto FAILED; goto FAILED;
@ -2507,6 +2535,12 @@ for (;; ptr++)
else break; else break;
} }
/* If a class contains a negative special such as \S, we need to flip the
negation flag at the end, so that support for characters > 255 works
correctly (they are all included in the class). */
should_flip_negation = FALSE;
/* Keep a count of chars with values < 256 so that we can optimize the case /* Keep a count of chars with values < 256 so that we can optimize the case
of just a single character (as long as it's < 256). However, For higher of just a single character (as long as it's < 256). However, For higher
valued UTF-8 characters, we don't yet do any optimization. */ valued UTF-8 characters, we don't yet do any optimization. */
@ -2524,6 +2558,7 @@ for (;; ptr++)
#ifdef SUPPORT_UTF8 #ifdef SUPPORT_UTF8
class_utf8 = FALSE; /* No chars >= 256 */ class_utf8 = FALSE; /* No chars >= 256 */
class_utf8data = code + LINK_SIZE + 2; /* For UTF-8 items */ class_utf8data = code + LINK_SIZE + 2; /* For UTF-8 items */
class_utf8data_base = class_utf8data; /* For resetting in pass 1 */
#endif #endif
/* Process characters until ] is reached. By writing this as a "do" it /* Process characters until ] is reached. By writing this as a "do" it
@ -2539,6 +2574,18 @@ for (;; ptr++)
{ /* Braces are required because the */ { /* Braces are required because the */
GETCHARLEN(c, ptr, ptr); /* macro generates multiple statements */ GETCHARLEN(c, ptr, ptr); /* macro generates multiple statements */
} }
/* In the pre-compile phase, accumulate the length of any UTF-8 extra
data and reset the pointer. This is so that very large classes that
contain a zillion UTF-8 characters no longer overwrite the work space
(which is on the stack). */
if (lengthptr != NULL)
{
*lengthptr += class_utf8data - class_utf8data_base;
class_utf8data = class_utf8data_base;
}
#endif #endif
/* Inside \Q...\E everything is literal except \E */ /* Inside \Q...\E everything is literal except \E */
@ -2562,7 +2609,7 @@ for (;; ptr++)
if (c == '[' && if (c == '[' &&
(ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') && (ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&
check_posix_syntax(ptr, &tempptr, cd)) check_posix_syntax(ptr, &tempptr))
{ {
BOOL local_negate = FALSE; BOOL local_negate = FALSE;
int posix_class, taboffset, tabopt; int posix_class, taboffset, tabopt;
@ -2579,6 +2626,7 @@ for (;; ptr++)
if (*ptr == '^') if (*ptr == '^')
{ {
local_negate = TRUE; local_negate = TRUE;
should_flip_negation = TRUE; /* Note negative special */
ptr++; ptr++;
} }
@ -2653,7 +2701,7 @@ for (;; ptr++)
c = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE); c = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);
if (*errorcodeptr != 0) goto FAILED; if (*errorcodeptr != 0) goto FAILED;
if (-c == ESC_b) c = '\b'; /* \b is backslash in a class */ if (-c == ESC_b) c = '\b'; /* \b is backspace in a class */
else if (-c == ESC_X) c = 'X'; /* \X is literal X in a class */ else if (-c == ESC_X) c = 'X'; /* \X is literal X in a class */
else if (-c == ESC_R) c = 'R'; /* \R is literal R in a class */ else if (-c == ESC_R) c = 'R'; /* \R is literal R in a class */
else if (-c == ESC_Q) /* Handle start of quoted string */ else if (-c == ESC_Q) /* Handle start of quoted string */
@ -2681,6 +2729,7 @@ for (;; ptr++)
continue; continue;
case ESC_D: case ESC_D:
should_flip_negation = TRUE;
for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_digit]; for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_digit];
continue; continue;
@ -2689,6 +2738,7 @@ for (;; ptr++)
continue; continue;
case ESC_W: case ESC_W:
should_flip_negation = TRUE;
for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_word]; for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_word];
continue; continue;
@ -2698,13 +2748,11 @@ for (;; ptr++)
continue; continue;
case ESC_S: case ESC_S:
should_flip_negation = TRUE;
for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_space]; for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_space];
classbits[1] |= 0x08; /* Perl 5.004 onwards omits VT from \s */ classbits[1] |= 0x08; /* Perl 5.004 onwards omits VT from \s */
continue; continue;
case ESC_E: /* Perl ignores an orphan \E */
continue;
default: /* Not recognized; fall through */ default: /* Not recognized; fall through */
break; /* Need "default" setting to stop compiler warning. */ break; /* Need "default" setting to stop compiler warning. */
} }
@ -2939,7 +2987,7 @@ for (;; ptr++)
d = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE); d = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);
if (*errorcodeptr != 0) goto FAILED; if (*errorcodeptr != 0) goto FAILED;
/* \b is backslash; \X is literal X; \R is literal R; any other /* \b is backspace; \X is literal X; \R is literal R; any other
special means the '-' was literal */ special means the '-' was literal */
if (d < 0) if (d < 0)
@ -3203,11 +3251,14 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
zeroreqbyte = reqbyte; zeroreqbyte = reqbyte;
/* If there are characters with values > 255, we have to compile an /* If there are characters with values > 255, we have to compile an
extended class, with its own opcode. If there are no characters < 256, extended class, with its own opcode, unless there was a negated special
we can omit the bitmap in the actual compiled code. */ such as \S in the class, because in that case all characters > 255 are in
the class, so any that were explicitly given as well can be ignored. If
(when there are explicit characters > 255 that must be listed) there are no
characters < 256, we can omit the bitmap in the actual compiled code. */
#ifdef SUPPORT_UTF8 #ifdef SUPPORT_UTF8
if (class_utf8) if (class_utf8 && !should_flip_negation)
{ {
*class_utf8data++ = XCL_END; /* Marks the end of extra data */ *class_utf8data++ = XCL_END; /* Marks the end of extra data */
*code++ = OP_XCLASS; *code++ = OP_XCLASS;
@ -3233,20 +3284,19 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
} }
#endif #endif
/* If there are no characters > 255, negate the 32-byte map if necessary, /* If there are no characters > 255, set the opcode to OP_CLASS or
and copy it into the code vector. If this is the first thing in the branch, OP_NCLASS, depending on whether the whole class was negated and whether
there can be no first char setting, whatever the repeat count. Any reqbyte there were negative specials such as \S in the class. Then copy the 32-byte
setting must remain unchanged after any kind of repeat. */ map into the code vector, negating it if necessary. */
*code++ = (negate_class == should_flip_negation) ? OP_CLASS : OP_NCLASS;
if (negate_class) if (negate_class)
{ {
*code++ = OP_NCLASS;
if (lengthptr == NULL) /* Save time in the pre-compile phase */ if (lengthptr == NULL) /* Save time in the pre-compile phase */
for (c = 0; c < 32; c++) code[c] = ~classbits[c]; for (c = 0; c < 32; c++) code[c] = ~classbits[c];
} }
else else
{ {
*code++ = OP_CLASS;
memcpy(code, classbits, 32); memcpy(code, classbits, 32);
} }
code += 32; code += 32;
@ -3882,7 +3932,9 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
int len; int len;
if (*tempcode == OP_EXACT || *tempcode == OP_TYPEEXACT || if (*tempcode == OP_EXACT || *tempcode == OP_TYPEEXACT ||
*tempcode == OP_NOTEXACT) *tempcode == OP_NOTEXACT)
tempcode += _pcre_OP_lengths[*tempcode]; tempcode += _pcre_OP_lengths[*tempcode] +
((*tempcode == OP_TYPEEXACT &&
(tempcode[3] == OP_PROP || tempcode[3] == OP_NOTPROP))? 2:0);
len = code - tempcode; len = code - tempcode;
if (len > 0) switch (*tempcode) if (len > 0) switch (*tempcode)
{ {
@ -4109,16 +4161,13 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
*errorcodeptr = ERR58; *errorcodeptr = ERR58;
goto FAILED; goto FAILED;
} }
if (refsign == '-') recno = (refsign == '-')?
{ cd->bracount - recno + 1 : recno +cd->bracount;
recno = cd->bracount - recno + 1; if (recno <= 0 || recno > cd->final_bracount)
if (recno <= 0)
{ {
*errorcodeptr = ERR15; *errorcodeptr = ERR15;
goto FAILED; goto FAILED;
} }
}
else recno += cd->bracount;
PUT2(code, 2+LINK_SIZE, recno); PUT2(code, 2+LINK_SIZE, recno);
break; break;
} }
@ -4190,9 +4239,10 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
skipbytes = 1; skipbytes = 1;
} }
/* Check for the "name" actually being a subpattern number. */ /* Check for the "name" actually being a subpattern number. We are
in the second pass here, so final_bracount is set. */
else if (recno > 0) else if (recno > 0 && recno <= cd->final_bracount)
{ {
PUT2(code, 2+LINK_SIZE, recno); PUT2(code, 2+LINK_SIZE, recno);
} }
@ -4386,7 +4436,9 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
/* We come here from the Python syntax above that handles both /* We come here from the Python syntax above that handles both
references (?P=name) and recursion (?P>name), as well as falling references (?P=name) and recursion (?P>name), as well as falling
through from the Perl recursion syntax (?&name). */ through from the Perl recursion syntax (?&name). We also come here from
the Perl \k<name> or \k'name' back reference syntax and the \k{name}
.NET syntax. */
NAMED_REF_OR_RECURSE: NAMED_REF_OR_RECURSE:
name = ++ptr; name = ++ptr;
@ -4398,6 +4450,11 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
if (lengthptr != NULL) if (lengthptr != NULL)
{ {
if (namelen == 0)
{
*errorcodeptr = ERR62;
goto FAILED;
}
if (*ptr != terminator) if (*ptr != terminator)
{ {
*errorcodeptr = ERR42; *errorcodeptr = ERR42;
@ -4411,14 +4468,19 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
recno = 0; recno = 0;
} }
/* In the real compile, seek the name in the table */ /* In the real compile, seek the name in the table. We check the name
first, and then check that we have reached the end of the name in the
table. That way, if the name is longer than any in the table,
the comparison will fail without reading beyond the table entry. */
else else
{ {
slot = cd->name_table; slot = cd->name_table;
for (i = 0; i < cd->names_found; i++) for (i = 0; i < cd->names_found; i++)
{ {
if (strncmp((char *)name, (char *)slot+2, namelen) == 0) break; if (strncmp((char *)name, (char *)slot+2, namelen) == 0 &&
slot[2+namelen] == 0)
break;
slot += cd->name_entry_size; slot += cd->name_entry_size;
} }
@ -4455,7 +4517,15 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
{ {
const uschar *called; const uschar *called;
if ((refsign = *ptr) == '+') ptr++; if ((refsign = *ptr) == '+')
{
ptr++;
if (g_ascii_isdigit(*ptr) == 0)
{
*errorcodeptr = ERR63;
goto FAILED;
}
}
else if (refsign == '-') else if (refsign == '-')
{ {
if (g_ascii_isdigit(ptr[1]) == 0) if (g_ascii_isdigit(ptr[1]) == 0)
@ -5621,7 +5691,6 @@ to fill in forward references to subpatterns. */
uschar cworkspace[COMPILE_WORK_SIZE]; uschar cworkspace[COMPILE_WORK_SIZE];
/* Set this early so that early errors get offset 0. */ /* Set this early so that early errors get offset 0. */
ptr = (const uschar *)pattern; ptr = (const uschar *)pattern;
@ -5782,7 +5851,7 @@ to compile parts of the pattern into; the compiled code is discarded when it is
no longer needed, so hopefully this workspace will never overflow, though there no longer needed, so hopefully this workspace will never overflow, though there
is a test for its doing so. */ is a test for its doing so. */
cd->bracount = 0; cd->bracount = cd->final_bracount = 0;
cd->names_found = 0; cd->names_found = 0;
cd->name_entry_size = 0; cd->name_entry_size = 0;
cd->name_table = NULL; cd->name_table = NULL;
@ -5859,6 +5928,7 @@ field. Reset the bracket count and the names_found field. Also reset the hwm
field; this time it's used for remembering forward references to subpatterns. field; this time it's used for remembering forward references to subpatterns.
*/ */
cd->final_bracount = cd->bracount; /* Save for checking forward references */
cd->bracount = 0; cd->bracount = 0;
cd->names_found = 0; cd->names_found = 0;
cd->name_table = (uschar *)re + re->name_table_offset; cd->name_table = (uschar *)re + re->name_table_offset;

View File

@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language. and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel Written by Philip Hazel
Copyright (c) 1997-2007 University of Cambridge Copyright (c) 1997-2008 University of Cambridge
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without

View File

@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language. and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel Written by Philip Hazel
Copyright (c) 1997-2007 University of Cambridge Copyright (c) 1997-2008 University of Cambridge
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without

View File

@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language. and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel Written by Philip Hazel
Copyright (c) 1997-2007 University of Cambridge Copyright (c) 1997-2008 University of Cambridge
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -4670,10 +4670,10 @@ for(;;)
if (first_byte_caseless) if (first_byte_caseless)
while (start_match < end_subject && while (start_match < end_subject &&
md->lcc[*start_match] != first_byte) md->lcc[*start_match] != first_byte)
start_match++; { NEXTCHAR(start_match); }
else else
while (start_match < end_subject && *start_match != first_byte) while (start_match < end_subject && *start_match != first_byte)
start_match++; { NEXTCHAR(start_match); }
} }
/* Or to just after a linebreak for a multiline match if possible */ /* Or to just after a linebreak for a multiline match if possible */
@ -4683,7 +4683,7 @@ for(;;)
if (start_match > md->start_subject + start_offset) if (start_match > md->start_subject + start_offset)
{ {
while (start_match <= end_subject && !WAS_NEWLINE(start_match)) while (start_match <= end_subject && !WAS_NEWLINE(start_match))
start_match++; { NEXTCHAR(start_match); }
/* If we have just passed a CR and the newline option is ANY or ANYCRLF, /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
and we are now at a LF, advance the match position by one more character. and we are now at a LF, advance the match position by one more character.
@ -4704,7 +4704,9 @@ for(;;)
while (start_match < end_subject) while (start_match < end_subject)
{ {
register unsigned int c = *start_match; register unsigned int c = *start_match;
if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++; else break; if ((start_bits[c/8] & (1 << (c&7))) == 0)
{ NEXTCHAR(start_match); }
else break;
} }
} }

View File

@ -2,11 +2,11 @@
* Perl-Compatible Regular Expressions * * Perl-Compatible Regular Expressions *
*************************************************/ *************************************************/
/*PCRE is a library of functions to support regular expressions whose syntax /* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language. and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel Written by Philip Hazel
Copyright (c) 1997-2007 University of Cambridge Copyright (c) 1997-2008 University of Cambridge
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without

View File

@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language. and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel Written by Philip Hazel
Copyright (c) 1997-2007 University of Cambridge Copyright (c) 1997-2008 University of Cambridge
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without

View File

@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language. and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel Written by Philip Hazel
Copyright (c) 1997-2007 University of Cambridge Copyright (c) 1997-2008 University of Cambridge
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without

View File

@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language. and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel Written by Philip Hazel
Copyright (c) 1997-2007 University of Cambridge Copyright (c) 1997-2008 University of Cambridge
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without

View File

@ -7,7 +7,7 @@
and semantics are as close as possible to those of the Perl 5 language. and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel Written by Philip Hazel
Copyright (c) 1997-2007 University of Cambridge Copyright (c) 1997-2008 University of Cambridge
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -363,6 +363,7 @@ never be called in byte mode. To make sure it can never even appear when UTF-8
support is omitted, we don't even define it. */ support is omitted, we don't even define it. */
#ifndef SUPPORT_UTF8 #ifndef SUPPORT_UTF8
#define NEXTCHAR(p) p++;
#define GETCHAR(c, eptr) c = *eptr; #define GETCHAR(c, eptr) c = *eptr;
#define GETCHARTEST(c, eptr) c = *eptr; #define GETCHARTEST(c, eptr) c = *eptr;
#define GETCHARINC(c, eptr) c = *eptr++; #define GETCHARINC(c, eptr) c = *eptr++;
@ -372,6 +373,13 @@ support is omitted, we don't even define it. */
#else /* SUPPORT_UTF8 */ #else /* SUPPORT_UTF8 */
/* Advance a character pointer one byte in non-UTF-8 mode and by one character
in UTF-8 mode. After the initial increment, when utf8 is set, the pointer is
moved past any following bytes whose top two bits are 10 (UTF-8 continuation
bytes).

Usage notes: the expansion is two statements (an increment followed by an if),
not a single statement, so it must be enclosed in braces when used as the body
of an if or a loop. It reads a variable named utf8 that must be in scope at the
point of use, and it evaluates p more than once, so p should be a plain pointer
lvalue with no side effects. */
#define NEXTCHAR(p) \
p++; \
if (utf8) { while((*p & 0xc0) == 0x80) p++; }
/* Get the next UTF-8 character, not advancing the pointer. This is called when /* Get the next UTF-8 character, not advancing the pointer. This is called when
we know we are in UTF-8 mode. */ we know we are in UTF-8 mode. */
@ -535,7 +543,7 @@ req_byte match. */
#define REQ_BYTE_MAX 1000 #define REQ_BYTE_MAX 1000
/* Flags added to firstbyte or reqbyte; a "non-literal" item is either a /* Flags added to firstbyte or reqbyte; a "non-literal" item is either a
variable-length repeat, or anything other than literal characters. */ variable-length repeat, or anything other than literal characters. */
#define REQ_CASELESS 0x0100 /* indicates caselessness */ #define REQ_CASELESS 0x0100 /* indicates caselessness */
#define REQ_VARY 0x0200 /* reqbyte followed non-literal item */ #define REQ_VARY 0x0200 /* reqbyte followed non-literal item */
@ -868,7 +876,7 @@ enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9,
ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39, ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39,
ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
ERR60, ERR61 }; ERR60, ERR61, ERR62, ERR63 };
/* The real format of the start of the pcre block; the index of names and the /* The real format of the start of the pcre block; the index of names and the
code vector run on as long as necessary after the end. We store an explicit code vector run on as long as necessary after the end. We store an explicit
@ -931,7 +939,8 @@ typedef struct compile_data {
uschar *name_table; /* The name/number table */ uschar *name_table; /* The name/number table */
int names_found; /* Number of entries so far */ int names_found; /* Number of entries so far */
int name_entry_size; /* Size of each entry */ int name_entry_size; /* Size of each entry */
int bracount; /* Count of capturing parens */ int bracount; /* Count of capturing parens as we compile */
int final_bracount; /* Saved value after first pass */
int top_backref; /* Maximum back reference */ int top_backref; /* Maximum back reference */
unsigned int backref_map; /* Bitmap of low back refs */ unsigned int backref_map; /* Bitmap of low back refs */
int external_options; /* External (initial) options */ int external_options; /* External (initial) options */
@ -1033,7 +1042,7 @@ typedef struct dfa_match_data {
#define ctype_letter 0x02 #define ctype_letter 0x02
#define ctype_digit 0x04 #define ctype_digit 0x04
#define ctype_xdigit 0x08 #define ctype_xdigit 0x08
#define ctype_word 0x10 /* alphameric or '_' */ #define ctype_word 0x10 /* alphanumeric or '_' */
#define ctype_meta 0x80 /* regexp meta char or zero (end pattern) */ #define ctype_meta 0x80 /* regexp meta char or zero (end pattern) */
/* Offsets for the bitmap tables in pcre_cbits. Each table contains a set /* Offsets for the bitmap tables in pcre_cbits. Each table contains a set

View File

@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language. and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel Written by Philip Hazel
Copyright (c) 1997-2007 University of Cambridge Copyright (c) 1997-2008 University of Cambridge
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without

View File

@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language. and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel Written by Philip Hazel
Copyright (c) 1997-2007 University of Cambridge Copyright (c) 1997-2008 University of Cambridge
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without

View File

@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language. and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel Written by Philip Hazel
Copyright (c) 1997-2007 University of Cambridge Copyright (c) 1997-2008 University of Cambridge
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without

View File

@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language. and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel Written by Philip Hazel
Copyright (c) 1997-2007 University of Cambridge Copyright (c) 1997-2008 University of Cambridge
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without

View File

@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language. and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel Written by Philip Hazel
Copyright (c) 1997-2007 University of Cambridge Copyright (c) 1997-2008 University of Cambridge
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without

View File

@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language. and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel Written by Philip Hazel
Copyright (c) 1997-2007 University of Cambridge Copyright (c) 1997-2008 University of Cambridge
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without

View File

@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language. and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel Written by Philip Hazel
Copyright (c) 1997-2007 University of Cambridge Copyright (c) 1997-2008 University of Cambridge
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without

View File

@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language. and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel Written by Philip Hazel
Copyright (c) 1997-2007 University of Cambridge Copyright (c) 1997-2008 University of Cambridge
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without

View File

@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language. and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel Written by Philip Hazel
Copyright (c) 1997-2007 University of Cambridge Copyright (c) 1997-2008 University of Cambridge
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without