regex: Import PCRE 8.31

https://bugzilla.gnome.org/show_bug.cgi?id=679193
This commit is contained in:
Christian Persch 2012-06-14 22:15:27 +02:00
parent f66052fc87
commit 9457833010
12 changed files with 2090 additions and 1127 deletions

View File

@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
/* The current PCRE version information. */
#define PCRE_MAJOR 8
#define PCRE_MINOR 30
#define PCRE_MINOR 31
#define PCRE_PRERELEASE
#define PCRE_DATE 2012-02-04
#define PCRE_DATE 2012-07-06
/* When an application links to a PCRE DLL in Windows, the symbols that are
imported have to be identified as such. When building PCRE, the appropriate
@ -179,6 +179,7 @@ compiling). */
#define PCRE_ERROR_JIT_STACKLIMIT (-27)
#define PCRE_ERROR_BADMODE (-28)
#define PCRE_ERROR_BADENDIANNESS (-29)
#define PCRE_ERROR_DFA_BADRESTART (-30)
/* Specific error codes for UTF-8 validity checks */
@ -234,6 +235,7 @@ compiling). */
#define PCRE_INFO_MINLENGTH 15
#define PCRE_INFO_JIT 16
#define PCRE_INFO_JITSIZE 17
#define PCRE_INFO_MAXLOOKBEHIND 18
/* Request types for pcre_config(). Do not re-arrange, in order to remain
compatible. */
@ -254,7 +256,9 @@ compatible. */
/* Request types for pcre_study(). Do not re-arrange, in order to remain
compatible. */
#define PCRE_STUDY_JIT_COMPILE 0x0001
#define PCRE_STUDY_JIT_COMPILE 0x0001
#define PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE 0x0002
#define PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE 0x0004
/* Bit flags for the pcre[16]_extra structure. Do not re-arrange or redefine
these bits, just add new ones on the end, in order to remain compatible. */

View File

@ -52,7 +52,11 @@ supporting internal functions that are not used by other modules. */
#include "pcre_internal.h"
#ifdef GLIB_COMPILATION
#include "gstrfuncs.h"
#else
#include <glib.h>
#endif
/* When PCRE_DEBUG is defined, we need the pcre(16)_printint() function, which
is also used by pcretest. PCRE_DEBUG is not defined when building a production
@ -490,6 +494,9 @@ static const char error_texts[] =
"too many forward references\0"
"disallowed Unicode code point (>= 0xd800 && <= 0xdfff)\0"
"invalid UTF-16 string\0"
/* 75 */
"name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"
"character value in \\u.... sequence is too large\0"
;
/* Table to identify digits and hex digits. This is used when compiling
@ -831,6 +838,18 @@ else
c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
#endif
}
#ifdef COMPILE_PCRE8
if (c > (utf ? 0x10ffff : 0xff))
#else
#ifdef COMPILE_PCRE16
if (c > (utf ? 0x10ffff : 0xffff))
#endif
#endif
{
*errorcodeptr = ERR76;
}
else if (utf && c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73;
}
}
else
@ -2227,32 +2246,60 @@ for (;;)
{
case OP_CHAR:
case OP_CHARI:
case OP_NOT:
case OP_NOTI:
case OP_EXACT:
case OP_EXACTI:
case OP_NOTEXACT:
case OP_NOTEXACTI:
case OP_UPTO:
case OP_UPTOI:
case OP_NOTUPTO:
case OP_NOTUPTOI:
case OP_MINUPTO:
case OP_MINUPTOI:
case OP_NOTMINUPTO:
case OP_NOTMINUPTOI:
case OP_POSUPTO:
case OP_POSUPTOI:
case OP_NOTPOSUPTO:
case OP_NOTPOSUPTOI:
case OP_STAR:
case OP_STARI:
case OP_NOTSTAR:
case OP_NOTSTARI:
case OP_MINSTAR:
case OP_MINSTARI:
case OP_NOTMINSTAR:
case OP_NOTMINSTARI:
case OP_POSSTAR:
case OP_POSSTARI:
case OP_NOTPOSSTAR:
case OP_NOTPOSSTARI:
case OP_PLUS:
case OP_PLUSI:
case OP_NOTPLUS:
case OP_NOTPLUSI:
case OP_MINPLUS:
case OP_MINPLUSI:
case OP_NOTMINPLUS:
case OP_NOTMINPLUSI:
case OP_POSPLUS:
case OP_POSPLUSI:
case OP_NOTPOSPLUS:
case OP_NOTPOSPLUSI:
case OP_QUERY:
case OP_QUERYI:
case OP_NOTQUERY:
case OP_NOTQUERYI:
case OP_MINQUERY:
case OP_MINQUERYI:
case OP_NOTMINQUERY:
case OP_NOTMINQUERYI:
case OP_POSQUERY:
case OP_POSQUERYI:
case OP_NOTPOSQUERY:
case OP_NOTPOSQUERYI:
if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
break;
}
@ -3071,22 +3118,28 @@ if (next >= 0) switch(op_code)
#endif /* SUPPORT_UTF */
return (c != TABLE_GET((unsigned int)next, cd->fcc, next)); /* Non-UTF-8 mode */
/* For OP_NOT and OP_NOTI, the data is always a single-byte character. These
opcodes are not used for multi-byte characters, because they are coded using
an XCLASS instead. */
case OP_NOT:
return (c = *previous) == next;
#ifdef SUPPORT_UTF
GETCHARTEST(c, previous);
#else
c = *previous;
#endif
return c == next;
case OP_NOTI:
if ((c = *previous) == next) return TRUE;
#ifdef SUPPORT_UTF
GETCHARTEST(c, previous);
#else
c = *previous;
#endif
if (c == next) return TRUE;
#ifdef SUPPORT_UTF
if (utf)
{
unsigned int othercase;
if (next < 128) othercase = cd->fcc[next]; else
#ifdef SUPPORT_UCP
othercase = UCD_OTHERCASE(next);
othercase = UCD_OTHERCASE((unsigned int)next);
#else
othercase = NOTACHAR;
#endif
@ -3094,28 +3147,28 @@ if (next >= 0) switch(op_code)
}
else
#endif /* SUPPORT_UTF */
return (c == (int)(TABLE_GET((unsigned int)next, cd->fcc, next))); /* Non-UTF-8 mode */
return (c == TABLE_GET((unsigned int)next, cd->fcc, next)); /* Non-UTF-8 mode */
/* Note that OP_DIGIT etc. are generated only when PCRE_UCP is *not* set.
When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
case OP_DIGIT:
return next > 127 || (cd->ctypes[next] & ctype_digit) == 0;
return next > 255 || (cd->ctypes[next] & ctype_digit) == 0;
case OP_NOT_DIGIT:
return next <= 127 && (cd->ctypes[next] & ctype_digit) != 0;
return next <= 255 && (cd->ctypes[next] & ctype_digit) != 0;
case OP_WHITESPACE:
return next > 127 || (cd->ctypes[next] & ctype_space) == 0;
return next > 255 || (cd->ctypes[next] & ctype_space) == 0;
case OP_NOT_WHITESPACE:
return next <= 127 && (cd->ctypes[next] & ctype_space) != 0;
return next <= 255 && (cd->ctypes[next] & ctype_space) != 0;
case OP_WORDCHAR:
return next > 127 || (cd->ctypes[next] & ctype_word) == 0;
return next > 255 || (cd->ctypes[next] & ctype_word) == 0;
case OP_NOT_WORDCHAR:
return next <= 127 && (cd->ctypes[next] & ctype_word) != 0;
return next <= 255 && (cd->ctypes[next] & ctype_word) != 0;
case OP_HSPACE:
case OP_NOT_HSPACE:
@ -3193,22 +3246,22 @@ switch(op_code)
switch(-next)
{
case ESC_d:
return c > 127 || (cd->ctypes[c] & ctype_digit) == 0;
return c > 255 || (cd->ctypes[c] & ctype_digit) == 0;
case ESC_D:
return c <= 127 && (cd->ctypes[c] & ctype_digit) != 0;
return c <= 255 && (cd->ctypes[c] & ctype_digit) != 0;
case ESC_s:
return c > 127 || (cd->ctypes[c] & ctype_space) == 0;
return c > 255 || (cd->ctypes[c] & ctype_space) == 0;
case ESC_S:
return c <= 127 && (cd->ctypes[c] & ctype_space) != 0;
return c <= 255 && (cd->ctypes[c] & ctype_space) != 0;
case ESC_w:
return c > 127 || (cd->ctypes[c] & ctype_word) == 0;
return c > 255 || (cd->ctypes[c] & ctype_word) == 0;
case ESC_W:
return c <= 127 && (cd->ctypes[c] & ctype_word) != 0;
return c <= 255 && (cd->ctypes[c] & ctype_word) != 0;
case ESC_h:
case ESC_H:
@ -3317,10 +3370,10 @@ switch(op_code)
return next == -ESC_d;
case OP_WHITESPACE:
return next == -ESC_S || next == -ESC_d || next == -ESC_w || next == -ESC_R;
return next == -ESC_S || next == -ESC_d || next == -ESC_w;
case OP_NOT_WHITESPACE:
return next == -ESC_s || next == -ESC_h || next == -ESC_v;
return next == -ESC_s || next == -ESC_h || next == -ESC_v || next == -ESC_R;
case OP_HSPACE:
return next == -ESC_S || next == -ESC_H || next == -ESC_d ||
@ -4484,42 +4537,35 @@ for (;; ptr++)
LONE_SINGLE_CHARACTER:
/* Only the value of 1 matters for class_single_char. */
if (class_single_char < 2) class_single_char++;
/* If class_charcount is 1, we saw precisely one character. As long as
there were no negated characters >= 128 and there was no use of \p or \P,
in other words, no use of any XCLASS features, we can optimize.
In UTF-8 mode, we can optimize the negative case only if there were no
characters >= 128 because OP_NOT and the related opcodes like OP_NOTSTAR
operate on single-bytes characters only. This is an historical hangover.
Maybe one day we can tidy these opcodes to handle multi-byte characters.
there was no use of \p or \P, in other words, no use of any XCLASS
features, we can optimize.
The optimization throws away the bit map. We turn the item into a
1-character OP_CHAR[I] if it's positive, or OP_NOT[I] if it's negative.
Note that OP_NOT[I] does not support multibyte characters. In the positive
case, it can cause firstchar to be set. Otherwise, there can be no first
char if this item is first, whatever repeat count may follow. In the case
of reqchar, save the previous value for reinstating. */
In the positive case, it can cause firstchar to be set. Otherwise, there
can be no first char if this item is first, whatever repeat count may
follow. In the case of reqchar, save the previous value for reinstating. */
#ifdef SUPPORT_UTF
if (class_single_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET
&& (!utf || !negate_class || c < (MAX_VALUE_FOR_SINGLE_CHAR + 1)))
#else
if (class_single_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
#endif
{
ptr++;
zeroreqchar = reqchar;
/* The OP_NOT[I] opcodes work on single characters only. */
if (negate_class)
{
if (firstchar == REQ_UNSET) firstchar = REQ_NONE;
zerofirstchar = firstchar;
*code++ = ((options & PCRE_CASELESS) != 0)? OP_NOTI: OP_NOT;
*code++ = c;
#ifdef SUPPORT_UTF
if (utf && c > MAX_VALUE_FOR_SINGLE_CHAR)
code += PRIV(ord2utf)(c, code);
else
#endif
*code++ = c;
goto NOT_CHAR;
}
@ -4777,15 +4823,23 @@ for (;; ptr++)
/* Now handle repetition for the different types of item. */
/* If previous was a character match, abolish the item and generate a
repeat item instead. If a char item has a minumum of more than one, ensure
that it is set in reqchar - it might not be if a sequence such as x{3} is
the first thing in a branch because the x will have gone into firstchar
instead. */
/* If previous was a character or negated character match, abolish the item
and generate a repeat item instead. If a char item has a minimum of more
than one, ensure that it is set in reqchar - it might not be if a sequence
such as x{3} is the first thing in a branch because the x will have gone
into firstchar instead. */
if (*previous == OP_CHAR || *previous == OP_CHARI)
if (*previous == OP_CHAR || *previous == OP_CHARI
|| *previous == OP_NOT || *previous == OP_NOTI)
{
op_type = (*previous == OP_CHAR)? 0 : OP_STARI - OP_STAR;
switch (*previous)
{
default: /* Make compiler happy. */
case OP_CHAR: op_type = OP_STAR - OP_STAR; break;
case OP_CHARI: op_type = OP_STARI - OP_STAR; break;
case OP_NOT: op_type = OP_NOTSTAR - OP_STAR; break;
case OP_NOTI: op_type = OP_NOTSTARI - OP_STAR; break;
}
/* Deal with UTF characters that take up more than one character. It's
easier to write this out separately than try to macrify it. Use c to
@ -4808,7 +4862,8 @@ for (;; ptr++)
with UTF disabled, or for a single character UTF character. */
{
c = code[-1];
if (repeat_min > 1) reqchar = c | req_caseopt | cd->req_varyopt;
if (*previous <= OP_CHARI && repeat_min > 1)
reqchar = c | req_caseopt | cd->req_varyopt;
}
/* If the repetition is unlimited, it pays to see if the next thing on
@ -4827,26 +4882,6 @@ for (;; ptr++)
goto OUTPUT_SINGLE_REPEAT; /* Code shared with single character types */
}
/* If previous was a single negated character ([^a] or similar), we use
one of the special opcodes, replacing it. The code is shared with single-
character repeats by setting opt_type to add a suitable offset into
repeat_type. We can also test for auto-possessification. OP_NOT and OP_NOTI
are currently used only for single-byte chars. */
else if (*previous == OP_NOT || *previous == OP_NOTI)
{
op_type = ((*previous == OP_NOT)? OP_NOTSTAR : OP_NOTSTARI) - OP_STAR;
c = previous[1];
if (!possessive_quantifier &&
repeat_max < 0 &&
check_auto_possessive(previous, utf, ptr + 1, options, cd))
{
repeat_type = 0; /* Force greedy */
possessive_quantifier = TRUE;
}
goto OUTPUT_SINGLE_REPEAT;
}
/* If previous was a character type match (\d or similar), abolish it and
create a suitable repeat item. The code is shared with single-character
repeats by setting op_type to add a suitable offset into repeat_type. Note
@ -5587,6 +5622,11 @@ for (;; ptr++)
arg = ++ptr;
while (*ptr != 0 && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
arglen = (int)(ptr - arg);
if (arglen > (int)MAX_MARK)
{
*errorcodeptr = ERR75;
goto FAILED;
}
}
if (*ptr != CHAR_RIGHT_PARENTHESIS)
@ -6838,10 +6878,13 @@ for (;; ptr++)
/* For the rest (including \X when Unicode properties are supported), we
can obtain the OP value by negating the escape value in the default
situation when PCRE_UCP is not set. When it *is* set, we substitute
Unicode property tests. */
Unicode property tests. Note that \b and \B do a one-character
lookbehind. */
else
{
if ((-c == ESC_b || -c == ESC_B) && cd->max_lookbehind == 0)
cd->max_lookbehind = 1;
#ifdef SUPPORT_UCP
if (-c >= ESC_DU && -c <= ESC_wu)
{
@ -7149,7 +7192,12 @@ for (;;)
*ptrptr = ptr;
return FALSE;
}
else { PUT(reverse_count, 0, fixed_length); }
else
{
if (fixed_length > cd->max_lookbehind)
cd->max_lookbehind = fixed_length;
PUT(reverse_count, 0, fixed_length);
}
}
}
@ -7819,6 +7867,7 @@ cd->start_pattern = (const pcre_uchar *)pattern;
cd->end_pattern = (const pcre_uchar *)(pattern + STRLEN_UC((const pcre_uchar *)pattern));
cd->req_varyopt = 0;
cd->assert_depth = 0;
cd->max_lookbehind = 0;
cd->external_options = options;
cd->external_flags = 0;
cd->open_caps = NULL;
@ -7869,7 +7918,6 @@ re->magic_number = MAGIC_NUMBER;
re->size = (int)size;
re->options = cd->external_options;
re->flags = cd->external_flags;
re->dummy1 = 0;
re->first_char = 0;
re->req_char = 0;
re->name_table_offset = sizeof(REAL_PCRE) / sizeof(pcre_uchar);
@ -7889,6 +7937,7 @@ field; this time it's used for remembering forward references to subpatterns.
cd->final_bracount = cd->bracount; /* Save for checking forward references */
cd->assert_depth = 0;
cd->bracount = 0;
cd->max_lookbehind = 0;
cd->names_found = 0;
cd->name_table = (pcre_uchar *)re + re->name_table_offset;
codestart = cd->name_table + re->name_entry_size * re->name_count;
@ -7910,6 +7959,7 @@ code = (pcre_uchar *)codestart;
&firstchar, &reqchar, NULL, cd, NULL);
re->top_bracket = cd->bracount;
re->top_backref = cd->top_backref;
re->max_lookbehind = cd->max_lookbehind;
re->flags = cd->external_flags | PCRE_MODE;
if (cd->had_accept) reqchar = REQ_NONE; /* Must disable after (*ACCEPT) */
@ -7997,6 +8047,7 @@ if (cd->check_lookbehind)
(fixed_length == -4)? ERR70 : ERR25;
break;
}
if (fixed_length > cd->max_lookbehind) cd->max_lookbehind = fixed_length;
PUT(cc, 1, fixed_length);
}
cc += 1 + LINK_SIZE;

View File

@ -38,10 +38,9 @@ POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains the external function pcre_dfa_exec(), which is an
alternative matching function that uses a sort of DFA algorithm (not a true
FSM). This is NOT Perl- compatible, but it has advantages in certain
FSM). This is NOT Perl-compatible, but it has advantages in certain
applications. */
@ -282,7 +281,7 @@ typedef struct stateblock {
int data; /* Some use extra data */
} stateblock;
#define INTS_PER_STATEBLOCK (sizeof(stateblock)/sizeof(int))
#define INTS_PER_STATEBLOCK (int)(sizeof(stateblock)/sizeof(int))
#ifdef PCRE_DEBUG
@ -382,7 +381,8 @@ for the current character, one for the following character). */
next_new_state->count = (y); \
next_new_state->data = (z); \
next_new_state++; \
DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \
DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d) line %d\n", rlevel*2-2, SP, \
(x), (y), (z), __LINE__)); \
} \
else return PCRE_ERROR_DFA_WSSIZE
@ -424,6 +424,8 @@ BOOL utf = (md->poptions & PCRE_UTF8) != 0;
BOOL utf = FALSE;
#endif
BOOL reset_could_continue = FALSE;
rlevel++;
offsetcount &= (-2);
@ -571,7 +573,9 @@ for (;;)
int clen, dlen;
unsigned int c, d;
int forced_fail = 0;
BOOL could_continue = FALSE;
BOOL partial_newline = FALSE;
BOOL could_continue = reset_could_continue;
reset_could_continue = FALSE;
/* Make the new state list into the active state list and empty the
new state list. */
@ -607,7 +611,7 @@ for (;;)
if (ptr < end_subject)
{
clen = 1; /* Number of bytes in the character */
clen = 1; /* Number of data items in the character */
#ifdef SUPPORT_UTF
if (utf) { GETCHARLEN(c, ptr, clen); } else
#endif /* SUPPORT_UTF */
@ -641,7 +645,8 @@ for (;;)
/* A negative offset is a special case meaning "hold off going to this
(negated) state until the number of characters in the data field have
been skipped". */
been skipped". If the could_continue flag was passed over from a previous
state, arrange for it to be passed on. */
if (state_offset < 0)
{
@ -650,6 +655,7 @@ for (;;)
DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));
ADD_NEW_DATA(state_offset, current_state->count,
current_state->data - 1);
if (could_continue) reset_could_continue = TRUE;
continue;
}
else
@ -689,10 +695,10 @@ for (;;)
permitted.
We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
argument that is not a data character - but is always one byte long. We
have to take special action to deal with \P, \p, \H, \h, \V, \v and \X in
this case. To keep the other cases fast, convert these ones to new opcodes.
*/
argument that is not a data character - but is always one byte long because
the values are small. We have to take special action to deal with \P, \p,
\H, \h, \V, \v and \X in this case. To keep the other cases fast, convert
these ones to new opcodes. */
if (coptable[codevalue] > 0)
{
@ -783,7 +789,7 @@ for (;;)
offsets[0] = (int)(current_subject - start_subject);
offsets[1] = (int)(ptr - start_subject);
DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,
offsets[1] - offsets[0], current_subject));
offsets[1] - offsets[0], (char *)current_subject));
}
if ((md->moptions & PCRE_DFA_SHORTEST) != 0)
{
@ -888,7 +894,20 @@ for (;;)
/*-----------------------------------------------------------------*/
case OP_ANY:
if (clen > 0 && !IS_NEWLINE(ptr))
{ ADD_NEW(state_offset + 1, 0); }
{
if (ptr + 1 >= md->end_subject &&
(md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
c == NLBLOCK->nl[0])
{
could_continue = partial_newline = TRUE;
}
else
{
ADD_NEW(state_offset + 1, 0);
}
}
break;
/*-----------------------------------------------------------------*/
@ -916,6 +935,19 @@ for (;;)
(ptr == end_subject - md->nllen)
))
{ ADD_ACTIVE(state_offset + 1, 0); }
else if (ptr + 1 >= md->end_subject &&
(md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
c == NLBLOCK->nl[0])
{
if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
{
reset_could_continue = TRUE;
ADD_NEW_DATA(-(state_offset + 1), 0, 1);
}
else could_continue = partial_newline = TRUE;
}
}
break;
@ -928,6 +960,19 @@ for (;;)
else if (clen == 0 ||
((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr)))
{ ADD_ACTIVE(state_offset + 1, 0); }
else if (ptr + 1 >= md->end_subject &&
(md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
c == NLBLOCK->nl[0])
{
if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
{
reset_could_continue = TRUE;
ADD_NEW_DATA(-(state_offset + 1), 0, 1);
}
else could_continue = partial_newline = TRUE;
}
}
else if (IS_NEWLINE(ptr))
{ ADD_ACTIVE(state_offset + 1, 0); }
@ -1090,7 +1135,15 @@ for (;;)
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
if (clen > 0)
{
if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
if (d == OP_ANY && ptr + 1 >= md->end_subject &&
(md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
c == NLBLOCK->nl[0])
{
could_continue = partial_newline = TRUE;
}
else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
(c < 256 &&
(d != OP_ANY || !IS_NEWLINE(ptr)) &&
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
@ -1113,7 +1166,15 @@ for (;;)
ADD_ACTIVE(state_offset + 2, 0);
if (clen > 0)
{
if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
if (d == OP_ANY && ptr + 1 >= md->end_subject &&
(md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
c == NLBLOCK->nl[0])
{
could_continue = partial_newline = TRUE;
}
else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
(c < 256 &&
(d != OP_ANY || !IS_NEWLINE(ptr)) &&
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
@ -1135,7 +1196,15 @@ for (;;)
ADD_ACTIVE(state_offset + 2, 0);
if (clen > 0)
{
if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
if (d == OP_ANY && ptr + 1 >= md->end_subject &&
(md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
c == NLBLOCK->nl[0])
{
could_continue = partial_newline = TRUE;
}
else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
(c < 256 &&
(d != OP_ANY || !IS_NEWLINE(ptr)) &&
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
@ -1155,7 +1224,15 @@ for (;;)
count = current_state->count; /* Number already matched */
if (clen > 0)
{
if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
if (d == OP_ANY && ptr + 1 >= md->end_subject &&
(md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
c == NLBLOCK->nl[0])
{
could_continue = partial_newline = TRUE;
}
else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
(c < 256 &&
(d != OP_ANY || !IS_NEWLINE(ptr)) &&
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
@ -1176,7 +1253,15 @@ for (;;)
count = current_state->count; /* Number already matched */
if (clen > 0)
{
if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
if (d == OP_ANY && ptr + 1 >= md->end_subject &&
(md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
c == NLBLOCK->nl[0])
{
could_continue = partial_newline = TRUE;
}
else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
(c < 256 &&
(d != OP_ANY || !IS_NEWLINE(ptr)) &&
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
@ -1824,6 +1909,8 @@ for (;;)
ncount++;
nptr += ndlen;
}
if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
reset_could_continue = TRUE;
if (++count >= GET2(code, 1))
{ ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
else
@ -2037,6 +2124,8 @@ for (;;)
ncount++;
nptr += nclen;
}
if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
reset_could_continue = TRUE;
ADD_NEW_DATA(-(state_offset + 1), 0, ncount);
}
break;
@ -2062,7 +2151,13 @@ for (;;)
break;
case 0x000d:
if (ptr + 1 < end_subject && ptr[1] == 0x0a)
if (ptr + 1 >= end_subject)
{
ADD_NEW(state_offset + 1, 0);
if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
reset_could_continue = TRUE;
}
else if (ptr[1] == 0x0a)
{
ADD_NEW_DATA(-(state_offset + 1), 0, 1);
}
@ -2171,22 +2266,32 @@ for (;;)
break;
/*-----------------------------------------------------------------*/
/* Match a negated single character casefully. This is only used for
one-byte characters, that is, we know that d < 256. The character we are
checking (c) can be multibyte. */
/* Match a negated single character casefully. */
case OP_NOT:
if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }
break;
/*-----------------------------------------------------------------*/
/* Match a negated single character caselessly. This is only used for
one-byte characters, that is, we know that d < 256. The character we are
checking (c) can be multibyte. */
/* Match a negated single character caselessly. */
case OP_NOTI:
if (clen > 0 && c != d && c != fcc[d])
{ ADD_NEW(state_offset + dlen + 1, 0); }
if (clen > 0)
{
unsigned int otherd;
#ifdef SUPPORT_UTF
if (utf && d >= 128)
{
#ifdef SUPPORT_UCP
otherd = UCD_OTHERCASE(d);
#endif /* SUPPORT_UCP */
}
else
#endif /* SUPPORT_UTF */
otherd = TABLE_GET(d, fcc, d);
if (c != d && c != otherd)
{ ADD_NEW(state_offset + dlen + 1, 0); }
}
break;
/*-----------------------------------------------------------------*/
@ -2692,9 +2797,12 @@ for (;;)
{
int charcount = local_offsets[rc+1] - local_offsets[rc];
#ifdef SUPPORT_UTF
const pcre_uchar *p = start_subject + local_offsets[rc];
const pcre_uchar *pp = start_subject + local_offsets[rc+1];
while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
if (utf)
{
const pcre_uchar *p = start_subject + local_offsets[rc];
const pcre_uchar *pp = start_subject + local_offsets[rc+1];
while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
}
#endif
if (charcount > 0)
{
@ -2793,7 +2901,7 @@ for (;;)
const pcre_uchar *pp = local_ptr;
charcount = (int)(pp - p);
#ifdef SUPPORT_UTF
while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
if (utf) while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
#endif
ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
}
@ -2875,9 +2983,12 @@ for (;;)
else
{
#ifdef SUPPORT_UTF
const pcre_uchar *p = start_subject + local_offsets[0];
const pcre_uchar *pp = start_subject + local_offsets[1];
while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
if (utf)
{
const pcre_uchar *p = start_subject + local_offsets[0];
const pcre_uchar *pp = start_subject + local_offsets[1];
while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
}
#endif
ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
if (repeat_state_offset >= 0)
@ -2946,7 +3057,7 @@ for (;;)
if (new_count <= 0)
{
if (rlevel == 1 && /* Top level, and */
could_continue && /* Some could go on */
could_continue && /* Some could go on, and */
forced_fail != workspace[1] && /* Not all forced fail & */
( /* either... */
(md->moptions & PCRE_PARTIAL_HARD) != 0 /* Hard partial */
@ -2954,8 +3065,13 @@ for (;;)
((md->moptions & PCRE_PARTIAL_SOFT) != 0 && /* Soft partial and */
match_count < 0) /* no matches */
) && /* And... */
ptr >= end_subject && /* Reached end of subject */
ptr > md->start_used_ptr) /* Inspected non-empty string */
(
partial_newline || /* Either partial NL */
( /* or ... */
ptr >= end_subject && /* End of subject and */
ptr > md->start_used_ptr) /* Inspected non-empty string */
)
)
{
if (offsetcount >= 2)
{
@ -3052,10 +3168,27 @@ if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;
if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
/* We need to find the pointer to any study data before we test for byte
flipping, so we scan the extra_data block first. This may set two fields in the
match block, so we must initialize them beforehand. However, the other fields
in the match block must not be set until after the byte flipping. */
/* Check that the first field in the block is the magic number. If it is not,
return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
means that the pattern is likely compiled with different endianness. */
if (re->magic_number != MAGIC_NUMBER)
return re->magic_number == REVERSED_MAGIC_NUMBER?
PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
/* If restarting after a partial match, do some sanity checks on the contents
of the workspace. */
if ((options & PCRE_DFA_RESTART) != 0)
{
if ((workspace[0] & (-2)) != 0 || workspace[1] < 1 ||
workspace[1] > (wscount - 2)/INTS_PER_STATEBLOCK)
return PCRE_ERROR_DFA_BADRESTART;
}
/* Set up study, callout, and table data */
md->tables = re->tables;
md->callout_data = NULL;
@ -3074,16 +3207,6 @@ if (extra_data != NULL)
md->tables = extra_data->tables;
}
/* Check that the first field in the block is the magic number. If it is not,
return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
means that the pattern is likely compiled with different endianness. */
if (re->magic_number != MAGIC_NUMBER)
return re->magic_number == REVERSED_MAGIC_NUMBER?
PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
/* Set some local values */
current_subject = (const pcre_uchar *)subject + start_offset;

View File

@ -37,7 +37,6 @@ POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains pcre_exec(), the externally visible function that does
pattern matching using an NFA algorithm, trying to mimic Perl as closely as
possible. There are also some static supporting functions. */
@ -140,7 +139,9 @@ Arguments:
md points to match data block
caseless TRUE if caseless
Returns: < 0 if not matched, otherwise the number of subject bytes matched
Returns: >= 0 the number of subject bytes matched
-1 no match
-2 partial match; always given if at end of subject
*/
static int
@ -163,7 +164,8 @@ pchars(p, length, FALSE, md);
printf("\n");
#endif
/* Always fail if reference not set (and not JavaScript compatible). */
/* Always fail if reference not set (and not JavaScript compatible - in that
case the length is passed as zero). */
if (length < 0) return -1;
@ -189,7 +191,7 @@ if (caseless)
while (p < endptr)
{
int c, d;
if (eptr >= md->end_subject) return -1;
if (eptr >= md->end_subject) return -2; /* Partial match */
GETCHARINC(c, eptr);
GETCHARINC(d, p);
if (c != d && c != UCD_OTHERCASE(d)) return -1;
@ -202,9 +204,9 @@ if (caseless)
/* The same code works when not in UTF-8 mode and in UTF-8 mode when there
is no UCP support. */
{
if (eptr + length > md->end_subject) return -1;
while (length-- > 0)
{
if (eptr >= md->end_subject) return -2; /* Partial match */
if (TABLE_GET(*p, md->lcc, *p) != TABLE_GET(*eptr, md->lcc, *eptr)) return -1;
p++;
eptr++;
@ -217,8 +219,11 @@ are in UTF-8 mode. */
else
{
if (eptr + length > md->end_subject) return -1;
while (length-- > 0) if (*p++ != *eptr++) return -1;
while (length-- > 0)
{
if (eptr >= md->end_subject) return -2; /* Partial match */
if (*p++ != *eptr++) return -1;
}
}
return (int)(eptr - eptr_start);
@ -311,9 +316,15 @@ argument of match(), which never changes. */
#define RMATCH(ra,rb,rc,rd,re,rw)\
{\
heapframe *newframe = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));\
if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
frame->Xwhere = rw; \
heapframe *newframe = frame->Xnextframe;\
if (newframe == NULL)\
{\
newframe = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));\
if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
newframe->Xnextframe = NULL;\
frame->Xnextframe = newframe;\
}\
frame->Xwhere = rw;\
newframe->Xeptr = ra;\
newframe->Xecode = rb;\
newframe->Xmstart = mstart;\
@ -332,7 +343,6 @@ argument of match(), which never changes. */
{\
heapframe *oldframe = frame;\
frame = oldframe->Xprevframe;\
if (oldframe != &frame_zero) (PUBL(stack_free))(oldframe);\
if (frame != NULL)\
{\
rrc = ra;\
@ -346,6 +356,7 @@ argument of match(), which never changes. */
typedef struct heapframe {
struct heapframe *Xprevframe;
struct heapframe *Xnextframe;
/* Function arguments that may change */
@ -492,9 +503,7 @@ the top-level on the stack rather than malloc-ing them all gives a performance
boost in many cases where there is not much "recursion". */
#ifdef NO_RECURSE
heapframe frame_zero;
heapframe *frame = &frame_zero;
frame->Xprevframe = NULL; /* Marks the top level */
heapframe *frame = (heapframe *)md->match_frames_base;
/* Copy in the original argument variables */
@ -897,7 +906,6 @@ for (;;)
}
else /* OP_KETRMAX */
{
md->match_function_type = MATCH_CBEGROUP;
RMATCH(eptr, prev, offset_top, md, eptrb, RM66);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
ecode += 1 + LINK_SIZE;
@ -1026,7 +1034,8 @@ for (;;)
for (;;)
{
if (op >= OP_SBRA || op == OP_ONCE) md->match_function_type = MATCH_CBEGROUP;
if (op >= OP_SBRA || op == OP_ONCE)
md->match_function_type = MATCH_CBEGROUP;
/* If this is not a possibly empty group, and there are no (*THEN)s in
the pattern, and this is the final alternative, optimize as described
@ -1565,13 +1574,18 @@ for (;;)
mstart = md->start_match_ptr; /* In case \K reset it */
break;
}
md->mark = save_mark;
/* PCRE does not allow THEN to escape beyond an assertion; it is treated
as NOMATCH. */
/* A COMMIT failure must fail the entire assertion, without trying any
subsequent branches. */
if (rrc == MATCH_COMMIT) RRETURN(MATCH_NOMATCH);
/* PCRE does not allow THEN to escape beyond an assertion; it
is treated as NOMATCH. */
if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
ecode += GET(ecode, 1);
md->mark = save_mark;
}
while (*ecode == OP_ALT);
@ -1779,10 +1793,11 @@ for (;;)
goto RECURSION_MATCHED; /* Exit loop; end processing */
}
/* PCRE does not allow THEN to escape beyond a recursion; it is treated
as NOMATCH. */
/* PCRE does not allow THEN or COMMIT to escape beyond a recursion; it
is treated as NOMATCH. */
else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN &&
rrc != MATCH_COMMIT)
{
DPRINTF(("Recursion gave error %d\n", rrc));
if (new_recursive.offset_save != stacksave)
@ -1993,7 +2008,6 @@ for (;;)
}
if (*prev >= OP_SBRA) /* Could match an empty string */
{
md->match_function_type = MATCH_CBEGROUP;
RMATCH(eptr, prev, offset_top, md, eptrb, RM50);
RRETURN(rrc);
}
@ -2002,7 +2016,6 @@ for (;;)
}
else /* OP_KETRMAX */
{
if (*prev >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
RMATCH(eptr, prev, offset_top, md, eptrb, RM13);
if (rrc == MATCH_ONCE && md->once_target == prev) rrc = MATCH_NOMATCH;
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
@ -2059,7 +2072,21 @@ for (;;)
case OP_DOLLM:
if (eptr < md->end_subject)
{ if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
{
if (!IS_NEWLINE(eptr))
{
if (md->partial != 0 &&
eptr + 1 >= md->end_subject &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
*eptr == NLBLOCK->nl[0])
{
md->hitend = TRUE;
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
}
RRETURN(MATCH_NOMATCH);
}
}
else
{
if (md->noteol) RRETURN(MATCH_NOMATCH);
@ -2091,7 +2118,18 @@ for (;;)
ASSERT_NL_OR_EOS:
if (eptr < md->end_subject &&
(!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
{
if (md->partial != 0 &&
eptr + 1 >= md->end_subject &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
*eptr == NLBLOCK->nl[0])
{
md->hitend = TRUE;
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
}
RRETURN(MATCH_NOMATCH);
}
/* Either at end of string or \n before end. */
@ -2219,12 +2257,25 @@ for (;;)
}
break;
/* Match a single character type; inline for speed */
/* Match any single character type except newline; have to take care with
CRLF newlines and partial matching. */
case OP_ANY:
if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
if (md->partial != 0 &&
eptr + 1 >= md->end_subject &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
*eptr == NLBLOCK->nl[0])
{
md->hitend = TRUE;
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
}
/* Fall through */
/* Match any single character whatsoever. */
case OP_ALLANY:
if (eptr >= md->end_subject) /* DO NOT merge the eptr++ here; it must */
{ /* not be updated before SCHECK_PARTIAL. */
@ -2365,7 +2416,11 @@ for (;;)
default: RRETURN(MATCH_NOMATCH);
case 0x000d:
if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
}
else if (*eptr == 0x0a) eptr++;
break;
case 0x000a:
@ -2595,6 +2650,7 @@ for (;;)
if (UCD_CATEGORY(c) != ucp_M) break;
eptr += len;
}
CHECK_PARTIAL();
ecode++;
break;
#endif
@ -2660,6 +2716,7 @@ for (;;)
default: /* No repeat follows */
if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)
{
if (length == -2) eptr = md->end_subject; /* Partial match */
CHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
@ -2685,6 +2742,7 @@ for (;;)
int slength;
if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
{
if (slength == -2) eptr = md->end_subject; /* Partial match */
CHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
@ -2708,6 +2766,7 @@ for (;;)
if (fi >= max) RRETURN(MATCH_NOMATCH);
if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
{
if (slength == -2) eptr = md->end_subject; /* Partial match */
CHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
@ -2726,11 +2785,20 @@ for (;;)
int slength;
if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
{
CHECK_PARTIAL();
/* Can't use CHECK_PARTIAL because we don't want to update eptr in
the soft partial matching case. */
if (slength == -2 && md->partial != 0 &&
md->end_subject > md->start_used_ptr)
{
md->hitend = TRUE;
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
}
break;
}
eptr += slength;
}
while (eptr >= pp)
{
RMATCH(eptr, ecode, offset_top, md, eptrb, RM15);
@ -3360,7 +3428,7 @@ for (;;)
maximizing, find the maximum number of characters and work backwards. */
DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
max, eptr));
max, (char *)eptr));
if (op >= OP_STARI) /* Caseless */
{
@ -3504,33 +3572,41 @@ for (;;)
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
ecode++;
GETCHARINCTEST(c, eptr);
if (op == OP_NOTI) /* The caseless case */
#ifdef SUPPORT_UTF
if (utf)
{
register unsigned int ch, och;
ch = *ecode++;
#ifdef COMPILE_PCRE8
/* ch must be < 128 if UTF is enabled. */
och = md->fcc[ch];
#else
#ifdef SUPPORT_UTF
#ifdef SUPPORT_UCP
if (utf && ch > 127)
och = UCD_OTHERCASE(ch);
#else
if (utf && ch > 127)
och = ch;
#endif /* SUPPORT_UCP */
ecode++;
GETCHARINC(ch, ecode);
GETCHARINC(c, eptr);
if (op == OP_NOT)
{
if (ch == c) RRETURN(MATCH_NOMATCH);
}
else
#endif /* SUPPORT_UTF */
och = TABLE_GET(ch, md->fcc, ch);
#endif /* COMPILE_PCRE8 */
if (ch == c || och == c) RRETURN(MATCH_NOMATCH);
{
#ifdef SUPPORT_UCP
if (ch > 127)
och = UCD_OTHERCASE(ch);
#else
if (ch > 127)
och = ch;
#endif /* SUPPORT_UCP */
else
och = TABLE_GET(ch, md->fcc, ch);
if (ch == c || och == c) RRETURN(MATCH_NOMATCH);
}
}
else /* Caseful */
else
#endif
{
if (*ecode++ == c) RRETURN(MATCH_NOMATCH);
register unsigned int ch = ecode[1];
c = *eptr++;
if (ch == c || (op == OP_NOTI && TABLE_GET(ch, md->fcc, ch) == c))
RRETURN(MATCH_NOMATCH);
ecode += 2;
}
break;
@ -3610,7 +3686,7 @@ for (;;)
/* Common code for all repeated single-byte matches. */
REPEATNOTCHAR:
fc = *ecode++;
GETCHARINCTEST(fc, ecode);
/* The code is duplicated for the caseless and caseful cases, for speed,
since matching characters is likely to be quite common. First, ensure the
@ -3621,14 +3697,10 @@ for (;;)
characters and work backwards. */
DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
max, eptr));
max, (char *)eptr));
if (op >= OP_NOTSTARI) /* Caseless */
{
#ifdef COMPILE_PCRE8
/* fc must be < 128 if UTF is enabled. */
foc = md->fcc[fc];
#else
#ifdef SUPPORT_UTF
#ifdef SUPPORT_UCP
if (utf && fc > 127)
@ -3640,7 +3712,6 @@ for (;;)
else
#endif /* SUPPORT_UTF */
foc = TABLE_GET(fc, md->fcc, fc);
#endif /* COMPILE_PCRE8 */
#ifdef SUPPORT_UTF
if (utf)
@ -3654,7 +3725,7 @@ for (;;)
RRETURN(MATCH_NOMATCH);
}
GETCHARINC(d, eptr);
if (fc == d || (unsigned int) foc == d) RRETURN(MATCH_NOMATCH);
if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
}
}
else
@ -4164,6 +4235,7 @@ for (;;)
if (UCD_CATEGORY(c) != ucp_M) break;
eptr += len;
}
CHECK_PARTIAL();
}
}
@ -4184,6 +4256,15 @@ for (;;)
RRETURN(MATCH_NOMATCH);
}
if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
if (md->partial != 0 &&
eptr + 1 >= md->end_subject &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
*eptr == NLBLOCK->nl[0])
{
md->hitend = TRUE;
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
}
eptr++;
ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
}
@ -4468,6 +4549,15 @@ for (;;)
RRETURN(MATCH_NOMATCH);
}
if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
if (md->partial != 0 &&
eptr + 1 >= md->end_subject &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
*eptr == NLBLOCK->nl[0])
{
md->hitend = TRUE;
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
}
eptr++;
}
break;
@ -4948,6 +5038,7 @@ for (;;)
if (UCD_CATEGORY(c) != ucp_M) break;
eptr += len;
}
CHECK_PARTIAL();
}
}
else
@ -4971,7 +5062,18 @@ for (;;)
GETCHARINC(c, eptr);
switch(ctype)
{
case OP_ANY: /* This is the non-NL case */
case OP_ANY: /* This is the non-NL case */
if (md->partial != 0 && /* Take care with CRLF partial */
eptr >= md->end_subject &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
c == NLBLOCK->nl[0])
{
md->hitend = TRUE;
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
}
break;
case OP_ALLANY:
case OP_ANYBYTE:
break;
@ -5134,7 +5236,18 @@ for (;;)
c = *eptr++;
switch(ctype)
{
case OP_ANY: /* This is the non-NL case */
case OP_ANY: /* This is the non-NL case */
if (md->partial != 0 && /* Take care with CRLF partial */
eptr >= md->end_subject &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
c == NLBLOCK->nl[0])
{
md->hitend = TRUE;
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
}
break;
case OP_ALLANY:
case OP_ANYBYTE:
break;
@ -5491,6 +5604,7 @@ for (;;)
if (UCD_CATEGORY(c) != ucp_M) break;
eptr += len;
}
CHECK_PARTIAL();
}
/* eptr is now past the end of the maximum run */
@ -5534,6 +5648,15 @@ for (;;)
break;
}
if (IS_NEWLINE(eptr)) break;
if (md->partial != 0 && /* Take care with CRLF partial */
eptr + 1 >= md->end_subject &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
*eptr == NLBLOCK->nl[0])
{
md->hitend = TRUE;
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
}
eptr++;
ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
}
@ -5551,6 +5674,15 @@ for (;;)
break;
}
if (IS_NEWLINE(eptr)) break;
if (md->partial != 0 && /* Take care with CRLF partial */
eptr + 1 >= md->end_subject &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
*eptr == NLBLOCK->nl[0])
{
md->hitend = TRUE;
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
}
eptr++;
ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
}
@ -5815,6 +5947,15 @@ for (;;)
break;
}
if (IS_NEWLINE(eptr)) break;
if (md->partial != 0 && /* Take care with CRLF partial */
eptr + 1 >= md->end_subject &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
*eptr == NLBLOCK->nl[0])
{
md->hitend = TRUE;
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
}
eptr++;
}
break;
@ -6145,6 +6286,31 @@ Undefine all the macros that were defined above to handle this. */
***************************************************************************/
#ifdef NO_RECURSE
/*************************************************
* Release allocated heap frames *
*************************************************/
/* Free every heap frame that is chained after the base frame. The chain is
followed through the Xnextframe links until a NULL link is found, and each
frame on it is handed to the stack_free function. The base frame itself is
never passed to stack_free, because it is on the machine stack rather than
on the heap.

Argument:   frame_base   the address of the base frame
Returns:    nothing
*/

static void
release_match_heapframes (heapframe *frame_base)
{
heapframe *doomed;
heapframe *following;

for (doomed = frame_base->Xnextframe; doomed != NULL; doomed = following)
  {
  following = doomed->Xnextframe;   /* Fetch the link before freeing */
  (PUBL(stack_free))(doomed);
  }
}
#endif
/*************************************************
* Execute a Regular Expression *
@ -6207,13 +6373,22 @@ PCRE_PUCHAR req_char_ptr = start_match - 1;
const pcre_study_data *study;
const REAL_PCRE *re = (const REAL_PCRE *)argument_re;
#ifdef NO_RECURSE
heapframe frame_zero;
frame_zero.Xprevframe = NULL; /* Marks the top level */
frame_zero.Xnextframe = NULL; /* None are allocated yet */
md->match_frames_base = &frame_zero;
#endif
/* Check for the special magic call that measures the size of the stack used
per recursive call of match(). */
per recursive call of match(). Without the funny casting for sizeof, a Windows
compiler gave this error: "unary minus operator applied to unsigned type,
result still unsigned". Hopefully the cast fixes that. */
if (re == NULL && extra_data == NULL && subject == NULL && length == -999 &&
start_offset == -999)
#ifdef NO_RECURSE
return -sizeof(heapframe);
return -((int)sizeof(heapframe));
#else
return match(NULL, NULL, NULL, 0, NULL, NULL, 0);
#endif
@ -6280,20 +6455,25 @@ if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
/* If the pattern was successfully studied with JIT support, run the JIT
executable instead of the rest of this function. Most options must be set at
compile time for the JIT code to be usable. Fall back to the normal code path if
an unsupported flag is set. In particular, JIT does not support partial
matching. */
an unsupported flag is set. */
#ifdef SUPPORT_JIT
if (extra_data != NULL
&& (extra_data->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
&& (extra_data->flags & (PCRE_EXTRA_EXECUTABLE_JIT |
PCRE_EXTRA_TABLES)) == PCRE_EXTRA_EXECUTABLE_JIT
&& extra_data->executable_jit != NULL
&& (extra_data->flags & PCRE_EXTRA_TABLES) == 0
&& (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL |
PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART)) == 0)
return PRIV(jit_exec)(re, extra_data->executable_jit,
(const pcre_uchar *)subject, length, start_offset, options,
((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0)
? MATCH_LIMIT : extra_data->match_limit, offsets, offsetcount);
PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART |
PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD)) == 0)
{
rc = PRIV(jit_exec)(re, extra_data, (const pcre_uchar *)subject, length,
start_offset, options, offsets, offsetcount);
/* PCRE_ERROR_NULL means that the selected normal or partial matching
mode is not compiled. In this case we simply fall back to the interpreter. */
if (rc != PCRE_ERROR_NULL) return rc;
}
#endif
/* Carry on with non-JIT matching. This information is for finding all the
@ -6887,7 +7067,7 @@ if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
{
register int *iptr, *iend;
int resetcount = 2 + re->top_bracket * 2;
if (resetcount > offsetcount) resetcount = ocount;
if (resetcount > offsetcount) resetcount = offsetcount;
iptr = offsets + md->end_offset_top;
iend = offsets + resetcount;
while (iptr < iend) *iptr++ = -1;
@ -6908,6 +7088,9 @@ if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
*(extra_data->mark) = (pcre_uchar *)md->mark;
DPRINTF((">>>> returning %d\n", rc));
#ifdef NO_RECURSE
release_match_heapframes(&frame_zero);
#endif
return rc;
}
@ -6925,6 +7108,9 @@ if (using_temporary_offsets)
if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
{
DPRINTF((">>>> error: returning %d\n", rc));
#ifdef NO_RECURSE
release_match_heapframes(&frame_zero);
#endif
return rc;
}
@ -6954,6 +7140,9 @@ else
if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
*(extra_data->mark) = (pcre_uchar *)md->nomatch_mark;
#ifdef NO_RECURSE
release_match_heapframes(&frame_zero);
#endif
return rc;
}

View File

@ -193,6 +193,10 @@ switch (what)
*((int *)where) = (re->flags & PCRE_HASCRORLF) != 0;
break;
case PCRE_INFO_MAXLOOKBEHIND:
*((int *)where) = re->max_lookbehind;
break;
default: return PCRE_ERROR_BADOPTION;
}

View File

@ -58,7 +58,11 @@ global variables are not used. */
#include "pcre_internal.h"
#ifdef GLIB_COMPILATION
#include "gmem.h"
#else
#include <glib.h>
#endif /* GLIB_COMPILATION */
#if defined _MSC_VER || defined __SYMBIAN32__
static void* LocalPcreMalloc(size_t aSize)

View File

@ -523,6 +523,11 @@ capturing parenthesis numbers in back references. */
#define PUT2INC(a,n,d) PUT2(a,n,d), a += IMM2_SIZE
/* The maximum length of a MARK name is currently one data unit; it may be
changed in future to be a fixed number of bytes or to depend on LINK_SIZE. */
#define MAX_MARK ((1 << (sizeof(pcre_uchar)*8)) - 1)
/* When UTF encoding is being used, a character is no longer just a single
character. The macros for character handling generate simple sequences when
used in character-mode, and more complicated ones for UTF characters.
@ -887,7 +892,8 @@ time, run time, or study time, respectively. */
PCRE_NO_START_OPTIMIZE)
#define PUBLIC_STUDY_OPTIONS \
PCRE_STUDY_JIT_COMPILE
(PCRE_STUDY_JIT_COMPILE|PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE| \
PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)
/* Magic number to provide a small check against being handed junk. */
@ -1939,7 +1945,11 @@ enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9,
ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69,
ERR70, ERR71, ERR72, ERR73, ERR74, ERRCOUNT };
ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERRCOUNT };
/* JIT compiling modes. The function list is indexed by them. The enumerators
have no explicit values, so they are numbered from zero upwards in the order
written, and the final one, JIT_NUMBER_OF_COMPILE_MODES, is therefore equal to
the number of real modes that precede it; it is a count, not a mode. The three
real modes are the normal compile, the soft partial matching compile, and the
hard partial matching compile. */
enum { JIT_COMPILE, JIT_PARTIAL_SOFT_COMPILE, JIT_PARTIAL_HARD_COMPILE,
JIT_NUMBER_OF_COMPILE_MODES };
/* The real format of the start of the pcre block; the index of names and the
code vector run on as long as necessary after the end. We store an explicit
@ -1969,16 +1979,15 @@ typedef struct REAL_PCRE {
pcre_uint32 size; /* Total that was malloced */
pcre_uint32 options; /* Public options */
pcre_uint16 flags; /* Private flags */
pcre_uint16 dummy1; /* For future use */
pcre_uint16 top_bracket;
pcre_uint16 top_backref;
pcre_uint16 max_lookbehind; /* Longest lookbehind (characters) */
pcre_uint16 top_bracket; /* Highest numbered group */
pcre_uint16 top_backref; /* Highest numbered back reference */
pcre_uint16 first_char; /* Starting character */
pcre_uint16 req_char; /* This character must be seen */
pcre_uint16 name_table_offset; /* Offset to name table that follows */
pcre_uint16 name_entry_size; /* Size of any name items */
pcre_uint16 name_count; /* Number of name items */
pcre_uint16 ref_count; /* Reference count */
const pcre_uint8 *tables; /* Pointer to tables or NULL for std */
const pcre_uint8 *nullpad; /* NULL padding */
} REAL_PCRE;
@ -2024,6 +2033,7 @@ typedef struct compile_data {
int workspace_size; /* Size of workspace */
int bracount; /* Count of capturing parens as we compile */
int final_bracount; /* Saved value after first pass */
int max_lookbehind; /* Maximum lookbehind (characters) */
int top_backref; /* Maximum back reference */
unsigned int backref_map; /* Bitmap of low back refs */
int assert_depth; /* Depth of nested assertions */
@ -2125,6 +2135,9 @@ typedef struct match_data {
const pcre_uchar *mark; /* Mark pointer to pass back on success */
const pcre_uchar *nomatch_mark;/* Mark pointer to pass back on failure */
const pcre_uchar *once_target; /* Where to back up to for atomic groups */
#ifdef NO_RECURSE
void *match_frames_base; /* For remembering malloc'd frames */
#endif
} match_data;
/* A similar structure is used for the same purpose by the DFA matching
@ -2179,7 +2192,7 @@ total length. */
#define ctypes_offset (cbits_offset + cbit_length)
#define tables_length (ctypes_offset + 256)
/* Internal function prefix */
/* Internal function and data prefixes. */
#ifdef COMPILE_PCRE8
#ifndef PUBL
@ -2288,9 +2301,10 @@ extern BOOL PRIV(was_newline)(PCRE_PUCHAR, int, PCRE_PUCHAR,
extern BOOL PRIV(xclass)(int, const pcre_uchar *, BOOL);
#ifdef SUPPORT_JIT
extern void PRIV(jit_compile)(const REAL_PCRE *, PUBL(extra) *);
extern int PRIV(jit_exec)(const REAL_PCRE *, void *,
const pcre_uchar *, int, int, int, int, int *, int);
extern void PRIV(jit_compile)(const REAL_PCRE *,
PUBL(extra) *, int);
extern int PRIV(jit_exec)(const REAL_PCRE *, const PUBL(extra) *,
const pcre_uchar *, int, int, int, int *, int);
extern void PRIV(jit_free)(void *);
extern int PRIV(jit_get_size)(void *);
extern const char* PRIV(jit_get_target)(void);
@ -2298,15 +2312,6 @@ extern const char* PRIV(jit_get_target)(void);
/* Unicode character database (UCD) */
typedef struct {
pcre_uint8 script;
pcre_uint8 chartype;
pcre_int32 other_case;
} ucd_record;
extern const ucd_record PRIV(ucd_records)[];
extern const pcre_uint8 PRIV(ucd_stage1)[];
extern const pcre_uint16 PRIV(ucd_stage2)[];
extern const int PRIV(ucp_gentype)[];
#ifdef SUPPORT_JIT
extern const int PRIV(ucp_typerange)[];

File diff suppressed because it is too large Load Diff

View File

@ -1123,7 +1123,7 @@ do
case OP_HSPACE:
SET_BIT(0x09);
SET_BIT(0x20);
#ifdef COMPILE_PCRE8
#ifdef SUPPORT_UTF
if (utf)
{
#ifdef COMPILE_PCRE8
@ -1148,7 +1148,7 @@ do
SET_BIT(0x0B);
SET_BIT(0x0C);
SET_BIT(0x0D);
#ifdef COMPILE_PCRE8
#ifdef SUPPORT_UTF
if (utf)
{
#ifdef COMPILE_PCRE8
@ -1418,7 +1418,8 @@ we don't have to change that code. */
if (bits_set || min > 0
#ifdef SUPPORT_JIT
|| (options & PCRE_STUDY_JIT_COMPILE) != 0
|| (options & (PCRE_STUDY_JIT_COMPILE | PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE
| PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)) != 0
#endif
)
{
@ -1478,7 +1479,13 @@ if (bits_set || min > 0
#ifdef SUPPORT_JIT
extra->executable_jit = NULL;
if ((options & PCRE_STUDY_JIT_COMPILE) != 0) PRIV(jit_compile)(re, extra);
if ((options & PCRE_STUDY_JIT_COMPILE) != 0)
PRIV(jit_compile)(re, extra, JIT_COMPILE);
if ((options & PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE) != 0)
PRIV(jit_compile)(re, extra, JIT_PARTIAL_SOFT_COMPILE);
if ((options & PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE) != 0)
PRIV(jit_compile)(re, extra, JIT_PARTIAL_HARD_COMPILE);
if (study->flags == 0 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) == 0)
{
#ifdef COMPILE_PCRE8

View File

@ -587,7 +587,7 @@ const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
unsigned int
_pcre_ucp_othercase(const unsigned int c)
{
unsigned int oc;
unsigned int oc = NOTACHAR;
if ((oc = g_unichar_toupper(c)) != c)
return oc;

View File

@ -10,7 +10,11 @@ the UCD access macros. New values that are added for new releases of Unicode
should always be at the end of each enum, for backwards compatibility. */
/* These are the general character categories. */
#ifdef GLIB_COMPILATION
#include "gunicode.h"
#else
#include <glib.h>
#endif
enum {
ucp_C, /* Other */
@ -60,6 +64,9 @@ enum {
/* These are the script identifications. */
enum {
ucp_Common = G_UNICODE_SCRIPT_COMMON,
ucp_Inherited = G_UNICODE_SCRIPT_INHERITED,
ucp_Arabic = G_UNICODE_SCRIPT_ARABIC,
ucp_Armenian = G_UNICODE_SCRIPT_ARMENIAN,
ucp_Bengali = G_UNICODE_SCRIPT_BENGALI,
@ -69,7 +76,6 @@ enum {
ucp_Buhid = G_UNICODE_SCRIPT_BUHID,
ucp_Canadian_Aboriginal = G_UNICODE_SCRIPT_CANADIAN_ABORIGINAL,
ucp_Cherokee = G_UNICODE_SCRIPT_CHEROKEE,
ucp_Common = G_UNICODE_SCRIPT_COMMON,
ucp_Coptic = G_UNICODE_SCRIPT_COPTIC,
ucp_Cypriot = G_UNICODE_SCRIPT_CYPRIOT,
ucp_Cyrillic = G_UNICODE_SCRIPT_CYRILLIC,
@ -87,7 +93,6 @@ enum {
ucp_Hanunoo = G_UNICODE_SCRIPT_HANUNOO,
ucp_Hebrew = G_UNICODE_SCRIPT_HEBREW,
ucp_Hiragana = G_UNICODE_SCRIPT_HIRAGANA,
ucp_Inherited = G_UNICODE_SCRIPT_INHERITED,
ucp_Kannada = G_UNICODE_SCRIPT_KANNADA,
ucp_Katakana = G_UNICODE_SCRIPT_KATAKANA,
ucp_Kharoshthi = G_UNICODE_SCRIPT_KHAROSHTHI,

View File

@ -1,11 +1,10 @@
From 384879be07418fc6224b6603a2e8ca6f11e178fc Mon Sep 17 00:00:00 2001
From 23d48c5fc7aa889dc7798f9c64acd43d9cb34683 Mon Sep 17 00:00:00 2001
From: Christian Persch <chpe@gnome.org>
Date: Sun, 12 Feb 2012 21:20:33 +0100
Subject: [PATCH] regex: Use glib for unicode data
Use g_unichar_type() and g_unichar_get_script() instead of pcre tables.
---
glib/pcre/Makefile.am | 1 -
glib/pcre/pcre_compile.c | 26 +++---
glib/pcre/pcre_dfa_exec.c | 96 ++++++++--------
glib/pcre/pcre_exec.c | 26 +++---
@ -13,25 +12,13 @@ Use g_unichar_type() and g_unichar_get_script() instead of pcre tables.
glib/pcre/pcre_tables.c | 16 +++
glib/pcre/pcre_xclass.c | 24 ++--
glib/pcre/ucp.h | 265 +++++++++++++++++++++++----------------------
8 files changed, 239 insertions(+), 226 deletions(-)
7 files changed, 239 insertions(+), 225 deletions(-)
diff --git a/glib/pcre/Makefile.am b/glib/pcre/Makefile.am
index 21da5c5..1981953 100644
--- a/glib/pcre/Makefile.am
+++ b/glib/pcre/Makefile.am
@@ -51,7 +51,6 @@ libpcre_la_SOURCES = \
pcre_string_utils.c \
pcre_study.c \
pcre_tables.c \
- pcre_ucd.c \
pcre_valid_utf8.c \
pcre_version.c \
pcre_xclass.c \
diff --git a/glib/pcre/pcre_compile.c b/glib/pcre/pcre_compile.c
index eb985df..b44055a 100644
index 21bef80..a6c84e1 100644
--- a/glib/pcre/pcre_compile.c
+++ b/glib/pcre/pcre_compile.c
@@ -2890,43 +2890,43 @@ Returns: TRUE if auto-possessifying is OK
@@ -2920,43 +2920,43 @@ Returns: TRUE if auto-possessifying is OK
static BOOL
check_char_prop(int c, int ptype, int pdata, BOOL negated)
{
@ -89,10 +76,10 @@ index eb985df..b44055a 100644
}
return FALSE;
diff --git a/glib/pcre/pcre_dfa_exec.c b/glib/pcre/pcre_dfa_exec.c
index 21d7be6..41ff65b 100644
index 9565d46..3f913ce 100644
--- a/glib/pcre/pcre_dfa_exec.c
+++ b/glib/pcre/pcre_dfa_exec.c
@@ -1015,7 +1015,7 @@ for (;;)
@@ -1060,7 +1060,7 @@ for (;;)
if (clen > 0)
{
BOOL OK;
@ -101,7 +88,7 @@ index 21d7be6..41ff65b 100644
switch(code[1])
{
case PT_ANY:
@@ -1023,43 +1023,43 @@ for (;;)
@@ -1068,43 +1068,43 @@ for (;;)
break;
case PT_LAMP:
@ -156,7 +143,7 @@ index 21d7be6..41ff65b 100644
c == CHAR_UNDERSCORE;
break;
@@ -1209,7 +1209,7 @@ for (;;)
@@ -1294,7 +1294,7 @@ for (;;)
if (clen > 0)
{
BOOL OK;
@ -165,7 +152,7 @@ index 21d7be6..41ff65b 100644
switch(code[2])
{
case PT_ANY:
@@ -1217,43 +1217,43 @@ for (;;)
@@ -1302,43 +1302,43 @@ for (;;)
break;
case PT_LAMP:
@ -220,7 +207,7 @@ index 21d7be6..41ff65b 100644
c == CHAR_UNDERSCORE;
break;
@@ -1456,7 +1456,7 @@ for (;;)
@@ -1541,7 +1541,7 @@ for (;;)
if (clen > 0)
{
BOOL OK;
@ -229,7 +216,7 @@ index 21d7be6..41ff65b 100644
switch(code[2])
{
case PT_ANY:
@@ -1464,43 +1464,43 @@ for (;;)
@@ -1549,43 +1549,43 @@ for (;;)
break;
case PT_LAMP:
@ -284,7 +271,7 @@ index 21d7be6..41ff65b 100644
c == CHAR_UNDERSCORE;
break;
@@ -1728,7 +1728,7 @@ for (;;)
@@ -1813,7 +1813,7 @@ for (;;)
if (clen > 0)
{
BOOL OK;
@ -293,7 +280,7 @@ index 21d7be6..41ff65b 100644
switch(code[1 + IMM2_SIZE + 1])
{
case PT_ANY:
@@ -1736,43 +1736,43 @@ for (;;)
@@ -1821,43 +1821,43 @@ for (;;)
break;
case PT_LAMP:
@ -349,10 +336,10 @@ index 21d7be6..41ff65b 100644
break;
diff --git a/glib/pcre/pcre_exec.c b/glib/pcre/pcre_exec.c
index b715353..8eb3162 100644
index 830b8b5..c89a3f9 100644
--- a/glib/pcre/pcre_exec.c
+++ b/glib/pcre/pcre_exec.c
@@ -2507,7 +2507,7 @@ for (;;)
@@ -2565,7 +2565,7 @@ for (;;)
}
GETCHARINCTEST(c, eptr);
{
@ -361,7 +348,7 @@ index b715353..8eb3162 100644
switch(ecode[1])
{
@@ -2516,44 +2516,44 @@ for (;;)
@@ -2574,44 +2574,44 @@ for (;;)
break;
case PT_LAMP:
@ -416,7 +403,7 @@ index b715353..8eb3162 100644
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
c == CHAR_FF || c == CHAR_CR)
== (op == OP_NOTPROP))
@@ -2561,8 +2561,8 @@ for (;;)
@@ -2619,8 +2619,8 @@ for (;;)
break;
case PT_WORD:
@ -428,10 +415,10 @@ index b715353..8eb3162 100644
RRETURN(MATCH_NOMATCH);
break;
diff --git a/glib/pcre/pcre_internal.h b/glib/pcre/pcre_internal.h
index e5a4b6a..41c7ee3 100644
index 181c312..234af1b 100644
--- a/glib/pcre/pcre_internal.h
+++ b/glib/pcre/pcre_internal.h
@@ -2315,15 +2315,12 @@ extern const int PRIV(ucp_typerange)[];
@@ -2329,15 +2329,12 @@ extern const int PRIV(ucp_typerange)[];
#ifdef SUPPORT_UCP
/* UCD access macros */
@ -452,10 +439,10 @@ index e5a4b6a..41c7ee3 100644
#endif /* SUPPORT_UCP */
diff --git a/glib/pcre/pcre_tables.c b/glib/pcre/pcre_tables.c
index c8134ec..47becc7 100644
index 7ac2d89..e401974 100644
--- a/glib/pcre/pcre_tables.c
+++ b/glib/pcre/pcre_tables.c
@@ -563,6 +563,22 @@ const ucp_type_table PRIV(utt)[] = {
@@ -584,6 +584,22 @@ const ucp_type_table PRIV(utt)[] = {
const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);