regex: Import PCRE 8.31

https://bugzilla.gnome.org/show_bug.cgi?id=679193
This commit is contained in:
Christian Persch 2012-06-14 22:15:27 +02:00
parent f66052fc87
commit 9457833010
12 changed files with 2090 additions and 1127 deletions

View File

@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
/* The current PCRE version information. */ /* The current PCRE version information. */
#define PCRE_MAJOR 8 #define PCRE_MAJOR 8
#define PCRE_MINOR 30 #define PCRE_MINOR 31
#define PCRE_PRERELEASE #define PCRE_PRERELEASE
#define PCRE_DATE 2012-02-04 #define PCRE_DATE 2012-07-06
/* When an application links to a PCRE DLL in Windows, the symbols that are /* When an application links to a PCRE DLL in Windows, the symbols that are
imported have to be identified as such. When building PCRE, the appropriate imported have to be identified as such. When building PCRE, the appropriate
@ -179,6 +179,7 @@ compiling). */
#define PCRE_ERROR_JIT_STACKLIMIT (-27) #define PCRE_ERROR_JIT_STACKLIMIT (-27)
#define PCRE_ERROR_BADMODE (-28) #define PCRE_ERROR_BADMODE (-28)
#define PCRE_ERROR_BADENDIANNESS (-29) #define PCRE_ERROR_BADENDIANNESS (-29)
#define PCRE_ERROR_DFA_BADRESTART (-30)
/* Specific error codes for UTF-8 validity checks */ /* Specific error codes for UTF-8 validity checks */
@ -234,6 +235,7 @@ compiling). */
#define PCRE_INFO_MINLENGTH 15 #define PCRE_INFO_MINLENGTH 15
#define PCRE_INFO_JIT 16 #define PCRE_INFO_JIT 16
#define PCRE_INFO_JITSIZE 17 #define PCRE_INFO_JITSIZE 17
#define PCRE_INFO_MAXLOOKBEHIND 18
/* Request types for pcre_config(). Do not re-arrange, in order to remain /* Request types for pcre_config(). Do not re-arrange, in order to remain
compatible. */ compatible. */
@ -255,6 +257,8 @@ compatible. */
compatible. */ compatible. */
#define PCRE_STUDY_JIT_COMPILE 0x0001 #define PCRE_STUDY_JIT_COMPILE 0x0001
#define PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE 0x0002
#define PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE 0x0004
/* Bit flags for the pcre[16]_extra structure. Do not re-arrange or redefine /* Bit flags for the pcre[16]_extra structure. Do not re-arrange or redefine
these bits, just add new ones on the end, in order to remain compatible. */ these bits, just add new ones on the end, in order to remain compatible. */

View File

@ -52,7 +52,11 @@ supporting internal functions that are not used by other modules. */
#include "pcre_internal.h" #include "pcre_internal.h"
#ifdef GLIB_COMPILATION
#include "gstrfuncs.h" #include "gstrfuncs.h"
#else
#include <glib.h>
#endif
/* When PCRE_DEBUG is defined, we need the pcre(16)_printint() function, which /* When PCRE_DEBUG is defined, we need the pcre(16)_printint() function, which
is also used by pcretest. PCRE_DEBUG is not defined when building a production is also used by pcretest. PCRE_DEBUG is not defined when building a production
@ -490,6 +494,9 @@ static const char error_texts[] =
"too many forward references\0" "too many forward references\0"
"disallowed Unicode code point (>= 0xd800 && <= 0xdfff)\0" "disallowed Unicode code point (>= 0xd800 && <= 0xdfff)\0"
"invalid UTF-16 string\0" "invalid UTF-16 string\0"
/* 75 */
"name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"
"character value in \\u.... sequence is too large\0"
; ;
/* Table to identify digits and hex digits. This is used when compiling /* Table to identify digits and hex digits. This is used when compiling
@ -831,6 +838,18 @@ else
c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10)); c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
#endif #endif
} }
#ifdef COMPILE_PCRE8
if (c > (utf ? 0x10ffff : 0xff))
#else
#ifdef COMPILE_PCRE16
if (c > (utf ? 0x10ffff : 0xffff))
#endif
#endif
{
*errorcodeptr = ERR76;
}
else if (utf && c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73;
} }
} }
else else
@ -2227,32 +2246,60 @@ for (;;)
{ {
case OP_CHAR: case OP_CHAR:
case OP_CHARI: case OP_CHARI:
case OP_NOT:
case OP_NOTI:
case OP_EXACT: case OP_EXACT:
case OP_EXACTI: case OP_EXACTI:
case OP_NOTEXACT:
case OP_NOTEXACTI:
case OP_UPTO: case OP_UPTO:
case OP_UPTOI: case OP_UPTOI:
case OP_NOTUPTO:
case OP_NOTUPTOI:
case OP_MINUPTO: case OP_MINUPTO:
case OP_MINUPTOI: case OP_MINUPTOI:
case OP_NOTMINUPTO:
case OP_NOTMINUPTOI:
case OP_POSUPTO: case OP_POSUPTO:
case OP_POSUPTOI: case OP_POSUPTOI:
case OP_NOTPOSUPTO:
case OP_NOTPOSUPTOI:
case OP_STAR: case OP_STAR:
case OP_STARI: case OP_STARI:
case OP_NOTSTAR:
case OP_NOTSTARI:
case OP_MINSTAR: case OP_MINSTAR:
case OP_MINSTARI: case OP_MINSTARI:
case OP_NOTMINSTAR:
case OP_NOTMINSTARI:
case OP_POSSTAR: case OP_POSSTAR:
case OP_POSSTARI: case OP_POSSTARI:
case OP_NOTPOSSTAR:
case OP_NOTPOSSTARI:
case OP_PLUS: case OP_PLUS:
case OP_PLUSI: case OP_PLUSI:
case OP_NOTPLUS:
case OP_NOTPLUSI:
case OP_MINPLUS: case OP_MINPLUS:
case OP_MINPLUSI: case OP_MINPLUSI:
case OP_NOTMINPLUS:
case OP_NOTMINPLUSI:
case OP_POSPLUS: case OP_POSPLUS:
case OP_POSPLUSI: case OP_POSPLUSI:
case OP_NOTPOSPLUS:
case OP_NOTPOSPLUSI:
case OP_QUERY: case OP_QUERY:
case OP_QUERYI: case OP_QUERYI:
case OP_NOTQUERY:
case OP_NOTQUERYI:
case OP_MINQUERY: case OP_MINQUERY:
case OP_MINQUERYI: case OP_MINQUERYI:
case OP_NOTMINQUERY:
case OP_NOTMINQUERYI:
case OP_POSQUERY: case OP_POSQUERY:
case OP_POSQUERYI: case OP_POSQUERYI:
case OP_NOTPOSQUERY:
case OP_NOTPOSQUERYI:
if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]); if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
break; break;
} }
@ -3071,22 +3118,28 @@ if (next >= 0) switch(op_code)
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UTF */
return (c != TABLE_GET((unsigned int)next, cd->fcc, next)); /* Non-UTF-8 mode */ return (c != TABLE_GET((unsigned int)next, cd->fcc, next)); /* Non-UTF-8 mode */
/* For OP_NOT and OP_NOTI, the data is always a single-byte character. These
opcodes are not used for multi-byte characters, because they are coded using
an XCLASS instead. */
case OP_NOT: case OP_NOT:
return (c = *previous) == next; #ifdef SUPPORT_UTF
GETCHARTEST(c, previous);
#else
c = *previous;
#endif
return c == next;
case OP_NOTI: case OP_NOTI:
if ((c = *previous) == next) return TRUE; #ifdef SUPPORT_UTF
GETCHARTEST(c, previous);
#else
c = *previous;
#endif
if (c == next) return TRUE;
#ifdef SUPPORT_UTF #ifdef SUPPORT_UTF
if (utf) if (utf)
{ {
unsigned int othercase; unsigned int othercase;
if (next < 128) othercase = cd->fcc[next]; else if (next < 128) othercase = cd->fcc[next]; else
#ifdef SUPPORT_UCP #ifdef SUPPORT_UCP
othercase = UCD_OTHERCASE(next); othercase = UCD_OTHERCASE((unsigned int)next);
#else #else
othercase = NOTACHAR; othercase = NOTACHAR;
#endif #endif
@ -3094,28 +3147,28 @@ if (next >= 0) switch(op_code)
} }
else else
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UTF */
return (c == (int)(TABLE_GET((unsigned int)next, cd->fcc, next))); /* Non-UTF-8 mode */ return (c == TABLE_GET((unsigned int)next, cd->fcc, next)); /* Non-UTF-8 mode */
/* Note that OP_DIGIT etc. are generated only when PCRE_UCP is *not* set. /* Note that OP_DIGIT etc. are generated only when PCRE_UCP is *not* set.
When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */ When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
case OP_DIGIT: case OP_DIGIT:
return next > 127 || (cd->ctypes[next] & ctype_digit) == 0; return next > 255 || (cd->ctypes[next] & ctype_digit) == 0;
case OP_NOT_DIGIT: case OP_NOT_DIGIT:
return next <= 127 && (cd->ctypes[next] & ctype_digit) != 0; return next <= 255 && (cd->ctypes[next] & ctype_digit) != 0;
case OP_WHITESPACE: case OP_WHITESPACE:
return next > 127 || (cd->ctypes[next] & ctype_space) == 0; return next > 255 || (cd->ctypes[next] & ctype_space) == 0;
case OP_NOT_WHITESPACE: case OP_NOT_WHITESPACE:
return next <= 127 && (cd->ctypes[next] & ctype_space) != 0; return next <= 255 && (cd->ctypes[next] & ctype_space) != 0;
case OP_WORDCHAR: case OP_WORDCHAR:
return next > 127 || (cd->ctypes[next] & ctype_word) == 0; return next > 255 || (cd->ctypes[next] & ctype_word) == 0;
case OP_NOT_WORDCHAR: case OP_NOT_WORDCHAR:
return next <= 127 && (cd->ctypes[next] & ctype_word) != 0; return next <= 255 && (cd->ctypes[next] & ctype_word) != 0;
case OP_HSPACE: case OP_HSPACE:
case OP_NOT_HSPACE: case OP_NOT_HSPACE:
@ -3193,22 +3246,22 @@ switch(op_code)
switch(-next) switch(-next)
{ {
case ESC_d: case ESC_d:
return c > 127 || (cd->ctypes[c] & ctype_digit) == 0; return c > 255 || (cd->ctypes[c] & ctype_digit) == 0;
case ESC_D: case ESC_D:
return c <= 127 && (cd->ctypes[c] & ctype_digit) != 0; return c <= 255 && (cd->ctypes[c] & ctype_digit) != 0;
case ESC_s: case ESC_s:
return c > 127 || (cd->ctypes[c] & ctype_space) == 0; return c > 255 || (cd->ctypes[c] & ctype_space) == 0;
case ESC_S: case ESC_S:
return c <= 127 && (cd->ctypes[c] & ctype_space) != 0; return c <= 255 && (cd->ctypes[c] & ctype_space) != 0;
case ESC_w: case ESC_w:
return c > 127 || (cd->ctypes[c] & ctype_word) == 0; return c > 255 || (cd->ctypes[c] & ctype_word) == 0;
case ESC_W: case ESC_W:
return c <= 127 && (cd->ctypes[c] & ctype_word) != 0; return c <= 255 && (cd->ctypes[c] & ctype_word) != 0;
case ESC_h: case ESC_h:
case ESC_H: case ESC_H:
@ -3317,10 +3370,10 @@ switch(op_code)
return next == -ESC_d; return next == -ESC_d;
case OP_WHITESPACE: case OP_WHITESPACE:
return next == -ESC_S || next == -ESC_d || next == -ESC_w || next == -ESC_R; return next == -ESC_S || next == -ESC_d || next == -ESC_w;
case OP_NOT_WHITESPACE: case OP_NOT_WHITESPACE:
return next == -ESC_s || next == -ESC_h || next == -ESC_v; return next == -ESC_s || next == -ESC_h || next == -ESC_v || next == -ESC_R;
case OP_HSPACE: case OP_HSPACE:
return next == -ESC_S || next == -ESC_H || next == -ESC_d || return next == -ESC_S || next == -ESC_H || next == -ESC_d ||
@ -4484,41 +4537,34 @@ for (;; ptr++)
LONE_SINGLE_CHARACTER: LONE_SINGLE_CHARACTER:
/* Only the value of 1 matters for class_single_char. */ /* Only the value of 1 matters for class_single_char. */
if (class_single_char < 2) class_single_char++; if (class_single_char < 2) class_single_char++;
/* If class_charcount is 1, we saw precisely one character. As long as /* If class_charcount is 1, we saw precisely one character. As long as
there were no negated characters >= 128 and there was no use of \p or \P, there was no use of \p or \P, in other words, no use of any XCLASS
in other words, no use of any XCLASS features, we can optimize. features, we can optimize.
In UTF-8 mode, we can optimize the negative case only if there were no
characters >= 128 because OP_NOT and the related opcodes like OP_NOTSTAR
operate on single-bytes characters only. This is an historical hangover.
Maybe one day we can tidy these opcodes to handle multi-byte characters.
The optimization throws away the bit map. We turn the item into a The optimization throws away the bit map. We turn the item into a
1-character OP_CHAR[I] if it's positive, or OP_NOT[I] if it's negative. 1-character OP_CHAR[I] if it's positive, or OP_NOT[I] if it's negative.
Note that OP_NOT[I] does not support multibyte characters. In the positive In the positive case, it can cause firstchar to be set. Otherwise, there
case, it can cause firstchar to be set. Otherwise, there can be no first can be no first char if this item is first, whatever repeat count may
char if this item is first, whatever repeat count may follow. In the case follow. In the case of reqchar, save the previous value for reinstating. */
of reqchar, save the previous value for reinstating. */
#ifdef SUPPORT_UTF
if (class_single_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET
&& (!utf || !negate_class || c < (MAX_VALUE_FOR_SINGLE_CHAR + 1)))
#else
if (class_single_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET) if (class_single_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
#endif
{ {
ptr++; ptr++;
zeroreqchar = reqchar; zeroreqchar = reqchar;
/* The OP_NOT[I] opcodes work on single characters only. */
if (negate_class) if (negate_class)
{ {
if (firstchar == REQ_UNSET) firstchar = REQ_NONE; if (firstchar == REQ_UNSET) firstchar = REQ_NONE;
zerofirstchar = firstchar; zerofirstchar = firstchar;
*code++ = ((options & PCRE_CASELESS) != 0)? OP_NOTI: OP_NOT; *code++ = ((options & PCRE_CASELESS) != 0)? OP_NOTI: OP_NOT;
#ifdef SUPPORT_UTF
if (utf && c > MAX_VALUE_FOR_SINGLE_CHAR)
code += PRIV(ord2utf)(c, code);
else
#endif
*code++ = c; *code++ = c;
goto NOT_CHAR; goto NOT_CHAR;
} }
@ -4777,15 +4823,23 @@ for (;; ptr++)
/* Now handle repetition for the different types of item. */ /* Now handle repetition for the different types of item. */
/* If previous was a character match, abolish the item and generate a /* If previous was a character or negated character match, abolish the item
repeat item instead. If a char item has a minumum of more than one, ensure and generate a repeat item instead. If a char item has a minimum of more
that it is set in reqchar - it might not be if a sequence such as x{3} is than one, ensure that it is set in reqchar - it might not be if a sequence
the first thing in a branch because the x will have gone into firstchar such as x{3} is the first thing in a branch because the x will have gone
instead. */ into firstchar instead. */
if (*previous == OP_CHAR || *previous == OP_CHARI) if (*previous == OP_CHAR || *previous == OP_CHARI
|| *previous == OP_NOT || *previous == OP_NOTI)
{ {
op_type = (*previous == OP_CHAR)? 0 : OP_STARI - OP_STAR; switch (*previous)
{
default: /* Make compiler happy. */
case OP_CHAR: op_type = OP_STAR - OP_STAR; break;
case OP_CHARI: op_type = OP_STARI - OP_STAR; break;
case OP_NOT: op_type = OP_NOTSTAR - OP_STAR; break;
case OP_NOTI: op_type = OP_NOTSTARI - OP_STAR; break;
}
/* Deal with UTF characters that take up more than one character. It's /* Deal with UTF characters that take up more than one character. It's
easier to write this out separately than try to macrify it. Use c to easier to write this out separately than try to macrify it. Use c to
@ -4808,7 +4862,8 @@ for (;; ptr++)
with UTF disabled, or for a single character UTF character. */ with UTF disabled, or for a single character UTF character. */
{ {
c = code[-1]; c = code[-1];
if (repeat_min > 1) reqchar = c | req_caseopt | cd->req_varyopt; if (*previous <= OP_CHARI && repeat_min > 1)
reqchar = c | req_caseopt | cd->req_varyopt;
} }
/* If the repetition is unlimited, it pays to see if the next thing on /* If the repetition is unlimited, it pays to see if the next thing on
@ -4827,26 +4882,6 @@ for (;; ptr++)
goto OUTPUT_SINGLE_REPEAT; /* Code shared with single character types */ goto OUTPUT_SINGLE_REPEAT; /* Code shared with single character types */
} }
/* If previous was a single negated character ([^a] or similar), we use
one of the special opcodes, replacing it. The code is shared with single-
character repeats by setting opt_type to add a suitable offset into
repeat_type. We can also test for auto-possessification. OP_NOT and OP_NOTI
are currently used only for single-byte chars. */
else if (*previous == OP_NOT || *previous == OP_NOTI)
{
op_type = ((*previous == OP_NOT)? OP_NOTSTAR : OP_NOTSTARI) - OP_STAR;
c = previous[1];
if (!possessive_quantifier &&
repeat_max < 0 &&
check_auto_possessive(previous, utf, ptr + 1, options, cd))
{
repeat_type = 0; /* Force greedy */
possessive_quantifier = TRUE;
}
goto OUTPUT_SINGLE_REPEAT;
}
/* If previous was a character type match (\d or similar), abolish it and /* If previous was a character type match (\d or similar), abolish it and
create a suitable repeat item. The code is shared with single-character create a suitable repeat item. The code is shared with single-character
repeats by setting op_type to add a suitable offset into repeat_type. Note repeats by setting op_type to add a suitable offset into repeat_type. Note
@ -5587,6 +5622,11 @@ for (;; ptr++)
arg = ++ptr; arg = ++ptr;
while (*ptr != 0 && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++; while (*ptr != 0 && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
arglen = (int)(ptr - arg); arglen = (int)(ptr - arg);
if (arglen > (int)MAX_MARK)
{
*errorcodeptr = ERR75;
goto FAILED;
}
} }
if (*ptr != CHAR_RIGHT_PARENTHESIS) if (*ptr != CHAR_RIGHT_PARENTHESIS)
@ -6838,10 +6878,13 @@ for (;; ptr++)
/* For the rest (including \X when Unicode properties are supported), we /* For the rest (including \X when Unicode properties are supported), we
can obtain the OP value by negating the escape value in the default can obtain the OP value by negating the escape value in the default
situation when PCRE_UCP is not set. When it *is* set, we substitute situation when PCRE_UCP is not set. When it *is* set, we substitute
Unicode property tests. */ Unicode property tests. Note that \b and \B do a one-character
lookbehind. */
else else
{ {
if ((-c == ESC_b || -c == ESC_B) && cd->max_lookbehind == 0)
cd->max_lookbehind = 1;
#ifdef SUPPORT_UCP #ifdef SUPPORT_UCP
if (-c >= ESC_DU && -c <= ESC_wu) if (-c >= ESC_DU && -c <= ESC_wu)
{ {
@ -7149,7 +7192,12 @@ for (;;)
*ptrptr = ptr; *ptrptr = ptr;
return FALSE; return FALSE;
} }
else { PUT(reverse_count, 0, fixed_length); } else
{
if (fixed_length > cd->max_lookbehind)
cd->max_lookbehind = fixed_length;
PUT(reverse_count, 0, fixed_length);
}
} }
} }
@ -7819,6 +7867,7 @@ cd->start_pattern = (const pcre_uchar *)pattern;
cd->end_pattern = (const pcre_uchar *)(pattern + STRLEN_UC((const pcre_uchar *)pattern)); cd->end_pattern = (const pcre_uchar *)(pattern + STRLEN_UC((const pcre_uchar *)pattern));
cd->req_varyopt = 0; cd->req_varyopt = 0;
cd->assert_depth = 0; cd->assert_depth = 0;
cd->max_lookbehind = 0;
cd->external_options = options; cd->external_options = options;
cd->external_flags = 0; cd->external_flags = 0;
cd->open_caps = NULL; cd->open_caps = NULL;
@ -7869,7 +7918,6 @@ re->magic_number = MAGIC_NUMBER;
re->size = (int)size; re->size = (int)size;
re->options = cd->external_options; re->options = cd->external_options;
re->flags = cd->external_flags; re->flags = cd->external_flags;
re->dummy1 = 0;
re->first_char = 0; re->first_char = 0;
re->req_char = 0; re->req_char = 0;
re->name_table_offset = sizeof(REAL_PCRE) / sizeof(pcre_uchar); re->name_table_offset = sizeof(REAL_PCRE) / sizeof(pcre_uchar);
@ -7889,6 +7937,7 @@ field; this time it's used for remembering forward references to subpatterns.
cd->final_bracount = cd->bracount; /* Save for checking forward references */ cd->final_bracount = cd->bracount; /* Save for checking forward references */
cd->assert_depth = 0; cd->assert_depth = 0;
cd->bracount = 0; cd->bracount = 0;
cd->max_lookbehind = 0;
cd->names_found = 0; cd->names_found = 0;
cd->name_table = (pcre_uchar *)re + re->name_table_offset; cd->name_table = (pcre_uchar *)re + re->name_table_offset;
codestart = cd->name_table + re->name_entry_size * re->name_count; codestart = cd->name_table + re->name_entry_size * re->name_count;
@ -7910,6 +7959,7 @@ code = (pcre_uchar *)codestart;
&firstchar, &reqchar, NULL, cd, NULL); &firstchar, &reqchar, NULL, cd, NULL);
re->top_bracket = cd->bracount; re->top_bracket = cd->bracount;
re->top_backref = cd->top_backref; re->top_backref = cd->top_backref;
re->max_lookbehind = cd->max_lookbehind;
re->flags = cd->external_flags | PCRE_MODE; re->flags = cd->external_flags | PCRE_MODE;
if (cd->had_accept) reqchar = REQ_NONE; /* Must disable after (*ACCEPT) */ if (cd->had_accept) reqchar = REQ_NONE; /* Must disable after (*ACCEPT) */
@ -7997,6 +8047,7 @@ if (cd->check_lookbehind)
(fixed_length == -4)? ERR70 : ERR25; (fixed_length == -4)? ERR70 : ERR25;
break; break;
} }
if (fixed_length > cd->max_lookbehind) cd->max_lookbehind = fixed_length;
PUT(cc, 1, fixed_length); PUT(cc, 1, fixed_length);
} }
cc += 1 + LINK_SIZE; cc += 1 + LINK_SIZE;

View File

@ -38,10 +38,9 @@ POSSIBILITY OF SUCH DAMAGE.
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
*/ */
/* This module contains the external function pcre_dfa_exec(), which is an /* This module contains the external function pcre_dfa_exec(), which is an
alternative matching function that uses a sort of DFA algorithm (not a true alternative matching function that uses a sort of DFA algorithm (not a true
FSM). This is NOT Perl- compatible, but it has advantages in certain FSM). This is NOT Perl-compatible, but it has advantages in certain
applications. */ applications. */
@ -282,7 +281,7 @@ typedef struct stateblock {
int data; /* Some use extra data */ int data; /* Some use extra data */
} stateblock; } stateblock;
#define INTS_PER_STATEBLOCK (sizeof(stateblock)/sizeof(int)) #define INTS_PER_STATEBLOCK (int)(sizeof(stateblock)/sizeof(int))
#ifdef PCRE_DEBUG #ifdef PCRE_DEBUG
@ -382,7 +381,8 @@ for the current character, one for the following character). */
next_new_state->count = (y); \ next_new_state->count = (y); \
next_new_state->data = (z); \ next_new_state->data = (z); \
next_new_state++; \ next_new_state++; \
DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \ DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d) line %d\n", rlevel*2-2, SP, \
(x), (y), (z), __LINE__)); \
} \ } \
else return PCRE_ERROR_DFA_WSSIZE else return PCRE_ERROR_DFA_WSSIZE
@ -424,6 +424,8 @@ BOOL utf = (md->poptions & PCRE_UTF8) != 0;
BOOL utf = FALSE; BOOL utf = FALSE;
#endif #endif
BOOL reset_could_continue = FALSE;
rlevel++; rlevel++;
offsetcount &= (-2); offsetcount &= (-2);
@ -571,7 +573,9 @@ for (;;)
int clen, dlen; int clen, dlen;
unsigned int c, d; unsigned int c, d;
int forced_fail = 0; int forced_fail = 0;
BOOL could_continue = FALSE; BOOL partial_newline = FALSE;
BOOL could_continue = reset_could_continue;
reset_could_continue = FALSE;
/* Make the new state list into the active state list and empty the /* Make the new state list into the active state list and empty the
new state list. */ new state list. */
@ -607,7 +611,7 @@ for (;;)
if (ptr < end_subject) if (ptr < end_subject)
{ {
clen = 1; /* Number of bytes in the character */ clen = 1; /* Number of data items in the character */
#ifdef SUPPORT_UTF #ifdef SUPPORT_UTF
if (utf) { GETCHARLEN(c, ptr, clen); } else if (utf) { GETCHARLEN(c, ptr, clen); } else
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UTF */
@ -641,7 +645,8 @@ for (;;)
/* A negative offset is a special case meaning "hold off going to this /* A negative offset is a special case meaning "hold off going to this
(negated) state until the number of characters in the data field have (negated) state until the number of characters in the data field have
been skipped". */ been skipped". If the could_continue flag was passed over from a previous
state, arrange for it to be passed on. */
if (state_offset < 0) if (state_offset < 0)
{ {
@ -650,6 +655,7 @@ for (;;)
DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP)); DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));
ADD_NEW_DATA(state_offset, current_state->count, ADD_NEW_DATA(state_offset, current_state->count,
current_state->data - 1); current_state->data - 1);
if (could_continue) reset_could_continue = TRUE;
continue; continue;
} }
else else
@ -689,10 +695,10 @@ for (;;)
permitted. permitted.
We also use this mechanism for opcodes such as OP_TYPEPLUS that take an We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
argument that is not a data character - but is always one byte long. We argument that is not a data character - but is always one byte long because
have to take special action to deal with \P, \p, \H, \h, \V, \v and \X in the values are small. We have to take special action to deal with \P, \p,
this case. To keep the other cases fast, convert these ones to new opcodes. \H, \h, \V, \v and \X in this case. To keep the other cases fast, convert
*/ these ones to new opcodes. */
if (coptable[codevalue] > 0) if (coptable[codevalue] > 0)
{ {
@ -783,7 +789,7 @@ for (;;)
offsets[0] = (int)(current_subject - start_subject); offsets[0] = (int)(current_subject - start_subject);
offsets[1] = (int)(ptr - start_subject); offsets[1] = (int)(ptr - start_subject);
DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP, DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,
offsets[1] - offsets[0], current_subject)); offsets[1] - offsets[0], (char *)current_subject));
} }
if ((md->moptions & PCRE_DFA_SHORTEST) != 0) if ((md->moptions & PCRE_DFA_SHORTEST) != 0)
{ {
@ -888,7 +894,20 @@ for (;;)
/*-----------------------------------------------------------------*/ /*-----------------------------------------------------------------*/
case OP_ANY: case OP_ANY:
if (clen > 0 && !IS_NEWLINE(ptr)) if (clen > 0 && !IS_NEWLINE(ptr))
{ ADD_NEW(state_offset + 1, 0); } {
if (ptr + 1 >= md->end_subject &&
(md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
c == NLBLOCK->nl[0])
{
could_continue = partial_newline = TRUE;
}
else
{
ADD_NEW(state_offset + 1, 0);
}
}
break; break;
/*-----------------------------------------------------------------*/ /*-----------------------------------------------------------------*/
@ -916,6 +935,19 @@ for (;;)
(ptr == end_subject - md->nllen) (ptr == end_subject - md->nllen)
)) ))
{ ADD_ACTIVE(state_offset + 1, 0); } { ADD_ACTIVE(state_offset + 1, 0); }
else if (ptr + 1 >= md->end_subject &&
(md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
c == NLBLOCK->nl[0])
{
if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
{
reset_could_continue = TRUE;
ADD_NEW_DATA(-(state_offset + 1), 0, 1);
}
else could_continue = partial_newline = TRUE;
}
} }
break; break;
@ -928,6 +960,19 @@ for (;;)
else if (clen == 0 || else if (clen == 0 ||
((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr))) ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr)))
{ ADD_ACTIVE(state_offset + 1, 0); } { ADD_ACTIVE(state_offset + 1, 0); }
else if (ptr + 1 >= md->end_subject &&
(md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
c == NLBLOCK->nl[0])
{
if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
{
reset_could_continue = TRUE;
ADD_NEW_DATA(-(state_offset + 1), 0, 1);
}
else could_continue = partial_newline = TRUE;
}
} }
else if (IS_NEWLINE(ptr)) else if (IS_NEWLINE(ptr))
{ ADD_ACTIVE(state_offset + 1, 0); } { ADD_ACTIVE(state_offset + 1, 0); }
@ -1090,7 +1135,15 @@ for (;;)
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); } if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
if (clen > 0) if (clen > 0)
{ {
if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) || if (d == OP_ANY && ptr + 1 >= md->end_subject &&
(md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
c == NLBLOCK->nl[0])
{
could_continue = partial_newline = TRUE;
}
else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
(c < 256 && (c < 256 &&
(d != OP_ANY || !IS_NEWLINE(ptr)) && (d != OP_ANY || !IS_NEWLINE(ptr)) &&
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0)) ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
@ -1113,7 +1166,15 @@ for (;;)
ADD_ACTIVE(state_offset + 2, 0); ADD_ACTIVE(state_offset + 2, 0);
if (clen > 0) if (clen > 0)
{ {
if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) || if (d == OP_ANY && ptr + 1 >= md->end_subject &&
(md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
c == NLBLOCK->nl[0])
{
could_continue = partial_newline = TRUE;
}
else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
(c < 256 && (c < 256 &&
(d != OP_ANY || !IS_NEWLINE(ptr)) && (d != OP_ANY || !IS_NEWLINE(ptr)) &&
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0)) ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
@ -1135,7 +1196,15 @@ for (;;)
ADD_ACTIVE(state_offset + 2, 0); ADD_ACTIVE(state_offset + 2, 0);
if (clen > 0) if (clen > 0)
{ {
if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) || if (d == OP_ANY && ptr + 1 >= md->end_subject &&
(md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
c == NLBLOCK->nl[0])
{
could_continue = partial_newline = TRUE;
}
else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
(c < 256 && (c < 256 &&
(d != OP_ANY || !IS_NEWLINE(ptr)) && (d != OP_ANY || !IS_NEWLINE(ptr)) &&
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0)) ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
@ -1155,7 +1224,15 @@ for (;;)
count = current_state->count; /* Number already matched */ count = current_state->count; /* Number already matched */
if (clen > 0) if (clen > 0)
{ {
if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) || if (d == OP_ANY && ptr + 1 >= md->end_subject &&
(md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
c == NLBLOCK->nl[0])
{
could_continue = partial_newline = TRUE;
}
else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
(c < 256 && (c < 256 &&
(d != OP_ANY || !IS_NEWLINE(ptr)) && (d != OP_ANY || !IS_NEWLINE(ptr)) &&
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0)) ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
@ -1176,7 +1253,15 @@ for (;;)
count = current_state->count; /* Number already matched */ count = current_state->count; /* Number already matched */
if (clen > 0) if (clen > 0)
{ {
if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) || if (d == OP_ANY && ptr + 1 >= md->end_subject &&
(md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
c == NLBLOCK->nl[0])
{
could_continue = partial_newline = TRUE;
}
else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
(c < 256 && (c < 256 &&
(d != OP_ANY || !IS_NEWLINE(ptr)) && (d != OP_ANY || !IS_NEWLINE(ptr)) &&
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0)) ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
@ -1824,6 +1909,8 @@ for (;;)
ncount++; ncount++;
nptr += ndlen; nptr += ndlen;
} }
if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
reset_could_continue = TRUE;
if (++count >= GET2(code, 1)) if (++count >= GET2(code, 1))
{ ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); } { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
else else
@ -2037,6 +2124,8 @@ for (;;)
ncount++; ncount++;
nptr += nclen; nptr += nclen;
} }
if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
reset_could_continue = TRUE;
ADD_NEW_DATA(-(state_offset + 1), 0, ncount); ADD_NEW_DATA(-(state_offset + 1), 0, ncount);
} }
break; break;
@ -2062,7 +2151,13 @@ for (;;)
break; break;
case 0x000d: case 0x000d:
if (ptr + 1 < end_subject && ptr[1] == 0x0a) if (ptr + 1 >= end_subject)
{
ADD_NEW(state_offset + 1, 0);
if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
reset_could_continue = TRUE;
}
else if (ptr[1] == 0x0a)
{ {
ADD_NEW_DATA(-(state_offset + 1), 0, 1); ADD_NEW_DATA(-(state_offset + 1), 0, 1);
} }
@ -2171,22 +2266,32 @@ for (;;)
break; break;
/*-----------------------------------------------------------------*/ /*-----------------------------------------------------------------*/
/* Match a negated single character casefully. This is only used for /* Match a negated single character casefully. */
one-byte characters, that is, we know that d < 256. The character we are
checking (c) can be multibyte. */
case OP_NOT: case OP_NOT:
if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); } if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }
break; break;
/*-----------------------------------------------------------------*/ /*-----------------------------------------------------------------*/
/* Match a negated single character caselessly. This is only used for /* Match a negated single character caselessly. */
one-byte characters, that is, we know that d < 256. The character we are
checking (c) can be multibyte. */
case OP_NOTI: case OP_NOTI:
if (clen > 0 && c != d && c != fcc[d]) if (clen > 0)
{
unsigned int otherd;
#ifdef SUPPORT_UTF
if (utf && d >= 128)
{
#ifdef SUPPORT_UCP
otherd = UCD_OTHERCASE(d);
#endif /* SUPPORT_UCP */
}
else
#endif /* SUPPORT_UTF */
otherd = TABLE_GET(d, fcc, d);
if (c != d && c != otherd)
{ ADD_NEW(state_offset + dlen + 1, 0); } { ADD_NEW(state_offset + dlen + 1, 0); }
}
break; break;
/*-----------------------------------------------------------------*/ /*-----------------------------------------------------------------*/
@ -2692,9 +2797,12 @@ for (;;)
{ {
int charcount = local_offsets[rc+1] - local_offsets[rc]; int charcount = local_offsets[rc+1] - local_offsets[rc];
#ifdef SUPPORT_UTF #ifdef SUPPORT_UTF
if (utf)
{
const pcre_uchar *p = start_subject + local_offsets[rc]; const pcre_uchar *p = start_subject + local_offsets[rc];
const pcre_uchar *pp = start_subject + local_offsets[rc+1]; const pcre_uchar *pp = start_subject + local_offsets[rc+1];
while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--; while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
}
#endif #endif
if (charcount > 0) if (charcount > 0)
{ {
@ -2793,7 +2901,7 @@ for (;;)
const pcre_uchar *pp = local_ptr; const pcre_uchar *pp = local_ptr;
charcount = (int)(pp - p); charcount = (int)(pp - p);
#ifdef SUPPORT_UTF #ifdef SUPPORT_UTF
while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--; if (utf) while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
#endif #endif
ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1)); ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
} }
@ -2875,9 +2983,12 @@ for (;;)
else else
{ {
#ifdef SUPPORT_UTF #ifdef SUPPORT_UTF
if (utf)
{
const pcre_uchar *p = start_subject + local_offsets[0]; const pcre_uchar *p = start_subject + local_offsets[0];
const pcre_uchar *pp = start_subject + local_offsets[1]; const pcre_uchar *pp = start_subject + local_offsets[1];
while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--; while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
}
#endif #endif
ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1)); ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
if (repeat_state_offset >= 0) if (repeat_state_offset >= 0)
@ -2946,7 +3057,7 @@ for (;;)
if (new_count <= 0) if (new_count <= 0)
{ {
if (rlevel == 1 && /* Top level, and */ if (rlevel == 1 && /* Top level, and */
could_continue && /* Some could go on */ could_continue && /* Some could go on, and */
forced_fail != workspace[1] && /* Not all forced fail & */ forced_fail != workspace[1] && /* Not all forced fail & */
( /* either... */ ( /* either... */
(md->moptions & PCRE_PARTIAL_HARD) != 0 /* Hard partial */ (md->moptions & PCRE_PARTIAL_HARD) != 0 /* Hard partial */
@ -2954,8 +3065,13 @@ for (;;)
((md->moptions & PCRE_PARTIAL_SOFT) != 0 && /* Soft partial and */ ((md->moptions & PCRE_PARTIAL_SOFT) != 0 && /* Soft partial and */
match_count < 0) /* no matches */ match_count < 0) /* no matches */
) && /* And... */ ) && /* And... */
ptr >= end_subject && /* Reached end of subject */ (
partial_newline || /* Either partial NL */
( /* or ... */
ptr >= end_subject && /* End of subject and */
ptr > md->start_used_ptr) /* Inspected non-empty string */ ptr > md->start_used_ptr) /* Inspected non-empty string */
)
)
{ {
if (offsetcount >= 2) if (offsetcount >= 2)
{ {
@ -3052,10 +3168,27 @@ if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE; if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;
if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET; if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
/* We need to find the pointer to any study data before we test for byte /* Check that the first field in the block is the magic number. If it is not,
flipping, so we scan the extra_data block first. This may set two fields in the return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
match block, so we must initialize them beforehand. However, the other fields REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
in the match block must not be set until after the byte flipping. */ means that the pattern is likely compiled with different endianness. */
if (re->magic_number != MAGIC_NUMBER)
return re->magic_number == REVERSED_MAGIC_NUMBER?
PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
/* If restarting after a partial match, do some sanity checks on the contents
of the workspace. */
if ((options & PCRE_DFA_RESTART) != 0)
{
if ((workspace[0] & (-2)) != 0 || workspace[1] < 1 ||
workspace[1] > (wscount - 2)/INTS_PER_STATEBLOCK)
return PCRE_ERROR_DFA_BADRESTART;
}
/* Set up study, callout, and table data */
md->tables = re->tables; md->tables = re->tables;
md->callout_data = NULL; md->callout_data = NULL;
@ -3074,16 +3207,6 @@ if (extra_data != NULL)
md->tables = extra_data->tables; md->tables = extra_data->tables;
} }
/* Check that the first field in the block is the magic number. If it is not,
return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
means that the pattern is likely compiled with different endianness. */
if (re->magic_number != MAGIC_NUMBER)
return re->magic_number == REVERSED_MAGIC_NUMBER?
PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
/* Set some local values */ /* Set some local values */
current_subject = (const pcre_uchar *)subject + start_offset; current_subject = (const pcre_uchar *)subject + start_offset;

View File

@ -37,7 +37,6 @@ POSSIBILITY OF SUCH DAMAGE.
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
*/ */
/* This module contains pcre_exec(), the externally visible function that does /* This module contains pcre_exec(), the externally visible function that does
pattern matching using an NFA algorithm, trying to mimic Perl as closely as pattern matching using an NFA algorithm, trying to mimic Perl as closely as
possible. There are also some static supporting functions. */ possible. There are also some static supporting functions. */
@ -140,7 +139,9 @@ Arguments:
md points to match data block md points to match data block
caseless TRUE if caseless caseless TRUE if caseless
Returns: < 0 if not matched, otherwise the number of subject bytes matched Returns: >= 0 the number of subject bytes matched
-1 no match
-2 partial match; always given if at end subject
*/ */
static int static int
@ -163,7 +164,8 @@ pchars(p, length, FALSE, md);
printf("\n"); printf("\n");
#endif #endif
/* Always fail if reference not set (and not JavaScript compatible). */ /* Always fail if reference not set (and not JavaScript compatible - in that
case the length is passed as zero). */
if (length < 0) return -1; if (length < 0) return -1;
@ -189,7 +191,7 @@ if (caseless)
while (p < endptr) while (p < endptr)
{ {
int c, d; int c, d;
if (eptr >= md->end_subject) return -1; if (eptr >= md->end_subject) return -2; /* Partial match */
GETCHARINC(c, eptr); GETCHARINC(c, eptr);
GETCHARINC(d, p); GETCHARINC(d, p);
if (c != d && c != UCD_OTHERCASE(d)) return -1; if (c != d && c != UCD_OTHERCASE(d)) return -1;
@ -202,9 +204,9 @@ if (caseless)
/* The same code works when not in UTF-8 mode and in UTF-8 mode when there /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
is no UCP support. */ is no UCP support. */
{ {
if (eptr + length > md->end_subject) return -1;
while (length-- > 0) while (length-- > 0)
{ {
if (eptr >= md->end_subject) return -2; /* Partial match */
if (TABLE_GET(*p, md->lcc, *p) != TABLE_GET(*eptr, md->lcc, *eptr)) return -1; if (TABLE_GET(*p, md->lcc, *p) != TABLE_GET(*eptr, md->lcc, *eptr)) return -1;
p++; p++;
eptr++; eptr++;
@ -217,8 +219,11 @@ are in UTF-8 mode. */
else else
{ {
if (eptr + length > md->end_subject) return -1; while (length-- > 0)
while (length-- > 0) if (*p++ != *eptr++) return -1; {
if (eptr >= md->end_subject) return -2; /* Partial match */
if (*p++ != *eptr++) return -1;
}
} }
return (int)(eptr - eptr_start); return (int)(eptr - eptr_start);
@ -311,9 +316,15 @@ argument of match(), which never changes. */
#define RMATCH(ra,rb,rc,rd,re,rw)\ #define RMATCH(ra,rb,rc,rd,re,rw)\
{\ {\
heapframe *newframe = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));\ heapframe *newframe = frame->Xnextframe;\
if (newframe == NULL)\
{\
newframe = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));\
if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\ if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
frame->Xwhere = rw; \ newframe->Xnextframe = NULL;\
frame->Xnextframe = newframe;\
}\
frame->Xwhere = rw;\
newframe->Xeptr = ra;\ newframe->Xeptr = ra;\
newframe->Xecode = rb;\ newframe->Xecode = rb;\
newframe->Xmstart = mstart;\ newframe->Xmstart = mstart;\
@ -332,7 +343,6 @@ argument of match(), which never changes. */
{\ {\
heapframe *oldframe = frame;\ heapframe *oldframe = frame;\
frame = oldframe->Xprevframe;\ frame = oldframe->Xprevframe;\
if (oldframe != &frame_zero) (PUBL(stack_free))(oldframe);\
if (frame != NULL)\ if (frame != NULL)\
{\ {\
rrc = ra;\ rrc = ra;\
@ -346,6 +356,7 @@ argument of match(), which never changes. */
typedef struct heapframe { typedef struct heapframe {
struct heapframe *Xprevframe; struct heapframe *Xprevframe;
struct heapframe *Xnextframe;
/* Function arguments that may change */ /* Function arguments that may change */
@ -492,9 +503,7 @@ the top-level on the stack rather than malloc-ing them all gives a performance
boost in many cases where there is not much "recursion". */ boost in many cases where there is not much "recursion". */
#ifdef NO_RECURSE #ifdef NO_RECURSE
heapframe frame_zero; heapframe *frame = (heapframe *)md->match_frames_base;
heapframe *frame = &frame_zero;
frame->Xprevframe = NULL; /* Marks the top level */
/* Copy in the original argument variables */ /* Copy in the original argument variables */
@ -897,7 +906,6 @@ for (;;)
} }
else /* OP_KETRMAX */ else /* OP_KETRMAX */
{ {
md->match_function_type = MATCH_CBEGROUP;
RMATCH(eptr, prev, offset_top, md, eptrb, RM66); RMATCH(eptr, prev, offset_top, md, eptrb, RM66);
if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (rrc != MATCH_NOMATCH) RRETURN(rrc);
ecode += 1 + LINK_SIZE; ecode += 1 + LINK_SIZE;
@ -1026,7 +1034,8 @@ for (;;)
for (;;) for (;;)
{ {
if (op >= OP_SBRA || op == OP_ONCE) md->match_function_type = MATCH_CBEGROUP; if (op >= OP_SBRA || op == OP_ONCE)
md->match_function_type = MATCH_CBEGROUP;
/* If this is not a possibly empty group, and there are no (*THEN)s in /* If this is not a possibly empty group, and there are no (*THEN)s in
the pattern, and this is the final alternative, optimize as described the pattern, and this is the final alternative, optimize as described
@ -1565,13 +1574,18 @@ for (;;)
mstart = md->start_match_ptr; /* In case \K reset it */ mstart = md->start_match_ptr; /* In case \K reset it */
break; break;
} }
md->mark = save_mark;
/* PCRE does not allow THEN to escape beyond an assertion; it is treated /* A COMMIT failure must fail the entire assertion, without trying any
as NOMATCH. */ subsequent branches. */
if (rrc == MATCH_COMMIT) RRETURN(MATCH_NOMATCH);
/* PCRE does not allow THEN to escape beyond an assertion; it
is treated as NOMATCH. */
if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
ecode += GET(ecode, 1); ecode += GET(ecode, 1);
md->mark = save_mark;
} }
while (*ecode == OP_ALT); while (*ecode == OP_ALT);
@ -1779,10 +1793,11 @@ for (;;)
goto RECURSION_MATCHED; /* Exit loop; end processing */ goto RECURSION_MATCHED; /* Exit loop; end processing */
} }
/* PCRE does not allow THEN to escape beyond a recursion; it is treated /* PCRE does not allow THEN or COMMIT to escape beyond a recursion; it
as NOMATCH. */ is treated as NOMATCH. */
else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN &&
rrc != MATCH_COMMIT)
{ {
DPRINTF(("Recursion gave error %d\n", rrc)); DPRINTF(("Recursion gave error %d\n", rrc));
if (new_recursive.offset_save != stacksave) if (new_recursive.offset_save != stacksave)
@ -1993,7 +2008,6 @@ for (;;)
} }
if (*prev >= OP_SBRA) /* Could match an empty string */ if (*prev >= OP_SBRA) /* Could match an empty string */
{ {
md->match_function_type = MATCH_CBEGROUP;
RMATCH(eptr, prev, offset_top, md, eptrb, RM50); RMATCH(eptr, prev, offset_top, md, eptrb, RM50);
RRETURN(rrc); RRETURN(rrc);
} }
@ -2002,7 +2016,6 @@ for (;;)
} }
else /* OP_KETRMAX */ else /* OP_KETRMAX */
{ {
if (*prev >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
RMATCH(eptr, prev, offset_top, md, eptrb, RM13); RMATCH(eptr, prev, offset_top, md, eptrb, RM13);
if (rrc == MATCH_ONCE && md->once_target == prev) rrc = MATCH_NOMATCH; if (rrc == MATCH_ONCE && md->once_target == prev) rrc = MATCH_NOMATCH;
if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (rrc != MATCH_NOMATCH) RRETURN(rrc);
@ -2059,7 +2072,21 @@ for (;;)
case OP_DOLLM: case OP_DOLLM:
if (eptr < md->end_subject) if (eptr < md->end_subject)
{ if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); } {
if (!IS_NEWLINE(eptr))
{
if (md->partial != 0 &&
eptr + 1 >= md->end_subject &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
*eptr == NLBLOCK->nl[0])
{
md->hitend = TRUE;
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
}
RRETURN(MATCH_NOMATCH);
}
}
else else
{ {
if (md->noteol) RRETURN(MATCH_NOMATCH); if (md->noteol) RRETURN(MATCH_NOMATCH);
@ -2091,7 +2118,18 @@ for (;;)
ASSERT_NL_OR_EOS: ASSERT_NL_OR_EOS:
if (eptr < md->end_subject && if (eptr < md->end_subject &&
(!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen)) (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
{
if (md->partial != 0 &&
eptr + 1 >= md->end_subject &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
*eptr == NLBLOCK->nl[0])
{
md->hitend = TRUE;
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
}
RRETURN(MATCH_NOMATCH); RRETURN(MATCH_NOMATCH);
}
/* Either at end of string or \n before end. */ /* Either at end of string or \n before end. */
@ -2219,12 +2257,25 @@ for (;;)
} }
break; break;
/* Match a single character type; inline for speed */ /* Match any single character type except newline; have to take care with
CRLF newlines and partial matching. */
case OP_ANY: case OP_ANY:
if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
if (md->partial != 0 &&
eptr + 1 >= md->end_subject &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
*eptr == NLBLOCK->nl[0])
{
md->hitend = TRUE;
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
}
/* Fall through */ /* Fall through */
/* Match any single character whatsoever. */
case OP_ALLANY: case OP_ALLANY:
if (eptr >= md->end_subject) /* DO NOT merge the eptr++ here; it must */ if (eptr >= md->end_subject) /* DO NOT merge the eptr++ here; it must */
{ /* not be updated before SCHECK_PARTIAL. */ { /* not be updated before SCHECK_PARTIAL. */
@ -2365,7 +2416,11 @@ for (;;)
default: RRETURN(MATCH_NOMATCH); default: RRETURN(MATCH_NOMATCH);
case 0x000d: case 0x000d:
if (eptr < md->end_subject && *eptr == 0x0a) eptr++; if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
}
else if (*eptr == 0x0a) eptr++;
break; break;
case 0x000a: case 0x000a:
@ -2595,6 +2650,7 @@ for (;;)
if (UCD_CATEGORY(c) != ucp_M) break; if (UCD_CATEGORY(c) != ucp_M) break;
eptr += len; eptr += len;
} }
CHECK_PARTIAL();
ecode++; ecode++;
break; break;
#endif #endif
@ -2660,6 +2716,7 @@ for (;;)
default: /* No repeat follows */ default: /* No repeat follows */
if ((length = match_ref(offset, eptr, length, md, caseless)) < 0) if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)
{ {
if (length == -2) eptr = md->end_subject; /* Partial match */
CHECK_PARTIAL(); CHECK_PARTIAL();
RRETURN(MATCH_NOMATCH); RRETURN(MATCH_NOMATCH);
} }
@ -2685,6 +2742,7 @@ for (;;)
int slength; int slength;
if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0) if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
{ {
if (slength == -2) eptr = md->end_subject; /* Partial match */
CHECK_PARTIAL(); CHECK_PARTIAL();
RRETURN(MATCH_NOMATCH); RRETURN(MATCH_NOMATCH);
} }
@ -2708,6 +2766,7 @@ for (;;)
if (fi >= max) RRETURN(MATCH_NOMATCH); if (fi >= max) RRETURN(MATCH_NOMATCH);
if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0) if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
{ {
if (slength == -2) eptr = md->end_subject; /* Partial match */
CHECK_PARTIAL(); CHECK_PARTIAL();
RRETURN(MATCH_NOMATCH); RRETURN(MATCH_NOMATCH);
} }
@ -2726,11 +2785,20 @@ for (;;)
int slength; int slength;
if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0) if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
{ {
CHECK_PARTIAL(); /* Can't use CHECK_PARTIAL because we don't want to update eptr in
the soft partial matching case. */
if (slength == -2 && md->partial != 0 &&
md->end_subject > md->start_used_ptr)
{
md->hitend = TRUE;
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
}
break; break;
} }
eptr += slength; eptr += slength;
} }
while (eptr >= pp) while (eptr >= pp)
{ {
RMATCH(eptr, ecode, offset_top, md, eptrb, RM15); RMATCH(eptr, ecode, offset_top, md, eptrb, RM15);
@ -3360,7 +3428,7 @@ for (;;)
maximizing, find the maximum number of characters and work backwards. */ maximizing, find the maximum number of characters and work backwards. */
DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max, DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
max, eptr)); max, (char *)eptr));
if (op >= OP_STARI) /* Caseless */ if (op >= OP_STARI) /* Caseless */
{ {
@ -3504,33 +3572,41 @@ for (;;)
SCHECK_PARTIAL(); SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH); RRETURN(MATCH_NOMATCH);
} }
ecode++; #ifdef SUPPORT_UTF
GETCHARINCTEST(c, eptr); if (utf)
if (op == OP_NOTI) /* The caseless case */
{ {
register unsigned int ch, och; register unsigned int ch, och;
ch = *ecode++;
#ifdef COMPILE_PCRE8 ecode++;
/* ch must be < 128 if UTF is enabled. */ GETCHARINC(ch, ecode);
och = md->fcc[ch]; GETCHARINC(c, eptr);
#else
#ifdef SUPPORT_UTF if (op == OP_NOT)
{
if (ch == c) RRETURN(MATCH_NOMATCH);
}
else
{
#ifdef SUPPORT_UCP #ifdef SUPPORT_UCP
if (utf && ch > 127) if (ch > 127)
och = UCD_OTHERCASE(ch); och = UCD_OTHERCASE(ch);
#else #else
if (utf && ch > 127) if (ch > 127)
och = ch; och = ch;
#endif /* SUPPORT_UCP */ #endif /* SUPPORT_UCP */
else else
#endif /* SUPPORT_UTF */
och = TABLE_GET(ch, md->fcc, ch); och = TABLE_GET(ch, md->fcc, ch);
#endif /* COMPILE_PCRE8 */
if (ch == c || och == c) RRETURN(MATCH_NOMATCH); if (ch == c || och == c) RRETURN(MATCH_NOMATCH);
} }
else /* Caseful */ }
else
#endif
{ {
if (*ecode++ == c) RRETURN(MATCH_NOMATCH); register unsigned int ch = ecode[1];
c = *eptr++;
if (ch == c || (op == OP_NOTI && TABLE_GET(ch, md->fcc, ch) == c))
RRETURN(MATCH_NOMATCH);
ecode += 2;
} }
break; break;
@ -3610,7 +3686,7 @@ for (;;)
/* Common code for all repeated single-byte matches. */ /* Common code for all repeated single-byte matches. */
REPEATNOTCHAR: REPEATNOTCHAR:
fc = *ecode++; GETCHARINCTEST(fc, ecode);
/* The code is duplicated for the caseless and caseful cases, for speed, /* The code is duplicated for the caseless and caseful cases, for speed,
since matching characters is likely to be quite common. First, ensure the since matching characters is likely to be quite common. First, ensure the
@ -3621,14 +3697,10 @@ for (;;)
characters and work backwards. */ characters and work backwards. */
DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max, DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
max, eptr)); max, (char *)eptr));
if (op >= OP_NOTSTARI) /* Caseless */ if (op >= OP_NOTSTARI) /* Caseless */
{ {
#ifdef COMPILE_PCRE8
/* fc must be < 128 if UTF is enabled. */
foc = md->fcc[fc];
#else
#ifdef SUPPORT_UTF #ifdef SUPPORT_UTF
#ifdef SUPPORT_UCP #ifdef SUPPORT_UCP
if (utf && fc > 127) if (utf && fc > 127)
@ -3640,7 +3712,6 @@ for (;;)
else else
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UTF */
foc = TABLE_GET(fc, md->fcc, fc); foc = TABLE_GET(fc, md->fcc, fc);
#endif /* COMPILE_PCRE8 */
#ifdef SUPPORT_UTF #ifdef SUPPORT_UTF
if (utf) if (utf)
@ -3654,7 +3725,7 @@ for (;;)
RRETURN(MATCH_NOMATCH); RRETURN(MATCH_NOMATCH);
} }
GETCHARINC(d, eptr); GETCHARINC(d, eptr);
if (fc == d || (unsigned int) foc == d) RRETURN(MATCH_NOMATCH); if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
} }
} }
else else
@ -4164,6 +4235,7 @@ for (;;)
if (UCD_CATEGORY(c) != ucp_M) break; if (UCD_CATEGORY(c) != ucp_M) break;
eptr += len; eptr += len;
} }
CHECK_PARTIAL();
} }
} }
@ -4184,6 +4256,15 @@ for (;;)
RRETURN(MATCH_NOMATCH); RRETURN(MATCH_NOMATCH);
} }
if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
if (md->partial != 0 &&
eptr + 1 >= md->end_subject &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
*eptr == NLBLOCK->nl[0])
{
md->hitend = TRUE;
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
}
eptr++; eptr++;
ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++); ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
} }
@ -4468,6 +4549,15 @@ for (;;)
RRETURN(MATCH_NOMATCH); RRETURN(MATCH_NOMATCH);
} }
if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
if (md->partial != 0 &&
eptr + 1 >= md->end_subject &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
*eptr == NLBLOCK->nl[0])
{
md->hitend = TRUE;
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
}
eptr++; eptr++;
} }
break; break;
@ -4948,6 +5038,7 @@ for (;;)
if (UCD_CATEGORY(c) != ucp_M) break; if (UCD_CATEGORY(c) != ucp_M) break;
eptr += len; eptr += len;
} }
CHECK_PARTIAL();
} }
} }
else else
@ -4972,6 +5063,17 @@ for (;;)
switch(ctype) switch(ctype)
{ {
case OP_ANY: /* This is the non-NL case */ case OP_ANY: /* This is the non-NL case */
if (md->partial != 0 && /* Take care with CRLF partial */
eptr >= md->end_subject &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
c == NLBLOCK->nl[0])
{
md->hitend = TRUE;
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
}
break;
case OP_ALLANY: case OP_ALLANY:
case OP_ANYBYTE: case OP_ANYBYTE:
break; break;
@ -5135,6 +5237,17 @@ for (;;)
switch(ctype) switch(ctype)
{ {
case OP_ANY: /* This is the non-NL case */ case OP_ANY: /* This is the non-NL case */
if (md->partial != 0 && /* Take care with CRLF partial */
eptr >= md->end_subject &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
c == NLBLOCK->nl[0])
{
md->hitend = TRUE;
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
}
break;
case OP_ALLANY: case OP_ALLANY:
case OP_ANYBYTE: case OP_ANYBYTE:
break; break;
@ -5491,6 +5604,7 @@ for (;;)
if (UCD_CATEGORY(c) != ucp_M) break; if (UCD_CATEGORY(c) != ucp_M) break;
eptr += len; eptr += len;
} }
CHECK_PARTIAL();
} }
/* eptr is now past the end of the maximum run */ /* eptr is now past the end of the maximum run */
@ -5534,6 +5648,15 @@ for (;;)
break; break;
} }
if (IS_NEWLINE(eptr)) break; if (IS_NEWLINE(eptr)) break;
if (md->partial != 0 && /* Take care with CRLF partial */
eptr + 1 >= md->end_subject &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
*eptr == NLBLOCK->nl[0])
{
md->hitend = TRUE;
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
}
eptr++; eptr++;
ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++); ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
} }
@ -5551,6 +5674,15 @@ for (;;)
break; break;
} }
if (IS_NEWLINE(eptr)) break; if (IS_NEWLINE(eptr)) break;
if (md->partial != 0 && /* Take care with CRLF partial */
eptr + 1 >= md->end_subject &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
*eptr == NLBLOCK->nl[0])
{
md->hitend = TRUE;
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
}
eptr++; eptr++;
ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++); ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
} }
@ -5815,6 +5947,15 @@ for (;;)
break; break;
} }
if (IS_NEWLINE(eptr)) break; if (IS_NEWLINE(eptr)) break;
if (md->partial != 0 && /* Take care with CRLF partial */
eptr + 1 >= md->end_subject &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
*eptr == NLBLOCK->nl[0])
{
md->hitend = TRUE;
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
}
eptr++; eptr++;
} }
break; break;
@ -6145,6 +6286,31 @@ Undefine all the macros that were defined above to handle this. */
***************************************************************************/ ***************************************************************************/
#ifdef NO_RECURSE
/*************************************************
* Release allocated heap frames *
*************************************************/
/* This function releases all the allocated frames. The base frame is on the
machine stack, and so must not be freed.
Argument: the address of the base frame
Returns: nothing
*/
static void
release_match_heapframes (heapframe *frame_base)
{
heapframe *frame;
heapframe *following;

/* Walk the chain that hangs off the base frame, freeing each link in turn.
The base frame itself is never passed to the free function. */

for (frame = frame_base->Xnextframe; frame != NULL; frame = following)
  {
  following = frame->Xnextframe;   /* Pick up the link before freeing */
  (PUBL(stack_free))(frame);
  }
}
#endif
/************************************************* /*************************************************
* Execute a Regular Expression * * Execute a Regular Expression *
@ -6207,13 +6373,22 @@ PCRE_PUCHAR req_char_ptr = start_match - 1;
const pcre_study_data *study; const pcre_study_data *study;
const REAL_PCRE *re = (const REAL_PCRE *)argument_re; const REAL_PCRE *re = (const REAL_PCRE *)argument_re;
#ifdef NO_RECURSE
heapframe frame_zero;
frame_zero.Xprevframe = NULL; /* Marks the top level */
frame_zero.Xnextframe = NULL; /* None are allocated yet */
md->match_frames_base = &frame_zero;
#endif
/* Check for the special magic call that measures the size of the stack used /* Check for the special magic call that measures the size of the stack used
per recursive call of match(). */ per recursive call of match(). Without the funny casting for sizeof, a Windows
compiler gave this error: "unary minus operator applied to unsigned type,
result still unsigned". Hopefully the cast fixes that. */
if (re == NULL && extra_data == NULL && subject == NULL && length == -999 && if (re == NULL && extra_data == NULL && subject == NULL && length == -999 &&
start_offset == -999) start_offset == -999)
#ifdef NO_RECURSE #ifdef NO_RECURSE
return -sizeof(heapframe); return -((int)sizeof(heapframe));
#else #else
return match(NULL, NULL, NULL, 0, NULL, NULL, 0); return match(NULL, NULL, NULL, 0, NULL, NULL, 0);
#endif #endif
@ -6280,20 +6455,25 @@ if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
/* If the pattern was successfully studied with JIT support, run the JIT /* If the pattern was successfully studied with JIT support, run the JIT
executable instead of the rest of this function. Most options must be set at executable instead of the rest of this function. Most options must be set at
compile time for the JIT code to be usable. Fallback to the normal code path if compile time for the JIT code to be usable. Fallback to the normal code path if
an unsupported flag is set. In particular, JIT does not support partial an unsupported flag is set. */
matching. */
#ifdef SUPPORT_JIT #ifdef SUPPORT_JIT
if (extra_data != NULL if (extra_data != NULL
&& (extra_data->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && (extra_data->flags & (PCRE_EXTRA_EXECUTABLE_JIT |
PCRE_EXTRA_TABLES)) == PCRE_EXTRA_EXECUTABLE_JIT
&& extra_data->executable_jit != NULL && extra_data->executable_jit != NULL
&& (extra_data->flags & PCRE_EXTRA_TABLES) == 0
&& (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL | && (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL |
PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART)) == 0) PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART |
return PRIV(jit_exec)(re, extra_data->executable_jit, PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD)) == 0)
(const pcre_uchar *)subject, length, start_offset, options, {
((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) rc = PRIV(jit_exec)(re, extra_data, (const pcre_uchar *)subject, length,
? MATCH_LIMIT : extra_data->match_limit, offsets, offsetcount); start_offset, options, offsets, offsetcount);
/* PCRE_ERROR_NULL means that the selected normal or partial matching
mode is not compiled. In this case we simply fallback to interpreter. */
if (rc != PCRE_ERROR_NULL) return rc;
}
#endif #endif
/* Carry on with non-JIT matching. This information is for finding all the /* Carry on with non-JIT matching. This information is for finding all the
@ -6887,7 +7067,7 @@ if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
{ {
register int *iptr, *iend; register int *iptr, *iend;
int resetcount = 2 + re->top_bracket * 2; int resetcount = 2 + re->top_bracket * 2;
if (resetcount > offsetcount) resetcount = ocount; if (resetcount > offsetcount) resetcount = offsetcount;
iptr = offsets + md->end_offset_top; iptr = offsets + md->end_offset_top;
iend = offsets + resetcount; iend = offsets + resetcount;
while (iptr < iend) *iptr++ = -1; while (iptr < iend) *iptr++ = -1;
@ -6908,6 +7088,9 @@ if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0) if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
*(extra_data->mark) = (pcre_uchar *)md->mark; *(extra_data->mark) = (pcre_uchar *)md->mark;
DPRINTF((">>>> returning %d\n", rc)); DPRINTF((">>>> returning %d\n", rc));
#ifdef NO_RECURSE
release_match_heapframes(&frame_zero);
#endif
return rc; return rc;
} }
@ -6925,6 +7108,9 @@ if (using_temporary_offsets)
if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL) if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
{ {
DPRINTF((">>>> error: returning %d\n", rc)); DPRINTF((">>>> error: returning %d\n", rc));
#ifdef NO_RECURSE
release_match_heapframes(&frame_zero);
#endif
return rc; return rc;
} }
@ -6954,6 +7140,9 @@ else
if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0) if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
*(extra_data->mark) = (pcre_uchar *)md->nomatch_mark; *(extra_data->mark) = (pcre_uchar *)md->nomatch_mark;
#ifdef NO_RECURSE
release_match_heapframes(&frame_zero);
#endif
return rc; return rc;
} }

View File

@ -193,6 +193,10 @@ switch (what)
*((int *)where) = (re->flags & PCRE_HASCRORLF) != 0; *((int *)where) = (re->flags & PCRE_HASCRORLF) != 0;
break; break;
case PCRE_INFO_MAXLOOKBEHIND:
*((int *)where) = re->max_lookbehind;
break;
default: return PCRE_ERROR_BADOPTION; default: return PCRE_ERROR_BADOPTION;
} }

View File

@ -58,7 +58,11 @@ global variables are not used. */
#include "pcre_internal.h" #include "pcre_internal.h"
#ifdef GLIB_COMPILATION
#include "gmem.h" #include "gmem.h"
#else
#include <glib.h>
#endif /* GLIB_COMPILATION */
#if defined _MSC_VER || defined __SYMBIAN32__ #if defined _MSC_VER || defined __SYMBIAN32__
static void* LocalPcreMalloc(size_t aSize) static void* LocalPcreMalloc(size_t aSize)

View File

@ -523,6 +523,11 @@ capturing parenthesis numbers in back references. */
#define PUT2INC(a,n,d) PUT2(a,n,d), a += IMM2_SIZE #define PUT2INC(a,n,d) PUT2(a,n,d), a += IMM2_SIZE
/* The maximum length of a MARK name is currently one data unit; it may be
changed in future to be a fixed number of bytes or to depend on LINK_SIZE. */
#define MAX_MARK ((1 << (sizeof(pcre_uchar)*8)) - 1)
/* When UTF encoding is being used, a character is no longer just a single /* When UTF encoding is being used, a character is no longer just a single
character. The macros for character handling generate simple sequences when character. The macros for character handling generate simple sequences when
used in character-mode, and more complicated ones for UTF characters. used in character-mode, and more complicated ones for UTF characters.
@ -887,7 +892,8 @@ time, run time, or study time, respectively. */
PCRE_NO_START_OPTIMIZE) PCRE_NO_START_OPTIMIZE)
#define PUBLIC_STUDY_OPTIONS \ #define PUBLIC_STUDY_OPTIONS \
PCRE_STUDY_JIT_COMPILE (PCRE_STUDY_JIT_COMPILE|PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE| \
PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)
/* Magic number to provide a small check against being handed junk. */ /* Magic number to provide a small check against being handed junk. */
@ -1939,7 +1945,11 @@ enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9,
ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69,
ERR70, ERR71, ERR72, ERR73, ERR74, ERRCOUNT }; ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERRCOUNT };
/* JIT compiling modes. The function list is indexed by them. */
enum { JIT_COMPILE, JIT_PARTIAL_SOFT_COMPILE, JIT_PARTIAL_HARD_COMPILE,
JIT_NUMBER_OF_COMPILE_MODES };
/* The real format of the start of the pcre block; the index of names and the /* The real format of the start of the pcre block; the index of names and the
code vector run on as long as necessary after the end. We store an explicit code vector run on as long as necessary after the end. We store an explicit
@ -1969,16 +1979,15 @@ typedef struct REAL_PCRE {
pcre_uint32 size; /* Total that was malloced */ pcre_uint32 size; /* Total that was malloced */
pcre_uint32 options; /* Public options */ pcre_uint32 options; /* Public options */
pcre_uint16 flags; /* Private flags */ pcre_uint16 flags; /* Private flags */
pcre_uint16 dummy1; /* For future use */ pcre_uint16 max_lookbehind; /* Longest lookbehind (characters) */
pcre_uint16 top_bracket; pcre_uint16 top_bracket; /* Highest numbered group */
pcre_uint16 top_backref; pcre_uint16 top_backref; /* Highest numbered back reference */
pcre_uint16 first_char; /* Starting character */ pcre_uint16 first_char; /* Starting character */
pcre_uint16 req_char; /* This character must be seen */ pcre_uint16 req_char; /* This character must be seen */
pcre_uint16 name_table_offset; /* Offset to name table that follows */ pcre_uint16 name_table_offset; /* Offset to name table that follows */
pcre_uint16 name_entry_size; /* Size of any name items */ pcre_uint16 name_entry_size; /* Size of any name items */
pcre_uint16 name_count; /* Number of name items */ pcre_uint16 name_count; /* Number of name items */
pcre_uint16 ref_count; /* Reference count */ pcre_uint16 ref_count; /* Reference count */
const pcre_uint8 *tables; /* Pointer to tables or NULL for std */ const pcre_uint8 *tables; /* Pointer to tables or NULL for std */
const pcre_uint8 *nullpad; /* NULL padding */ const pcre_uint8 *nullpad; /* NULL padding */
} REAL_PCRE; } REAL_PCRE;
@ -2024,6 +2033,7 @@ typedef struct compile_data {
int workspace_size; /* Size of workspace */ int workspace_size; /* Size of workspace */
int bracount; /* Count of capturing parens as we compile */ int bracount; /* Count of capturing parens as we compile */
int final_bracount; /* Saved value after first pass */ int final_bracount; /* Saved value after first pass */
int max_lookbehind; /* Maximum lookbehind (characters) */
int top_backref; /* Maximum back reference */ int top_backref; /* Maximum back reference */
unsigned int backref_map; /* Bitmap of low back refs */ unsigned int backref_map; /* Bitmap of low back refs */
int assert_depth; /* Depth of nested assertions */ int assert_depth; /* Depth of nested assertions */
@ -2125,6 +2135,9 @@ typedef struct match_data {
const pcre_uchar *mark; /* Mark pointer to pass back on success */ const pcre_uchar *mark; /* Mark pointer to pass back on success */
const pcre_uchar *nomatch_mark;/* Mark pointer to pass back on failure */ const pcre_uchar *nomatch_mark;/* Mark pointer to pass back on failure */
const pcre_uchar *once_target; /* Where to back up to for atomic groups */ const pcre_uchar *once_target; /* Where to back up to for atomic groups */
#ifdef NO_RECURSE
void *match_frames_base; /* For remembering malloc'd frames */
#endif
} match_data; } match_data;
/* A similar structure is used for the same purpose by the DFA matching /* A similar structure is used for the same purpose by the DFA matching
@ -2179,7 +2192,7 @@ total length. */
#define ctypes_offset (cbits_offset + cbit_length) #define ctypes_offset (cbits_offset + cbit_length)
#define tables_length (ctypes_offset + 256) #define tables_length (ctypes_offset + 256)
/* Internal function prefix */ /* Internal function and data prefixes. */
#ifdef COMPILE_PCRE8 #ifdef COMPILE_PCRE8
#ifndef PUBL #ifndef PUBL
@ -2288,9 +2301,10 @@ extern BOOL PRIV(was_newline)(PCRE_PUCHAR, int, PCRE_PUCHAR,
extern BOOL PRIV(xclass)(int, const pcre_uchar *, BOOL); extern BOOL PRIV(xclass)(int, const pcre_uchar *, BOOL);
#ifdef SUPPORT_JIT #ifdef SUPPORT_JIT
extern void PRIV(jit_compile)(const REAL_PCRE *, PUBL(extra) *); extern void PRIV(jit_compile)(const REAL_PCRE *,
extern int PRIV(jit_exec)(const REAL_PCRE *, void *, PUBL(extra) *, int);
const pcre_uchar *, int, int, int, int, int *, int); extern int PRIV(jit_exec)(const REAL_PCRE *, const PUBL(extra) *,
const pcre_uchar *, int, int, int, int *, int);
extern void PRIV(jit_free)(void *); extern void PRIV(jit_free)(void *);
extern int PRIV(jit_get_size)(void *); extern int PRIV(jit_get_size)(void *);
extern const char* PRIV(jit_get_target)(void); extern const char* PRIV(jit_get_target)(void);
@ -2298,15 +2312,6 @@ extern const char* PRIV(jit_get_target)(void);
/* Unicode character database (UCD) */ /* Unicode character database (UCD) */
typedef struct {
pcre_uint8 script;
pcre_uint8 chartype;
pcre_int32 other_case;
} ucd_record;
extern const ucd_record PRIV(ucd_records)[];
extern const pcre_uint8 PRIV(ucd_stage1)[];
extern const pcre_uint16 PRIV(ucd_stage2)[];
extern const int PRIV(ucp_gentype)[]; extern const int PRIV(ucp_gentype)[];
#ifdef SUPPORT_JIT #ifdef SUPPORT_JIT
extern const int PRIV(ucp_typerange)[]; extern const int PRIV(ucp_typerange)[];

File diff suppressed because it is too large Load Diff

View File

@ -1123,7 +1123,7 @@ do
case OP_HSPACE: case OP_HSPACE:
SET_BIT(0x09); SET_BIT(0x09);
SET_BIT(0x20); SET_BIT(0x20);
#ifdef COMPILE_PCRE8 #ifdef SUPPORT_UTF
if (utf) if (utf)
{ {
#ifdef COMPILE_PCRE8 #ifdef COMPILE_PCRE8
@ -1148,7 +1148,7 @@ do
SET_BIT(0x0B); SET_BIT(0x0B);
SET_BIT(0x0C); SET_BIT(0x0C);
SET_BIT(0x0D); SET_BIT(0x0D);
#ifdef COMPILE_PCRE8 #ifdef SUPPORT_UTF
if (utf) if (utf)
{ {
#ifdef COMPILE_PCRE8 #ifdef COMPILE_PCRE8
@ -1418,7 +1418,8 @@ we don't have to change that code. */
if (bits_set || min > 0 if (bits_set || min > 0
#ifdef SUPPORT_JIT #ifdef SUPPORT_JIT
|| (options & PCRE_STUDY_JIT_COMPILE) != 0 || (options & (PCRE_STUDY_JIT_COMPILE | PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE
| PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)) != 0
#endif #endif
) )
{ {
@ -1478,7 +1479,13 @@ if (bits_set || min > 0
#ifdef SUPPORT_JIT #ifdef SUPPORT_JIT
extra->executable_jit = NULL; extra->executable_jit = NULL;
if ((options & PCRE_STUDY_JIT_COMPILE) != 0) PRIV(jit_compile)(re, extra); if ((options & PCRE_STUDY_JIT_COMPILE) != 0)
PRIV(jit_compile)(re, extra, JIT_COMPILE);
if ((options & PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE) != 0)
PRIV(jit_compile)(re, extra, JIT_PARTIAL_SOFT_COMPILE);
if ((options & PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE) != 0)
PRIV(jit_compile)(re, extra, JIT_PARTIAL_HARD_COMPILE);
if (study->flags == 0 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) == 0) if (study->flags == 0 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) == 0)
{ {
#ifdef COMPILE_PCRE8 #ifdef COMPILE_PCRE8

View File

@ -587,7 +587,7 @@ const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
unsigned int unsigned int
_pcre_ucp_othercase(const unsigned int c) _pcre_ucp_othercase(const unsigned int c)
{ {
unsigned int oc; unsigned int oc = NOTACHAR;
if ((oc = g_unichar_toupper(c)) != c) if ((oc = g_unichar_toupper(c)) != c)
return oc; return oc;

View File

@ -10,7 +10,11 @@ the UCD access macros. New values that are added for new releases of Unicode
should always be at the end of each enum, for backwards compatibility. */ should always be at the end of each enum, for backwards compatibility. */
/* These are the general character categories. */ /* These are the general character categories. */
#ifdef GLIB_COMPILATION
#include "gunicode.h" #include "gunicode.h"
#else
#include <glib.h>
#endif
enum { enum {
ucp_C, /* Other */ ucp_C, /* Other */
@ -60,6 +64,9 @@ enum {
/* These are the script identifications. */ /* These are the script identifications. */
enum { enum {
ucp_Common = G_UNICODE_SCRIPT_COMMON,
ucp_Inherited = G_UNICODE_SCRIPT_INHERITED,
ucp_Arabic = G_UNICODE_SCRIPT_ARABIC, ucp_Arabic = G_UNICODE_SCRIPT_ARABIC,
ucp_Armenian = G_UNICODE_SCRIPT_ARMENIAN, ucp_Armenian = G_UNICODE_SCRIPT_ARMENIAN,
ucp_Bengali = G_UNICODE_SCRIPT_BENGALI, ucp_Bengali = G_UNICODE_SCRIPT_BENGALI,
@ -69,7 +76,6 @@ enum {
ucp_Buhid = G_UNICODE_SCRIPT_BUHID, ucp_Buhid = G_UNICODE_SCRIPT_BUHID,
ucp_Canadian_Aboriginal = G_UNICODE_SCRIPT_CANADIAN_ABORIGINAL, ucp_Canadian_Aboriginal = G_UNICODE_SCRIPT_CANADIAN_ABORIGINAL,
ucp_Cherokee = G_UNICODE_SCRIPT_CHEROKEE, ucp_Cherokee = G_UNICODE_SCRIPT_CHEROKEE,
ucp_Common = G_UNICODE_SCRIPT_COMMON,
ucp_Coptic = G_UNICODE_SCRIPT_COPTIC, ucp_Coptic = G_UNICODE_SCRIPT_COPTIC,
ucp_Cypriot = G_UNICODE_SCRIPT_CYPRIOT, ucp_Cypriot = G_UNICODE_SCRIPT_CYPRIOT,
ucp_Cyrillic = G_UNICODE_SCRIPT_CYRILLIC, ucp_Cyrillic = G_UNICODE_SCRIPT_CYRILLIC,
@ -87,7 +93,6 @@ enum {
ucp_Hanunoo = G_UNICODE_SCRIPT_HANUNOO, ucp_Hanunoo = G_UNICODE_SCRIPT_HANUNOO,
ucp_Hebrew = G_UNICODE_SCRIPT_HEBREW, ucp_Hebrew = G_UNICODE_SCRIPT_HEBREW,
ucp_Hiragana = G_UNICODE_SCRIPT_HIRAGANA, ucp_Hiragana = G_UNICODE_SCRIPT_HIRAGANA,
ucp_Inherited = G_UNICODE_SCRIPT_INHERITED,
ucp_Kannada = G_UNICODE_SCRIPT_KANNADA, ucp_Kannada = G_UNICODE_SCRIPT_KANNADA,
ucp_Katakana = G_UNICODE_SCRIPT_KATAKANA, ucp_Katakana = G_UNICODE_SCRIPT_KATAKANA,
ucp_Kharoshthi = G_UNICODE_SCRIPT_KHAROSHTHI, ucp_Kharoshthi = G_UNICODE_SCRIPT_KHAROSHTHI,

View File

@ -1,11 +1,10 @@
From 384879be07418fc6224b6603a2e8ca6f11e178fc Mon Sep 17 00:00:00 2001 From 23d48c5fc7aa889dc7798f9c64acd43d9cb34683 Mon Sep 17 00:00:00 2001
From: Christian Persch <chpe@gnome.org> From: Christian Persch <chpe@gnome.org>
Date: Sun, 12 Feb 2012 21:20:33 +0100 Date: Sun, 12 Feb 2012 21:20:33 +0100
Subject: [PATCH] regex: Use glib for unicode data Subject: [PATCH] regex: Use glib for unicode data
Use g_unichar_type() and g_unichar_get_script() instead of pcre tables. Use g_unichar_type() and g_unichar_get_script() instead of pcre tables.
--- ---
glib/pcre/Makefile.am | 1 -
glib/pcre/pcre_compile.c | 26 +++--- glib/pcre/pcre_compile.c | 26 +++---
glib/pcre/pcre_dfa_exec.c | 96 ++++++++-------- glib/pcre/pcre_dfa_exec.c | 96 ++++++++--------
glib/pcre/pcre_exec.c | 26 +++--- glib/pcre/pcre_exec.c | 26 +++---
@ -13,25 +12,13 @@ Use g_unichar_type() and g_unichar_get_script() instead of pcre tables.
glib/pcre/pcre_tables.c | 16 +++ glib/pcre/pcre_tables.c | 16 +++
glib/pcre/pcre_xclass.c | 24 ++-- glib/pcre/pcre_xclass.c | 24 ++--
glib/pcre/ucp.h | 265 +++++++++++++++++++++++---------------------- glib/pcre/ucp.h | 265 +++++++++++++++++++++++----------------------
8 files changed, 239 insertions(+), 226 deletions(-) 7 files changed, 239 insertions(+), 225 deletions(-)
diff --git a/glib/pcre/Makefile.am b/glib/pcre/Makefile.am
index 21da5c5..1981953 100644
--- a/glib/pcre/Makefile.am
+++ b/glib/pcre/Makefile.am
@@ -51,7 +51,6 @@ libpcre_la_SOURCES = \
pcre_string_utils.c \
pcre_study.c \
pcre_tables.c \
- pcre_ucd.c \
pcre_valid_utf8.c \
pcre_version.c \
pcre_xclass.c \
diff --git a/glib/pcre/pcre_compile.c b/glib/pcre/pcre_compile.c diff --git a/glib/pcre/pcre_compile.c b/glib/pcre/pcre_compile.c
index eb985df..b44055a 100644 index 21bef80..a6c84e1 100644
--- a/glib/pcre/pcre_compile.c --- a/glib/pcre/pcre_compile.c
+++ b/glib/pcre/pcre_compile.c +++ b/glib/pcre/pcre_compile.c
@@ -2890,43 +2890,43 @@ Returns: TRUE if auto-possessifying is OK @@ -2920,43 +2920,43 @@ Returns: TRUE if auto-possessifying is OK
static BOOL static BOOL
check_char_prop(int c, int ptype, int pdata, BOOL negated) check_char_prop(int c, int ptype, int pdata, BOOL negated)
{ {
@ -89,10 +76,10 @@ index eb985df..b44055a 100644
} }
return FALSE; return FALSE;
diff --git a/glib/pcre/pcre_dfa_exec.c b/glib/pcre/pcre_dfa_exec.c diff --git a/glib/pcre/pcre_dfa_exec.c b/glib/pcre/pcre_dfa_exec.c
index 21d7be6..41ff65b 100644 index 9565d46..3f913ce 100644
--- a/glib/pcre/pcre_dfa_exec.c --- a/glib/pcre/pcre_dfa_exec.c
+++ b/glib/pcre/pcre_dfa_exec.c +++ b/glib/pcre/pcre_dfa_exec.c
@@ -1015,7 +1015,7 @@ for (;;) @@ -1060,7 +1060,7 @@ for (;;)
if (clen > 0) if (clen > 0)
{ {
BOOL OK; BOOL OK;
@ -101,7 +88,7 @@ index 21d7be6..41ff65b 100644
switch(code[1]) switch(code[1])
{ {
case PT_ANY: case PT_ANY:
@@ -1023,43 +1023,43 @@ for (;;) @@ -1068,43 +1068,43 @@ for (;;)
break; break;
case PT_LAMP: case PT_LAMP:
@ -156,7 +143,7 @@ index 21d7be6..41ff65b 100644
c == CHAR_UNDERSCORE; c == CHAR_UNDERSCORE;
break; break;
@@ -1209,7 +1209,7 @@ for (;;) @@ -1294,7 +1294,7 @@ for (;;)
if (clen > 0) if (clen > 0)
{ {
BOOL OK; BOOL OK;
@ -165,7 +152,7 @@ index 21d7be6..41ff65b 100644
switch(code[2]) switch(code[2])
{ {
case PT_ANY: case PT_ANY:
@@ -1217,43 +1217,43 @@ for (;;) @@ -1302,43 +1302,43 @@ for (;;)
break; break;
case PT_LAMP: case PT_LAMP:
@ -220,7 +207,7 @@ index 21d7be6..41ff65b 100644
c == CHAR_UNDERSCORE; c == CHAR_UNDERSCORE;
break; break;
@@ -1456,7 +1456,7 @@ for (;;) @@ -1541,7 +1541,7 @@ for (;;)
if (clen > 0) if (clen > 0)
{ {
BOOL OK; BOOL OK;
@ -229,7 +216,7 @@ index 21d7be6..41ff65b 100644
switch(code[2]) switch(code[2])
{ {
case PT_ANY: case PT_ANY:
@@ -1464,43 +1464,43 @@ for (;;) @@ -1549,43 +1549,43 @@ for (;;)
break; break;
case PT_LAMP: case PT_LAMP:
@ -284,7 +271,7 @@ index 21d7be6..41ff65b 100644
c == CHAR_UNDERSCORE; c == CHAR_UNDERSCORE;
break; break;
@@ -1728,7 +1728,7 @@ for (;;) @@ -1813,7 +1813,7 @@ for (;;)
if (clen > 0) if (clen > 0)
{ {
BOOL OK; BOOL OK;
@ -293,7 +280,7 @@ index 21d7be6..41ff65b 100644
switch(code[1 + IMM2_SIZE + 1]) switch(code[1 + IMM2_SIZE + 1])
{ {
case PT_ANY: case PT_ANY:
@@ -1736,43 +1736,43 @@ for (;;) @@ -1821,43 +1821,43 @@ for (;;)
break; break;
case PT_LAMP: case PT_LAMP:
@ -349,10 +336,10 @@ index 21d7be6..41ff65b 100644
break; break;
diff --git a/glib/pcre/pcre_exec.c b/glib/pcre/pcre_exec.c diff --git a/glib/pcre/pcre_exec.c b/glib/pcre/pcre_exec.c
index b715353..8eb3162 100644 index 830b8b5..c89a3f9 100644
--- a/glib/pcre/pcre_exec.c --- a/glib/pcre/pcre_exec.c
+++ b/glib/pcre/pcre_exec.c +++ b/glib/pcre/pcre_exec.c
@@ -2507,7 +2507,7 @@ for (;;) @@ -2565,7 +2565,7 @@ for (;;)
} }
GETCHARINCTEST(c, eptr); GETCHARINCTEST(c, eptr);
{ {
@ -361,7 +348,7 @@ index b715353..8eb3162 100644
switch(ecode[1]) switch(ecode[1])
{ {
@@ -2516,44 +2516,44 @@ for (;;) @@ -2574,44 +2574,44 @@ for (;;)
break; break;
case PT_LAMP: case PT_LAMP:
@ -416,7 +403,7 @@ index b715353..8eb3162 100644
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
c == CHAR_FF || c == CHAR_CR) c == CHAR_FF || c == CHAR_CR)
== (op == OP_NOTPROP)) == (op == OP_NOTPROP))
@@ -2561,8 +2561,8 @@ for (;;) @@ -2619,8 +2619,8 @@ for (;;)
break; break;
case PT_WORD: case PT_WORD:
@ -428,10 +415,10 @@ index b715353..8eb3162 100644
RRETURN(MATCH_NOMATCH); RRETURN(MATCH_NOMATCH);
break; break;
diff --git a/glib/pcre/pcre_internal.h b/glib/pcre/pcre_internal.h diff --git a/glib/pcre/pcre_internal.h b/glib/pcre/pcre_internal.h
index e5a4b6a..41c7ee3 100644 index 181c312..234af1b 100644
--- a/glib/pcre/pcre_internal.h --- a/glib/pcre/pcre_internal.h
+++ b/glib/pcre/pcre_internal.h +++ b/glib/pcre/pcre_internal.h
@@ -2315,15 +2315,12 @@ extern const int PRIV(ucp_typerange)[]; @@ -2329,15 +2329,12 @@ extern const int PRIV(ucp_typerange)[];
#ifdef SUPPORT_UCP #ifdef SUPPORT_UCP
/* UCD access macros */ /* UCD access macros */
@ -452,10 +439,10 @@ index e5a4b6a..41c7ee3 100644
#endif /* SUPPORT_UCP */ #endif /* SUPPORT_UCP */
diff --git a/glib/pcre/pcre_tables.c b/glib/pcre/pcre_tables.c diff --git a/glib/pcre/pcre_tables.c b/glib/pcre/pcre_tables.c
index c8134ec..47becc7 100644 index 7ac2d89..e401974 100644
--- a/glib/pcre/pcre_tables.c --- a/glib/pcre/pcre_tables.c
+++ b/glib/pcre/pcre_tables.c +++ b/glib/pcre/pcre_tables.c
@@ -563,6 +563,22 @@ const ucp_type_table PRIV(utt)[] = { @@ -584,6 +584,22 @@ const ucp_type_table PRIV(utt)[] = {
const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table); const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);