From 94578330109eb7fb1588c0d0d0bb526bfb0ce9b6 Mon Sep 17 00:00:00 2001 From: Christian Persch Date: Thu, 14 Jun 2012 22:15:27 +0200 Subject: [PATCH] regex: Import PCRE 8.31 https://bugzilla.gnome.org/show_bug.cgi?id=679193 --- glib/pcre/pcre.h | 10 +- glib/pcre/pcre_compile.c | 195 ++-- glib/pcre/pcre_dfa_exec.c | 225 +++- glib/pcre/pcre_exec.c | 335 ++++-- glib/pcre/pcre_fullinfo.c | 4 + glib/pcre/pcre_globals.c | 4 + glib/pcre/pcre_internal.h | 43 +- glib/pcre/pcre_jit_compile.c | 2030 ++++++++++++++++++++++------------ glib/pcre/pcre_study.c | 15 +- glib/pcre/pcre_tables.c | 292 ++--- glib/pcre/ucp.h | 9 +- glib/update-pcre/ucp.patch | 55 +- 12 files changed, 2090 insertions(+), 1127 deletions(-) diff --git a/glib/pcre/pcre.h b/glib/pcre/pcre.h index 712bd3d71..b71ead37a 100644 --- a/glib/pcre/pcre.h +++ b/glib/pcre/pcre.h @@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE. /* The current PCRE version information. */ #define PCRE_MAJOR 8 -#define PCRE_MINOR 30 +#define PCRE_MINOR 31 #define PCRE_PRERELEASE -#define PCRE_DATE 2012-02-04 +#define PCRE_DATE 2012-07-06 /* When an application links to a PCRE DLL in Windows, the symbols that are imported have to be identified as such. When building PCRE, the appropriate @@ -179,6 +179,7 @@ compiling). */ #define PCRE_ERROR_JIT_STACKLIMIT (-27) #define PCRE_ERROR_BADMODE (-28) #define PCRE_ERROR_BADENDIANNESS (-29) +#define PCRE_ERROR_DFA_BADRESTART (-30) /* Specific error codes for UTF-8 validity checks */ @@ -234,6 +235,7 @@ compiling). */ #define PCRE_INFO_MINLENGTH 15 #define PCRE_INFO_JIT 16 #define PCRE_INFO_JITSIZE 17 +#define PCRE_INFO_MAXLOOKBEHIND 18 /* Request types for pcre_config(). Do not re-arrange, in order to remain compatible. */ @@ -254,7 +256,9 @@ compatible. */ /* Request types for pcre_study(). Do not re-arrange, in order to remain compatible. 
*/ -#define PCRE_STUDY_JIT_COMPILE 0x0001 +#define PCRE_STUDY_JIT_COMPILE 0x0001 +#define PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE 0x0002 +#define PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE 0x0004 /* Bit flags for the pcre[16]_extra structure. Do not re-arrange or redefine these bits, just add new ones on the end, in order to remain compatible. */ diff --git a/glib/pcre/pcre_compile.c b/glib/pcre/pcre_compile.c index b44055aac..28dc2ed1d 100644 --- a/glib/pcre/pcre_compile.c +++ b/glib/pcre/pcre_compile.c @@ -52,7 +52,11 @@ supporting internal functions that are not used by other modules. */ #include "pcre_internal.h" +#ifdef GLIB_COMPILATION #include "gstrfuncs.h" +#else +#include <glib.h> +#endif /* When PCRE_DEBUG is defined, we need the pcre(16)_printint() function, which is also used by pcretest. PCRE_DEBUG is not defined when building a production @@ -490,6 +494,9 @@ static const char error_texts[] = "too many forward references\0" "disallowed Unicode code point (>= 0xd800 && <= 0xdfff)\0" "invalid UTF-16 string\0" + /* 75 */ + "name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0" + "character value in \\u.... sequence is too large\0" ; /* Table to identify digits and hex digits. This is used when compiling @@ -831,6 +838,18 @@ else c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10)); #endif } + +#ifdef COMPILE_PCRE8 + if (c > (utf ? 0x10ffff : 0xff)) +#else +#ifdef COMPILE_PCRE16 + if (c > (utf ? 
0x10ffff : 0xffff)) +#endif +#endif + { + *errorcodeptr = ERR76; + } + else if (utf && c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73; } } else @@ -2227,32 +2246,60 @@ for (;;) { case OP_CHAR: case OP_CHARI: + case OP_NOT: + case OP_NOTI: case OP_EXACT: case OP_EXACTI: + case OP_NOTEXACT: + case OP_NOTEXACTI: case OP_UPTO: case OP_UPTOI: + case OP_NOTUPTO: + case OP_NOTUPTOI: case OP_MINUPTO: case OP_MINUPTOI: + case OP_NOTMINUPTO: + case OP_NOTMINUPTOI: case OP_POSUPTO: case OP_POSUPTOI: + case OP_NOTPOSUPTO: + case OP_NOTPOSUPTOI: case OP_STAR: case OP_STARI: + case OP_NOTSTAR: + case OP_NOTSTARI: case OP_MINSTAR: case OP_MINSTARI: + case OP_NOTMINSTAR: + case OP_NOTMINSTARI: case OP_POSSTAR: case OP_POSSTARI: + case OP_NOTPOSSTAR: + case OP_NOTPOSSTARI: case OP_PLUS: case OP_PLUSI: + case OP_NOTPLUS: + case OP_NOTPLUSI: case OP_MINPLUS: case OP_MINPLUSI: + case OP_NOTMINPLUS: + case OP_NOTMINPLUSI: case OP_POSPLUS: case OP_POSPLUSI: + case OP_NOTPOSPLUS: + case OP_NOTPOSPLUSI: case OP_QUERY: case OP_QUERYI: + case OP_NOTQUERY: + case OP_NOTQUERYI: case OP_MINQUERY: case OP_MINQUERYI: + case OP_NOTMINQUERY: + case OP_NOTMINQUERYI: case OP_POSQUERY: case OP_POSQUERYI: + case OP_NOTPOSQUERY: + case OP_NOTPOSQUERYI: if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]); break; } @@ -3071,22 +3118,28 @@ if (next >= 0) switch(op_code) #endif /* SUPPORT_UTF */ return (c != TABLE_GET((unsigned int)next, cd->fcc, next)); /* Non-UTF-8 mode */ - /* For OP_NOT and OP_NOTI, the data is always a single-byte character. These - opcodes are not used for multi-byte characters, because they are coded using - an XCLASS instead. 
*/ - case OP_NOT: - return (c = *previous) == next; +#ifdef SUPPORT_UTF + GETCHARTEST(c, previous); +#else + c = *previous; +#endif + return c == next; case OP_NOTI: - if ((c = *previous) == next) return TRUE; +#ifdef SUPPORT_UTF + GETCHARTEST(c, previous); +#else + c = *previous; +#endif + if (c == next) return TRUE; #ifdef SUPPORT_UTF if (utf) { unsigned int othercase; if (next < 128) othercase = cd->fcc[next]; else #ifdef SUPPORT_UCP - othercase = UCD_OTHERCASE(next); + othercase = UCD_OTHERCASE((unsigned int)next); #else othercase = NOTACHAR; #endif @@ -3094,28 +3147,28 @@ if (next >= 0) switch(op_code) } else #endif /* SUPPORT_UTF */ - return (c == (int)(TABLE_GET((unsigned int)next, cd->fcc, next))); /* Non-UTF-8 mode */ + return (c == TABLE_GET((unsigned int)next, cd->fcc, next)); /* Non-UTF-8 mode */ /* Note that OP_DIGIT etc. are generated only when PCRE_UCP is *not* set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */ case OP_DIGIT: - return next > 127 || (cd->ctypes[next] & ctype_digit) == 0; + return next > 255 || (cd->ctypes[next] & ctype_digit) == 0; case OP_NOT_DIGIT: - return next <= 127 && (cd->ctypes[next] & ctype_digit) != 0; + return next <= 255 && (cd->ctypes[next] & ctype_digit) != 0; case OP_WHITESPACE: - return next > 127 || (cd->ctypes[next] & ctype_space) == 0; + return next > 255 || (cd->ctypes[next] & ctype_space) == 0; case OP_NOT_WHITESPACE: - return next <= 127 && (cd->ctypes[next] & ctype_space) != 0; + return next <= 255 && (cd->ctypes[next] & ctype_space) != 0; case OP_WORDCHAR: - return next > 127 || (cd->ctypes[next] & ctype_word) == 0; + return next > 255 || (cd->ctypes[next] & ctype_word) == 0; case OP_NOT_WORDCHAR: - return next <= 127 && (cd->ctypes[next] & ctype_word) != 0; + return next <= 255 && (cd->ctypes[next] & ctype_word) != 0; case OP_HSPACE: case OP_NOT_HSPACE: @@ -3193,22 +3246,22 @@ switch(op_code) switch(-next) { case ESC_d: - return c > 127 || (cd->ctypes[c] & ctype_digit) == 0; + return c > 
255 || (cd->ctypes[c] & ctype_digit) == 0; case ESC_D: - return c <= 127 && (cd->ctypes[c] & ctype_digit) != 0; + return c <= 255 && (cd->ctypes[c] & ctype_digit) != 0; case ESC_s: - return c > 127 || (cd->ctypes[c] & ctype_space) == 0; + return c > 255 || (cd->ctypes[c] & ctype_space) == 0; case ESC_S: - return c <= 127 && (cd->ctypes[c] & ctype_space) != 0; + return c <= 255 && (cd->ctypes[c] & ctype_space) != 0; case ESC_w: - return c > 127 || (cd->ctypes[c] & ctype_word) == 0; + return c > 255 || (cd->ctypes[c] & ctype_word) == 0; case ESC_W: - return c <= 127 && (cd->ctypes[c] & ctype_word) != 0; + return c <= 255 && (cd->ctypes[c] & ctype_word) != 0; case ESC_h: case ESC_H: @@ -3317,10 +3370,10 @@ switch(op_code) return next == -ESC_d; case OP_WHITESPACE: - return next == -ESC_S || next == -ESC_d || next == -ESC_w || next == -ESC_R; + return next == -ESC_S || next == -ESC_d || next == -ESC_w; case OP_NOT_WHITESPACE: - return next == -ESC_s || next == -ESC_h || next == -ESC_v; + return next == -ESC_s || next == -ESC_h || next == -ESC_v || next == -ESC_R; case OP_HSPACE: return next == -ESC_S || next == -ESC_H || next == -ESC_d || @@ -4484,42 +4537,35 @@ for (;; ptr++) LONE_SINGLE_CHARACTER: /* Only the value of 1 matters for class_single_char. */ + if (class_single_char < 2) class_single_char++; /* If class_charcount is 1, we saw precisely one character. As long as - there were no negated characters >= 128 and there was no use of \p or \P, - in other words, no use of any XCLASS features, we can optimize. - - In UTF-8 mode, we can optimize the negative case only if there were no - characters >= 128 because OP_NOT and the related opcodes like OP_NOTSTAR - operate on single-bytes characters only. This is an historical hangover. - Maybe one day we can tidy these opcodes to handle multi-byte characters. + there was no use of \p or \P, in other words, no use of any XCLASS + features, we can optimize. The optimization throws away the bit map. 
We turn the item into a 1-character OP_CHAR[I] if it's positive, or OP_NOT[I] if it's negative. - Note that OP_NOT[I] does not support multibyte characters. In the positive - case, it can cause firstchar to be set. Otherwise, there can be no first - char if this item is first, whatever repeat count may follow. In the case - of reqchar, save the previous value for reinstating. */ + In the positive case, it can cause firstchar to be set. Otherwise, there + can be no first char if this item is first, whatever repeat count may + follow. In the case of reqchar, save the previous value for reinstating. */ -#ifdef SUPPORT_UTF - if (class_single_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET - && (!utf || !negate_class || c < (MAX_VALUE_FOR_SINGLE_CHAR + 1))) -#else if (class_single_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET) -#endif { ptr++; zeroreqchar = reqchar; - /* The OP_NOT[I] opcodes work on single characters only. */ - if (negate_class) { if (firstchar == REQ_UNSET) firstchar = REQ_NONE; zerofirstchar = firstchar; *code++ = ((options & PCRE_CASELESS) != 0)? OP_NOTI: OP_NOT; - *code++ = c; +#ifdef SUPPORT_UTF + if (utf && c > MAX_VALUE_FOR_SINGLE_CHAR) + code += PRIV(ord2utf)(c, code); + else +#endif + *code++ = c; goto NOT_CHAR; } @@ -4777,15 +4823,23 @@ for (;; ptr++) /* Now handle repetition for the different types of item. */ - /* If previous was a character match, abolish the item and generate a - repeat item instead. If a char item has a minumum of more than one, ensure - that it is set in reqchar - it might not be if a sequence such as x{3} is - the first thing in a branch because the x will have gone into firstchar - instead. */ + /* If previous was a character or negated character match, abolish the item + and generate a repeat item instead. 
If a char item has a minimum of more + than one, ensure that it is set in reqchar - it might not be if a sequence + such as x{3} is the first thing in a branch because the x will have gone + into firstchar instead. */ - if (*previous == OP_CHAR || *previous == OP_CHARI) + if (*previous == OP_CHAR || *previous == OP_CHARI + || *previous == OP_NOT || *previous == OP_NOTI) { - op_type = (*previous == OP_CHAR)? 0 : OP_STARI - OP_STAR; + switch (*previous) + { + default: /* Make compiler happy. */ + case OP_CHAR: op_type = OP_STAR - OP_STAR; break; + case OP_CHARI: op_type = OP_STARI - OP_STAR; break; + case OP_NOT: op_type = OP_NOTSTAR - OP_STAR; break; + case OP_NOTI: op_type = OP_NOTSTARI - OP_STAR; break; + } /* Deal with UTF characters that take up more than one character. It's easier to write this out separately than try to macrify it. Use c to @@ -4808,7 +4862,8 @@ for (;; ptr++) with UTF disabled, or for a single character UTF character. */ { c = code[-1]; - if (repeat_min > 1) reqchar = c | req_caseopt | cd->req_varyopt; + if (*previous <= OP_CHARI && repeat_min > 1) + reqchar = c | req_caseopt | cd->req_varyopt; } /* If the repetition is unlimited, it pays to see if the next thing on @@ -4827,26 +4882,6 @@ for (;; ptr++) goto OUTPUT_SINGLE_REPEAT; /* Code shared with single character types */ } - /* If previous was a single negated character ([^a] or similar), we use - one of the special opcodes, replacing it. The code is shared with single- - character repeats by setting opt_type to add a suitable offset into - repeat_type. We can also test for auto-possessification. OP_NOT and OP_NOTI - are currently used only for single-byte chars. */ - - else if (*previous == OP_NOT || *previous == OP_NOTI) - { - op_type = ((*previous == OP_NOT)? 
OP_NOTSTAR : OP_NOTSTARI) - OP_STAR; - c = previous[1]; - if (!possessive_quantifier && - repeat_max < 0 && - check_auto_possessive(previous, utf, ptr + 1, options, cd)) - { - repeat_type = 0; /* Force greedy */ - possessive_quantifier = TRUE; - } - goto OUTPUT_SINGLE_REPEAT; - } - /* If previous was a character type match (\d or similar), abolish it and create a suitable repeat item. The code is shared with single-character repeats by setting op_type to add a suitable offset into repeat_type. Note @@ -5587,6 +5622,11 @@ for (;; ptr++) arg = ++ptr; while (*ptr != 0 && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++; arglen = (int)(ptr - arg); + if (arglen > (int)MAX_MARK) + { + *errorcodeptr = ERR75; + goto FAILED; + } } if (*ptr != CHAR_RIGHT_PARENTHESIS) @@ -6838,10 +6878,13 @@ for (;; ptr++) /* For the rest (including \X when Unicode properties are supported), we can obtain the OP value by negating the escape value in the default situation when PCRE_UCP is not set. When it *is* set, we substitute - Unicode property tests. */ + Unicode property tests. Note that \b and \B do a one-character + lookbehind. 
*/ else { + if ((-c == ESC_b || -c == ESC_B) && cd->max_lookbehind == 0) + cd->max_lookbehind = 1; #ifdef SUPPORT_UCP if (-c >= ESC_DU && -c <= ESC_wu) { @@ -7149,7 +7192,12 @@ for (;;) *ptrptr = ptr; return FALSE; } - else { PUT(reverse_count, 0, fixed_length); } + else + { + if (fixed_length > cd->max_lookbehind) + cd->max_lookbehind = fixed_length; + PUT(reverse_count, 0, fixed_length); + } } } @@ -7819,6 +7867,7 @@ cd->start_pattern = (const pcre_uchar *)pattern; cd->end_pattern = (const pcre_uchar *)(pattern + STRLEN_UC((const pcre_uchar *)pattern)); cd->req_varyopt = 0; cd->assert_depth = 0; +cd->max_lookbehind = 0; cd->external_options = options; cd->external_flags = 0; cd->open_caps = NULL; @@ -7869,7 +7918,6 @@ re->magic_number = MAGIC_NUMBER; re->size = (int)size; re->options = cd->external_options; re->flags = cd->external_flags; -re->dummy1 = 0; re->first_char = 0; re->req_char = 0; re->name_table_offset = sizeof(REAL_PCRE) / sizeof(pcre_uchar); @@ -7889,6 +7937,7 @@ field; this time it's used for remembering forward references to subpatterns. cd->final_bracount = cd->bracount; /* Save for checking forward references */ cd->assert_depth = 0; cd->bracount = 0; +cd->max_lookbehind = 0; cd->names_found = 0; cd->name_table = (pcre_uchar *)re + re->name_table_offset; codestart = cd->name_table + re->name_entry_size * re->name_count; @@ -7910,6 +7959,7 @@ code = (pcre_uchar *)codestart; &firstchar, &reqchar, NULL, cd, NULL); re->top_bracket = cd->bracount; re->top_backref = cd->top_backref; +re->max_lookbehind = cd->max_lookbehind; re->flags = cd->external_flags | PCRE_MODE; if (cd->had_accept) reqchar = REQ_NONE; /* Must disable after (*ACCEPT) */ @@ -7997,6 +8047,7 @@ if (cd->check_lookbehind) (fixed_length == -4)? 
ERR70 : ERR25; break; } + if (fixed_length > cd->max_lookbehind) cd->max_lookbehind = fixed_length; PUT(cc, 1, fixed_length); } cc += 1 + LINK_SIZE; diff --git a/glib/pcre/pcre_dfa_exec.c b/glib/pcre/pcre_dfa_exec.c index 41ff65bea..5593d393f 100644 --- a/glib/pcre/pcre_dfa_exec.c +++ b/glib/pcre/pcre_dfa_exec.c @@ -38,10 +38,9 @@ POSSIBILITY OF SUCH DAMAGE. ----------------------------------------------------------------------------- */ - /* This module contains the external function pcre_dfa_exec(), which is an alternative matching function that uses a sort of DFA algorithm (not a true -FSM). This is NOT Perl- compatible, but it has advantages in certain +FSM). This is NOT Perl-compatible, but it has advantages in certain applications. */ @@ -282,7 +281,7 @@ typedef struct stateblock { int data; /* Some use extra data */ } stateblock; -#define INTS_PER_STATEBLOCK (sizeof(stateblock)/sizeof(int)) +#define INTS_PER_STATEBLOCK (int)(sizeof(stateblock)/sizeof(int)) #ifdef PCRE_DEBUG @@ -382,7 +381,8 @@ for the current character, one for the following character). */ next_new_state->count = (y); \ next_new_state->data = (z); \ next_new_state++; \ - DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \ + DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d) line %d\n", rlevel*2-2, SP, \ + (x), (y), (z), __LINE__)); \ } \ else return PCRE_ERROR_DFA_WSSIZE @@ -424,6 +424,8 @@ BOOL utf = (md->poptions & PCRE_UTF8) != 0; BOOL utf = FALSE; #endif +BOOL reset_could_continue = FALSE; + rlevel++; offsetcount &= (-2); @@ -571,7 +573,9 @@ for (;;) int clen, dlen; unsigned int c, d; int forced_fail = 0; - BOOL could_continue = FALSE; + BOOL partial_newline = FALSE; + BOOL could_continue = reset_could_continue; + reset_could_continue = FALSE; /* Make the new state list into the active state list and empty the new state list. 
*/ @@ -607,7 +611,7 @@ for (;;) if (ptr < end_subject) { - clen = 1; /* Number of bytes in the character */ + clen = 1; /* Number of data items in the character */ #ifdef SUPPORT_UTF if (utf) { GETCHARLEN(c, ptr, clen); } else #endif /* SUPPORT_UTF */ @@ -641,7 +645,8 @@ for (;;) /* A negative offset is a special case meaning "hold off going to this (negated) state until the number of characters in the data field have - been skipped". */ + been skipped". If the could_continue flag was passed over from a previous + state, arrange for it to be passed on. */ if (state_offset < 0) { @@ -650,6 +655,7 @@ for (;;) DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP)); ADD_NEW_DATA(state_offset, current_state->count, current_state->data - 1); + if (could_continue) reset_could_continue = TRUE; continue; } else @@ -689,10 +695,10 @@ for (;;) permitted. We also use this mechanism for opcodes such as OP_TYPEPLUS that take an - argument that is not a data character - but is always one byte long. We - have to take special action to deal with \P, \p, \H, \h, \V, \v and \X in - this case. To keep the other cases fast, convert these ones to new opcodes. - */ + argument that is not a data character - but is always one byte long because + the values are small. We have to take special action to deal with \P, \p, + \H, \h, \V, \v and \X in this case. To keep the other cases fast, convert + these ones to new opcodes. 
*/ if (coptable[codevalue] > 0) { @@ -783,7 +789,7 @@ for (;;) offsets[0] = (int)(current_subject - start_subject); offsets[1] = (int)(ptr - start_subject); DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP, - offsets[1] - offsets[0], current_subject)); + offsets[1] - offsets[0], (char *)current_subject)); } if ((md->moptions & PCRE_DFA_SHORTEST) != 0) { @@ -888,7 +894,20 @@ for (;;) /*-----------------------------------------------------------------*/ case OP_ANY: if (clen > 0 && !IS_NEWLINE(ptr)) - { ADD_NEW(state_offset + 1, 0); } + { + if (ptr + 1 >= md->end_subject && + (md->moptions & (PCRE_PARTIAL_HARD)) != 0 && + NLBLOCK->nltype == NLTYPE_FIXED && + NLBLOCK->nllen == 2 && + c == NLBLOCK->nl[0]) + { + could_continue = partial_newline = TRUE; + } + else + { + ADD_NEW(state_offset + 1, 0); + } + } break; /*-----------------------------------------------------------------*/ @@ -916,6 +935,19 @@ for (;;) (ptr == end_subject - md->nllen) )) { ADD_ACTIVE(state_offset + 1, 0); } + else if (ptr + 1 >= md->end_subject && + (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 && + NLBLOCK->nltype == NLTYPE_FIXED && + NLBLOCK->nllen == 2 && + c == NLBLOCK->nl[0]) + { + if ((md->moptions & PCRE_PARTIAL_HARD) != 0) + { + reset_could_continue = TRUE; + ADD_NEW_DATA(-(state_offset + 1), 0, 1); + } + else could_continue = partial_newline = TRUE; + } } break; @@ -928,6 +960,19 @@ for (;;) else if (clen == 0 || ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr))) { ADD_ACTIVE(state_offset + 1, 0); } + else if (ptr + 1 >= md->end_subject && + (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 && + NLBLOCK->nltype == NLTYPE_FIXED && + NLBLOCK->nllen == 2 && + c == NLBLOCK->nl[0]) + { + if ((md->moptions & PCRE_PARTIAL_HARD) != 0) + { + reset_could_continue = TRUE; + ADD_NEW_DATA(-(state_offset + 1), 0, 1); + } + else could_continue = partial_newline = TRUE; + } } else if (IS_NEWLINE(ptr)) { ADD_ACTIVE(state_offset + 1, 0); } @@ -1090,7 
+1135,15 @@ for (;;) if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); } if (clen > 0) { - if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) || + if (d == OP_ANY && ptr + 1 >= md->end_subject && + (md->moptions & (PCRE_PARTIAL_HARD)) != 0 && + NLBLOCK->nltype == NLTYPE_FIXED && + NLBLOCK->nllen == 2 && + c == NLBLOCK->nl[0]) + { + could_continue = partial_newline = TRUE; + } + else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) || (c < 256 && (d != OP_ANY || !IS_NEWLINE(ptr)) && ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0)) @@ -1113,7 +1166,15 @@ for (;;) ADD_ACTIVE(state_offset + 2, 0); if (clen > 0) { - if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) || + if (d == OP_ANY && ptr + 1 >= md->end_subject && + (md->moptions & (PCRE_PARTIAL_HARD)) != 0 && + NLBLOCK->nltype == NLTYPE_FIXED && + NLBLOCK->nllen == 2 && + c == NLBLOCK->nl[0]) + { + could_continue = partial_newline = TRUE; + } + else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) || (c < 256 && (d != OP_ANY || !IS_NEWLINE(ptr)) && ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0)) @@ -1135,7 +1196,15 @@ for (;;) ADD_ACTIVE(state_offset + 2, 0); if (clen > 0) { - if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) || + if (d == OP_ANY && ptr + 1 >= md->end_subject && + (md->moptions & (PCRE_PARTIAL_HARD)) != 0 && + NLBLOCK->nltype == NLTYPE_FIXED && + NLBLOCK->nllen == 2 && + c == NLBLOCK->nl[0]) + { + could_continue = partial_newline = TRUE; + } + else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) || (c < 256 && (d != OP_ANY || !IS_NEWLINE(ptr)) && ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0)) @@ -1155,7 +1224,15 @@ for (;;) count = current_state->count; /* Number already matched */ if (clen > 0) { - if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) || + if (d == OP_ANY && ptr + 1 >= md->end_subject && + (md->moptions & 
(PCRE_PARTIAL_HARD)) != 0 && + NLBLOCK->nltype == NLTYPE_FIXED && + NLBLOCK->nllen == 2 && + c == NLBLOCK->nl[0]) + { + could_continue = partial_newline = TRUE; + } + else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) || (c < 256 && (d != OP_ANY || !IS_NEWLINE(ptr)) && ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0)) @@ -1176,7 +1253,15 @@ for (;;) count = current_state->count; /* Number already matched */ if (clen > 0) { - if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) || + if (d == OP_ANY && ptr + 1 >= md->end_subject && + (md->moptions & (PCRE_PARTIAL_HARD)) != 0 && + NLBLOCK->nltype == NLTYPE_FIXED && + NLBLOCK->nllen == 2 && + c == NLBLOCK->nl[0]) + { + could_continue = partial_newline = TRUE; + } + else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) || (c < 256 && (d != OP_ANY || !IS_NEWLINE(ptr)) && ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0)) @@ -1824,6 +1909,8 @@ for (;;) ncount++; nptr += ndlen; } + if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0) + reset_could_continue = TRUE; if (++count >= GET2(code, 1)) { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); } else @@ -2037,6 +2124,8 @@ for (;;) ncount++; nptr += nclen; } + if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0) + reset_could_continue = TRUE; ADD_NEW_DATA(-(state_offset + 1), 0, ncount); } break; @@ -2062,7 +2151,13 @@ for (;;) break; case 0x000d: - if (ptr + 1 < end_subject && ptr[1] == 0x0a) + if (ptr + 1 >= end_subject) + { + ADD_NEW(state_offset + 1, 0); + if ((md->moptions & PCRE_PARTIAL_HARD) != 0) + reset_could_continue = TRUE; + } + else if (ptr[1] == 0x0a) { ADD_NEW_DATA(-(state_offset + 1), 0, 1); } @@ -2171,22 +2266,32 @@ for (;;) break; /*-----------------------------------------------------------------*/ - /* Match a negated single character casefully. This is only used for - one-byte characters, that is, we know that d < 256. 
The character we are - checking (c) can be multibyte. */ + /* Match a negated single character casefully. */ case OP_NOT: if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); } break; /*-----------------------------------------------------------------*/ - /* Match a negated single character caselessly. This is only used for - one-byte characters, that is, we know that d < 256. The character we are - checking (c) can be multibyte. */ + /* Match a negated single character caselessly. */ case OP_NOTI: - if (clen > 0 && c != d && c != fcc[d]) - { ADD_NEW(state_offset + dlen + 1, 0); } + if (clen > 0) + { + unsigned int otherd; +#ifdef SUPPORT_UTF + if (utf && d >= 128) + { +#ifdef SUPPORT_UCP + otherd = UCD_OTHERCASE(d); +#endif /* SUPPORT_UCP */ + } + else +#endif /* SUPPORT_UTF */ + otherd = TABLE_GET(d, fcc, d); + if (c != d && c != otherd) + { ADD_NEW(state_offset + dlen + 1, 0); } + } break; /*-----------------------------------------------------------------*/ @@ -2692,9 +2797,12 @@ for (;;) { int charcount = local_offsets[rc+1] - local_offsets[rc]; #ifdef SUPPORT_UTF - const pcre_uchar *p = start_subject + local_offsets[rc]; - const pcre_uchar *pp = start_subject + local_offsets[rc+1]; - while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--; + if (utf) + { + const pcre_uchar *p = start_subject + local_offsets[rc]; + const pcre_uchar *pp = start_subject + local_offsets[rc+1]; + while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--; + } #endif if (charcount > 0) { @@ -2793,7 +2901,7 @@ for (;;) const pcre_uchar *pp = local_ptr; charcount = (int)(pp - p); #ifdef SUPPORT_UTF - while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--; + if (utf) while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--; #endif ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1)); } @@ -2875,9 +2983,12 @@ for (;;) else { #ifdef SUPPORT_UTF - const pcre_uchar *p = start_subject + local_offsets[0]; - const pcre_uchar *pp = start_subject + local_offsets[1]; - while (p < pp) if 
(NOT_FIRSTCHAR(*p++)) charcount--; + if (utf) + { + const pcre_uchar *p = start_subject + local_offsets[0]; + const pcre_uchar *pp = start_subject + local_offsets[1]; + while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--; + } #endif ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1)); if (repeat_state_offset >= 0) @@ -2946,7 +3057,7 @@ for (;;) if (new_count <= 0) { if (rlevel == 1 && /* Top level, and */ - could_continue && /* Some could go on */ + could_continue && /* Some could go on, and */ forced_fail != workspace[1] && /* Not all forced fail & */ ( /* either... */ (md->moptions & PCRE_PARTIAL_HARD) != 0 /* Hard partial */ @@ -2954,8 +3065,13 @@ for (;;) ((md->moptions & PCRE_PARTIAL_SOFT) != 0 && /* Soft partial and */ match_count < 0) /* no matches */ ) && /* And... */ - ptr >= end_subject && /* Reached end of subject */ - ptr > md->start_used_ptr) /* Inspected non-empty string */ + ( + partial_newline || /* Either partial NL */ + ( /* or ... */ + ptr >= end_subject && /* End of subject and */ + ptr > md->start_used_ptr) /* Inspected non-empty string */ + ) + ) { if (offsetcount >= 2) { @@ -3052,10 +3168,27 @@ if (offsetcount < 0) return PCRE_ERROR_BADCOUNT; if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE; if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET; -/* We need to find the pointer to any study data before we test for byte -flipping, so we scan the extra_data block first. This may set two fields in the -match block, so we must initialize them beforehand. However, the other fields -in the match block must not be set until after the byte flipping. */ +/* Check that the first field in the block is the magic number. If it is not, +return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to +REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which +means that the pattern is likely compiled with different endianness. 
*/ + +if (re->magic_number != MAGIC_NUMBER) + return re->magic_number == REVERSED_MAGIC_NUMBER? + PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC; +if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE; + +/* If restarting after a partial match, do some sanity checks on the contents +of the workspace. */ + +if ((options & PCRE_DFA_RESTART) != 0) + { + if ((workspace[0] & (-2)) != 0 || workspace[1] < 1 || + workspace[1] > (wscount - 2)/INTS_PER_STATEBLOCK) + return PCRE_ERROR_DFA_BADRESTART; + } + +/* Set up study, callout, and table data */ md->tables = re->tables; md->callout_data = NULL; @@ -3074,16 +3207,6 @@ if (extra_data != NULL) md->tables = extra_data->tables; } -/* Check that the first field in the block is the magic number. If it is not, -return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to -REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which -means that the pattern is likely compiled with different endianness. */ - -if (re->magic_number != MAGIC_NUMBER) - return re->magic_number == REVERSED_MAGIC_NUMBER? - PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC; -if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE; - /* Set some local values */ current_subject = (const pcre_uchar *)subject + start_offset; diff --git a/glib/pcre/pcre_exec.c b/glib/pcre/pcre_exec.c index 8eb3162b2..4eb27f084 100644 --- a/glib/pcre/pcre_exec.c +++ b/glib/pcre/pcre_exec.c @@ -37,7 +37,6 @@ POSSIBILITY OF SUCH DAMAGE. ----------------------------------------------------------------------------- */ - /* This module contains pcre_exec(), the externally visible function that does pattern matching using an NFA algorithm, trying to mimic Perl as closely as possible. There are also some static supporting functions. 
*/ @@ -140,7 +139,9 @@ Arguments: md points to match data block caseless TRUE if caseless -Returns: < 0 if not matched, otherwise the number of subject bytes matched +Returns: >= 0 the number of subject bytes matched + -1 no match + -2 partial match; always given if at end subject */ static int @@ -163,7 +164,8 @@ pchars(p, length, FALSE, md); printf("\n"); #endif -/* Always fail if reference not set (and not JavaScript compatible). */ +/* Always fail if reference not set (and not JavaScript compatible - in that +case the length is passed as zero). */ if (length < 0) return -1; @@ -189,7 +191,7 @@ if (caseless) while (p < endptr) { int c, d; - if (eptr >= md->end_subject) return -1; + if (eptr >= md->end_subject) return -2; /* Partial match */ GETCHARINC(c, eptr); GETCHARINC(d, p); if (c != d && c != UCD_OTHERCASE(d)) return -1; @@ -202,9 +204,9 @@ if (caseless) /* The same code works when not in UTF-8 mode and in UTF-8 mode when there is no UCP support. */ { - if (eptr + length > md->end_subject) return -1; while (length-- > 0) { + if (eptr >= md->end_subject) return -2; /* Partial match */ if (TABLE_GET(*p, md->lcc, *p) != TABLE_GET(*eptr, md->lcc, *eptr)) return -1; p++; eptr++; @@ -217,8 +219,11 @@ are in UTF-8 mode. */ else { - if (eptr + length > md->end_subject) return -1; - while (length-- > 0) if (*p++ != *eptr++) return -1; + while (length-- > 0) + { + if (eptr >= md->end_subject) return -2; /* Partial match */ + if (*p++ != *eptr++) return -1; + } } return (int)(eptr - eptr_start); @@ -311,9 +316,15 @@ argument of match(), which never changes. 
*/ #define RMATCH(ra,rb,rc,rd,re,rw)\ {\ - heapframe *newframe = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));\ - if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\ - frame->Xwhere = rw; \ + heapframe *newframe = frame->Xnextframe;\ + if (newframe == NULL)\ + {\ + newframe = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));\ + if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\ + newframe->Xnextframe = NULL;\ + frame->Xnextframe = newframe;\ + }\ + frame->Xwhere = rw;\ newframe->Xeptr = ra;\ newframe->Xecode = rb;\ newframe->Xmstart = mstart;\ @@ -332,7 +343,6 @@ argument of match(), which never changes. */ {\ heapframe *oldframe = frame;\ frame = oldframe->Xprevframe;\ - if (oldframe != &frame_zero) (PUBL(stack_free))(oldframe);\ if (frame != NULL)\ {\ rrc = ra;\ @@ -346,6 +356,7 @@ argument of match(), which never changes. */ typedef struct heapframe { struct heapframe *Xprevframe; + struct heapframe *Xnextframe; /* Function arguments that may change */ @@ -492,9 +503,7 @@ the top-level on the stack rather than malloc-ing them all gives a performance boost in many cases where there is not much "recursion". 
*/ #ifdef NO_RECURSE -heapframe frame_zero; -heapframe *frame = &frame_zero; -frame->Xprevframe = NULL; /* Marks the top level */ +heapframe *frame = (heapframe *)md->match_frames_base; /* Copy in the original argument variables */ @@ -897,7 +906,6 @@ for (;;) } else /* OP_KETRMAX */ { - md->match_function_type = MATCH_CBEGROUP; RMATCH(eptr, prev, offset_top, md, eptrb, RM66); if (rrc != MATCH_NOMATCH) RRETURN(rrc); ecode += 1 + LINK_SIZE; @@ -1026,7 +1034,8 @@ for (;;) for (;;) { - if (op >= OP_SBRA || op == OP_ONCE) md->match_function_type = MATCH_CBEGROUP; + if (op >= OP_SBRA || op == OP_ONCE) + md->match_function_type = MATCH_CBEGROUP; /* If this is not a possibly empty group, and there are no (*THEN)s in the pattern, and this is the final alternative, optimize as described @@ -1565,13 +1574,18 @@ for (;;) mstart = md->start_match_ptr; /* In case \K reset it */ break; } + md->mark = save_mark; - /* PCRE does not allow THEN to escape beyond an assertion; it is treated - as NOMATCH. */ + /* A COMMIT failure must fail the entire assertion, without trying any + subsequent branches. */ + + if (rrc == MATCH_COMMIT) RRETURN(MATCH_NOMATCH); + + /* PCRE does not allow THEN to escape beyond an assertion; it + is treated as NOMATCH. */ if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); ecode += GET(ecode, 1); - md->mark = save_mark; } while (*ecode == OP_ALT); @@ -1779,10 +1793,11 @@ for (;;) goto RECURSION_MATCHED; /* Exit loop; end processing */ } - /* PCRE does not allow THEN to escape beyond a recursion; it is treated - as NOMATCH. */ + /* PCRE does not allow THEN or COMMIT to escape beyond a recursion; it + is treated as NOMATCH. 
*/ - else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) + else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN && + rrc != MATCH_COMMIT) { DPRINTF(("Recursion gave error %d\n", rrc)); if (new_recursive.offset_save != stacksave) @@ -1993,7 +2008,6 @@ for (;;) } if (*prev >= OP_SBRA) /* Could match an empty string */ { - md->match_function_type = MATCH_CBEGROUP; RMATCH(eptr, prev, offset_top, md, eptrb, RM50); RRETURN(rrc); } @@ -2002,7 +2016,6 @@ for (;;) } else /* OP_KETRMAX */ { - if (*prev >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP; RMATCH(eptr, prev, offset_top, md, eptrb, RM13); if (rrc == MATCH_ONCE && md->once_target == prev) rrc = MATCH_NOMATCH; if (rrc != MATCH_NOMATCH) RRETURN(rrc); @@ -2059,7 +2072,21 @@ for (;;) case OP_DOLLM: if (eptr < md->end_subject) - { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); } + { + if (!IS_NEWLINE(eptr)) + { + if (md->partial != 0 && + eptr + 1 >= md->end_subject && + NLBLOCK->nltype == NLTYPE_FIXED && + NLBLOCK->nllen == 2 && + *eptr == NLBLOCK->nl[0]) + { + md->hitend = TRUE; + if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); + } + RRETURN(MATCH_NOMATCH); + } + } else { if (md->noteol) RRETURN(MATCH_NOMATCH); @@ -2091,7 +2118,18 @@ for (;;) ASSERT_NL_OR_EOS: if (eptr < md->end_subject && (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen)) + { + if (md->partial != 0 && + eptr + 1 >= md->end_subject && + NLBLOCK->nltype == NLTYPE_FIXED && + NLBLOCK->nllen == 2 && + *eptr == NLBLOCK->nl[0]) + { + md->hitend = TRUE; + if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); + } RRETURN(MATCH_NOMATCH); + } /* Either at end of string or \n before end. */ @@ -2219,12 +2257,25 @@ for (;;) } break; - /* Match a single character type; inline for speed */ + /* Match any single character type except newline; have to take care with + CRLF newlines and partial matching. 
*/ case OP_ANY: if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); + if (md->partial != 0 && + eptr + 1 >= md->end_subject && + NLBLOCK->nltype == NLTYPE_FIXED && + NLBLOCK->nllen == 2 && + *eptr == NLBLOCK->nl[0]) + { + md->hitend = TRUE; + if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); + } + /* Fall through */ + /* Match any single character whatsoever. */ + case OP_ALLANY: if (eptr >= md->end_subject) /* DO NOT merge the eptr++ here; it must */ { /* not be updated before SCHECK_PARTIAL. */ @@ -2365,7 +2416,11 @@ for (;;) default: RRETURN(MATCH_NOMATCH); case 0x000d: - if (eptr < md->end_subject && *eptr == 0x0a) eptr++; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + } + else if (*eptr == 0x0a) eptr++; break; case 0x000a: @@ -2595,6 +2650,7 @@ for (;;) if (UCD_CATEGORY(c) != ucp_M) break; eptr += len; } + CHECK_PARTIAL(); ecode++; break; #endif @@ -2660,6 +2716,7 @@ for (;;) default: /* No repeat follows */ if ((length = match_ref(offset, eptr, length, md, caseless)) < 0) { + if (length == -2) eptr = md->end_subject; /* Partial match */ CHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } @@ -2685,6 +2742,7 @@ for (;;) int slength; if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0) { + if (slength == -2) eptr = md->end_subject; /* Partial match */ CHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } @@ -2708,6 +2766,7 @@ for (;;) if (fi >= max) RRETURN(MATCH_NOMATCH); if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0) { + if (slength == -2) eptr = md->end_subject; /* Partial match */ CHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } @@ -2726,11 +2785,20 @@ for (;;) int slength; if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0) { - CHECK_PARTIAL(); + /* Can't use CHECK_PARTIAL because we don't want to update eptr in + the soft partial matching case. 
*/ + + if (slength == -2 && md->partial != 0 && + md->end_subject > md->start_used_ptr) + { + md->hitend = TRUE; + if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); + } break; } eptr += slength; } + while (eptr >= pp) { RMATCH(eptr, ecode, offset_top, md, eptrb, RM15); @@ -3360,7 +3428,7 @@ for (;;) maximizing, find the maximum number of characters and work backwards. */ DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max, - max, eptr)); + max, (char *)eptr)); if (op >= OP_STARI) /* Caseless */ { @@ -3504,33 +3572,41 @@ for (;;) SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - ecode++; - GETCHARINCTEST(c, eptr); - if (op == OP_NOTI) /* The caseless case */ +#ifdef SUPPORT_UTF + if (utf) { register unsigned int ch, och; - ch = *ecode++; -#ifdef COMPILE_PCRE8 - /* ch must be < 128 if UTF is enabled. */ - och = md->fcc[ch]; -#else -#ifdef SUPPORT_UTF -#ifdef SUPPORT_UCP - if (utf && ch > 127) - och = UCD_OTHERCASE(ch); -#else - if (utf && ch > 127) - och = ch; -#endif /* SUPPORT_UCP */ + + ecode++; + GETCHARINC(ch, ecode); + GETCHARINC(c, eptr); + + if (op == OP_NOT) + { + if (ch == c) RRETURN(MATCH_NOMATCH); + } else -#endif /* SUPPORT_UTF */ - och = TABLE_GET(ch, md->fcc, ch); -#endif /* COMPILE_PCRE8 */ - if (ch == c || och == c) RRETURN(MATCH_NOMATCH); + { +#ifdef SUPPORT_UCP + if (ch > 127) + och = UCD_OTHERCASE(ch); +#else + if (ch > 127) + och = ch; +#endif /* SUPPORT_UCP */ + else + och = TABLE_GET(ch, md->fcc, ch); + if (ch == c || och == c) RRETURN(MATCH_NOMATCH); + } } - else /* Caseful */ + else +#endif { - if (*ecode++ == c) RRETURN(MATCH_NOMATCH); + register unsigned int ch = ecode[1]; + c = *eptr++; + if (ch == c || (op == OP_NOTI && TABLE_GET(ch, md->fcc, ch) == c)) + RRETURN(MATCH_NOMATCH); + ecode += 2; } break; @@ -3610,7 +3686,7 @@ for (;;) /* Common code for all repeated single-byte matches. 
*/ REPEATNOTCHAR: - fc = *ecode++; + GETCHARINCTEST(fc, ecode); /* The code is duplicated for the caseless and caseful cases, for speed, since matching characters is likely to be quite common. First, ensure the @@ -3621,14 +3697,10 @@ for (;;) characters and work backwards. */ DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max, - max, eptr)); + max, (char *)eptr)); if (op >= OP_NOTSTARI) /* Caseless */ { -#ifdef COMPILE_PCRE8 - /* fc must be < 128 if UTF is enabled. */ - foc = md->fcc[fc]; -#else #ifdef SUPPORT_UTF #ifdef SUPPORT_UCP if (utf && fc > 127) @@ -3640,7 +3712,6 @@ for (;;) else #endif /* SUPPORT_UTF */ foc = TABLE_GET(fc, md->fcc, fc); -#endif /* COMPILE_PCRE8 */ #ifdef SUPPORT_UTF if (utf) @@ -3654,7 +3725,7 @@ for (;;) RRETURN(MATCH_NOMATCH); } GETCHARINC(d, eptr); - if (fc == d || (unsigned int) foc == d) RRETURN(MATCH_NOMATCH); + if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH); } } else @@ -4164,6 +4235,7 @@ for (;;) if (UCD_CATEGORY(c) != ucp_M) break; eptr += len; } + CHECK_PARTIAL(); } } @@ -4184,6 +4256,15 @@ for (;;) RRETURN(MATCH_NOMATCH); } if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); + if (md->partial != 0 && + eptr + 1 >= md->end_subject && + NLBLOCK->nltype == NLTYPE_FIXED && + NLBLOCK->nllen == 2 && + *eptr == NLBLOCK->nl[0]) + { + md->hitend = TRUE; + if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); + } eptr++; ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++); } @@ -4468,6 +4549,15 @@ for (;;) RRETURN(MATCH_NOMATCH); } if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); + if (md->partial != 0 && + eptr + 1 >= md->end_subject && + NLBLOCK->nltype == NLTYPE_FIXED && + NLBLOCK->nllen == 2 && + *eptr == NLBLOCK->nl[0]) + { + md->hitend = TRUE; + if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); + } eptr++; } break; @@ -4948,6 +5038,7 @@ for (;;) if (UCD_CATEGORY(c) != ucp_M) break; eptr += len; } + CHECK_PARTIAL(); } } else @@ -4971,7 +5062,18 @@ for (;;) GETCHARINC(c, eptr); switch(ctype) { - case OP_ANY: 
/* This is the non-NL case */ + case OP_ANY: /* This is the non-NL case */ + if (md->partial != 0 && /* Take care with CRLF partial */ + eptr >= md->end_subject && + NLBLOCK->nltype == NLTYPE_FIXED && + NLBLOCK->nllen == 2 && + c == NLBLOCK->nl[0]) + { + md->hitend = TRUE; + if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); + } + break; + case OP_ALLANY: case OP_ANYBYTE: break; @@ -5134,7 +5236,18 @@ for (;;) c = *eptr++; switch(ctype) { - case OP_ANY: /* This is the non-NL case */ + case OP_ANY: /* This is the non-NL case */ + if (md->partial != 0 && /* Take care with CRLF partial */ + eptr >= md->end_subject && + NLBLOCK->nltype == NLTYPE_FIXED && + NLBLOCK->nllen == 2 && + c == NLBLOCK->nl[0]) + { + md->hitend = TRUE; + if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); + } + break; + case OP_ALLANY: case OP_ANYBYTE: break; @@ -5491,6 +5604,7 @@ for (;;) if (UCD_CATEGORY(c) != ucp_M) break; eptr += len; } + CHECK_PARTIAL(); } /* eptr is now past the end of the maximum run */ @@ -5534,6 +5648,15 @@ for (;;) break; } if (IS_NEWLINE(eptr)) break; + if (md->partial != 0 && /* Take care with CRLF partial */ + eptr + 1 >= md->end_subject && + NLBLOCK->nltype == NLTYPE_FIXED && + NLBLOCK->nllen == 2 && + *eptr == NLBLOCK->nl[0]) + { + md->hitend = TRUE; + if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); + } eptr++; ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++); } @@ -5551,6 +5674,15 @@ for (;;) break; } if (IS_NEWLINE(eptr)) break; + if (md->partial != 0 && /* Take care with CRLF partial */ + eptr + 1 >= md->end_subject && + NLBLOCK->nltype == NLTYPE_FIXED && + NLBLOCK->nllen == 2 && + *eptr == NLBLOCK->nl[0]) + { + md->hitend = TRUE; + if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); + } eptr++; ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++); } @@ -5815,6 +5947,15 @@ for (;;) break; } if (IS_NEWLINE(eptr)) break; + if (md->partial != 0 && /* Take care with CRLF partial */ + eptr + 1 >= md->end_subject && + NLBLOCK->nltype == NLTYPE_FIXED && + NLBLOCK->nllen == 
2 && + *eptr == NLBLOCK->nl[0]) + { + md->hitend = TRUE; + if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); + } eptr++; } break; @@ -6145,6 +6286,31 @@ Undefine all the macros that were defined above to handle this. */ ***************************************************************************/ +#ifdef NO_RECURSE +/************************************************* +* Release allocated heap frames * +*************************************************/ + +/* This function releases all the allocated frames. The base frame is on the +machine stack, and so must not be freed. + +Argument: the address of the base frame +Returns: nothing +*/ + +static void +release_match_heapframes (heapframe *frame_base) +{ +heapframe *nextframe = frame_base->Xnextframe; +while (nextframe != NULL) + { + heapframe *oldframe = nextframe; + nextframe = nextframe->Xnextframe; + (PUBL(stack_free))(oldframe); + } +} +#endif + /************************************************* * Execute a Regular Expression * @@ -6207,13 +6373,22 @@ PCRE_PUCHAR req_char_ptr = start_match - 1; const pcre_study_data *study; const REAL_PCRE *re = (const REAL_PCRE *)argument_re; +#ifdef NO_RECURSE +heapframe frame_zero; +frame_zero.Xprevframe = NULL; /* Marks the top level */ +frame_zero.Xnextframe = NULL; /* None are allocated yet */ +md->match_frames_base = &frame_zero; +#endif + /* Check for the special magic call that measures the size of the stack used -per recursive call of match(). */ +per recursive call of match(). Without the funny casting for sizeof, a Windows +compiler gave this error: "unary minus operator applied to unsigned type, +result still unsigned". Hopefully the cast fixes that. 
*/ if (re == NULL && extra_data == NULL && subject == NULL && length == -999 && start_offset == -999) #ifdef NO_RECURSE - return -sizeof(heapframe); + return -((int)sizeof(heapframe)); #else return match(NULL, NULL, NULL, 0, NULL, NULL, 0); #endif @@ -6280,20 +6455,25 @@ if (utf && (options & PCRE_NO_UTF8_CHECK) == 0) /* If the pattern was successfully studied with JIT support, run the JIT executable instead of the rest of this function. Most options must be set at compile time for the JIT code to be usable. Fallback to the normal code path if -an unsupported flag is set. In particular, JIT does not support partial -matching. */ +an unsupported flag is set. */ #ifdef SUPPORT_JIT if (extra_data != NULL - && (extra_data->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 + && (extra_data->flags & (PCRE_EXTRA_EXECUTABLE_JIT | + PCRE_EXTRA_TABLES)) == PCRE_EXTRA_EXECUTABLE_JIT && extra_data->executable_jit != NULL - && (extra_data->flags & PCRE_EXTRA_TABLES) == 0 && (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL | - PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART)) == 0) - return PRIV(jit_exec)(re, extra_data->executable_jit, - (const pcre_uchar *)subject, length, start_offset, options, - ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) - ? MATCH_LIMIT : extra_data->match_limit, offsets, offsetcount); + PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | + PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD)) == 0) + { + rc = PRIV(jit_exec)(re, extra_data, (const pcre_uchar *)subject, length, + start_offset, options, offsets, offsetcount); + + /* PCRE_ERROR_NULL means that the selected normal or partial matching + mode is not compiled. In this case we simply fallback to interpreter. */ + + if (rc != PCRE_ERROR_NULL) return rc; + } #endif /* Carry on with non-JIT matching. 
This information is for finding all the @@ -6887,7 +7067,7 @@ if (rc == MATCH_MATCH || rc == MATCH_ACCEPT) { register int *iptr, *iend; int resetcount = 2 + re->top_bracket * 2; - if (resetcount > offsetcount) resetcount = ocount; + if (resetcount > offsetcount) resetcount = offsetcount; iptr = offsets + md->end_offset_top; iend = offsets + resetcount; while (iptr < iend) *iptr++ = -1; @@ -6908,6 +7088,9 @@ if (rc == MATCH_MATCH || rc == MATCH_ACCEPT) if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0) *(extra_data->mark) = (pcre_uchar *)md->mark; DPRINTF((">>>> returning %d\n", rc)); +#ifdef NO_RECURSE + release_match_heapframes(&frame_zero); +#endif return rc; } @@ -6925,6 +7108,9 @@ if (using_temporary_offsets) if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL) { DPRINTF((">>>> error: returning %d\n", rc)); +#ifdef NO_RECURSE + release_match_heapframes(&frame_zero); +#endif return rc; } @@ -6954,6 +7140,9 @@ else if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0) *(extra_data->mark) = (pcre_uchar *)md->nomatch_mark; +#ifdef NO_RECURSE + release_match_heapframes(&frame_zero); +#endif return rc; } diff --git a/glib/pcre/pcre_fullinfo.c b/glib/pcre/pcre_fullinfo.c index a3d1198b0..7a7db110d 100644 --- a/glib/pcre/pcre_fullinfo.c +++ b/glib/pcre/pcre_fullinfo.c @@ -193,6 +193,10 @@ switch (what) *((int *)where) = (re->flags & PCRE_HASCRORLF) != 0; break; + case PCRE_INFO_MAXLOOKBEHIND: + *((int *)where) = re->max_lookbehind; + break; + default: return PCRE_ERROR_BADOPTION; } diff --git a/glib/pcre/pcre_globals.c b/glib/pcre/pcre_globals.c index 93d3af533..534660c77 100644 --- a/glib/pcre/pcre_globals.c +++ b/glib/pcre/pcre_globals.c @@ -58,7 +58,11 @@ global variables are not used. 
*/ #include "pcre_internal.h" +#ifdef GLIB_COMPILATION #include "gmem.h" +#else +#include +#endif /* GLIB_COMPILATION */ #if defined _MSC_VER || defined __SYMBIAN32__ static void* LocalPcreMalloc(size_t aSize) diff --git a/glib/pcre/pcre_internal.h b/glib/pcre/pcre_internal.h index 41c7ee3f5..9918e71a4 100644 --- a/glib/pcre/pcre_internal.h +++ b/glib/pcre/pcre_internal.h @@ -523,6 +523,11 @@ capturing parenthesis numbers in back references. */ #define PUT2INC(a,n,d) PUT2(a,n,d), a += IMM2_SIZE +/* The maximum length of a MARK name is currently one data unit; it may be +changed in future to be a fixed number of bytes or to depend on LINK_SIZE. */ + +#define MAX_MARK ((1 << (sizeof(pcre_uchar)*8)) - 1) + /* When UTF encoding is being used, a character is no longer just a single character. The macros for character handling generate simple sequences when used in character-mode, and more complicated ones for UTF characters. @@ -887,7 +892,8 @@ time, run time, or study time, respectively. */ PCRE_NO_START_OPTIMIZE) #define PUBLIC_STUDY_OPTIONS \ - PCRE_STUDY_JIT_COMPILE + (PCRE_STUDY_JIT_COMPILE|PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE| \ + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE) /* Magic number to provide a small check against being handed junk. */ @@ -1939,7 +1945,11 @@ enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9, ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, - ERR70, ERR71, ERR72, ERR73, ERR74, ERRCOUNT }; + ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERRCOUNT }; + +/* JIT compiling modes. The function list is indexed by them. */ +enum { JIT_COMPILE, JIT_PARTIAL_SOFT_COMPILE, JIT_PARTIAL_HARD_COMPILE, + JIT_NUMBER_OF_COMPILE_MODES }; /* The real format of the start of the pcre block; the index of names and the code vector run on as long as necessary after the end. 
We store an explicit @@ -1969,16 +1979,15 @@ typedef struct REAL_PCRE { pcre_uint32 size; /* Total that was malloced */ pcre_uint32 options; /* Public options */ pcre_uint16 flags; /* Private flags */ - pcre_uint16 dummy1; /* For future use */ - pcre_uint16 top_bracket; - pcre_uint16 top_backref; + pcre_uint16 max_lookbehind; /* Longest lookbehind (characters) */ + pcre_uint16 top_bracket; /* Highest numbered group */ + pcre_uint16 top_backref; /* Highest numbered back reference */ pcre_uint16 first_char; /* Starting character */ pcre_uint16 req_char; /* This character must be seen */ pcre_uint16 name_table_offset; /* Offset to name table that follows */ pcre_uint16 name_entry_size; /* Size of any name items */ pcre_uint16 name_count; /* Number of name items */ pcre_uint16 ref_count; /* Reference count */ - const pcre_uint8 *tables; /* Pointer to tables or NULL for std */ const pcre_uint8 *nullpad; /* NULL padding */ } REAL_PCRE; @@ -2024,6 +2033,7 @@ typedef struct compile_data { int workspace_size; /* Size of workspace */ int bracount; /* Count of capturing parens as we compile */ int final_bracount; /* Saved value after first pass */ + int max_lookbehind; /* Maximum lookbehind (characters) */ int top_backref; /* Maximum back reference */ unsigned int backref_map; /* Bitmap of low back refs */ int assert_depth; /* Depth of nested assertions */ @@ -2125,6 +2135,9 @@ typedef struct match_data { const pcre_uchar *mark; /* Mark pointer to pass back on success */ const pcre_uchar *nomatch_mark;/* Mark pointer to pass back on failure */ const pcre_uchar *once_target; /* Where to back up to for atomic groups */ +#ifdef NO_RECURSE + void *match_frames_base; /* For remembering malloc'd frames */ +#endif } match_data; /* A similar structure is used for the same purpose by the DFA matching @@ -2179,7 +2192,7 @@ total length. 
*/ #define ctypes_offset (cbits_offset + cbit_length) #define tables_length (ctypes_offset + 256) -/* Internal function prefix */ +/* Internal function and data prefixes. */ #ifdef COMPILE_PCRE8 #ifndef PUBL @@ -2288,9 +2301,10 @@ extern BOOL PRIV(was_newline)(PCRE_PUCHAR, int, PCRE_PUCHAR, extern BOOL PRIV(xclass)(int, const pcre_uchar *, BOOL); #ifdef SUPPORT_JIT -extern void PRIV(jit_compile)(const REAL_PCRE *, PUBL(extra) *); -extern int PRIV(jit_exec)(const REAL_PCRE *, void *, - const pcre_uchar *, int, int, int, int, int *, int); +extern void PRIV(jit_compile)(const REAL_PCRE *, + PUBL(extra) *, int); +extern int PRIV(jit_exec)(const REAL_PCRE *, const PUBL(extra) *, + const pcre_uchar *, int, int, int, int *, int); extern void PRIV(jit_free)(void *); extern int PRIV(jit_get_size)(void *); extern const char* PRIV(jit_get_target)(void); @@ -2298,15 +2312,6 @@ extern const char* PRIV(jit_get_target)(void); /* Unicode character database (UCD) */ -typedef struct { - pcre_uint8 script; - pcre_uint8 chartype; - pcre_int32 other_case; -} ucd_record; - -extern const ucd_record PRIV(ucd_records)[]; -extern const pcre_uint8 PRIV(ucd_stage1)[]; -extern const pcre_uint16 PRIV(ucd_stage2)[]; extern const int PRIV(ucp_gentype)[]; #ifdef SUPPORT_JIT extern const int PRIV(ucp_typerange)[]; diff --git a/glib/pcre/pcre_jit_compile.c b/glib/pcre/pcre_jit_compile.c index 97d227c8f..acb7ea22a 100644 --- a/glib/pcre/pcre_jit_compile.c +++ b/glib/pcre/pcre_jit_compile.c @@ -82,23 +82,23 @@ The code generator follows the recursive nature of the PERL compatible regular expressions. The basic blocks of regular expressions are condition checkers whose execute different commands depending on the result of the condition check. The relationship between the operators can be horizontal (concatenation) and -vertical (sub-expression) (See struct fallback_common for more details). +vertical (sub-expression) (See struct backtrack_common for more details). 
'ab' - 'a' and 'b' regexps are concatenated 'a+' - 'a' is the sub-expression of the '+' operator The condition checkers are boolean (true/false) checkers. Machine code is generated for the checker itself and for the actions depending on the result of the checker. -The 'true' case is called as the hot path (expected path), and the other is called as -the 'fallback' path. Branch instructions are expesive for all CPUs, so we avoid taken -branches on the hot path. +The 'true' case is called as the try path (expected path), and the other is called as +the 'backtrack' path. Branch instructions are expesive for all CPUs, so we avoid taken +branches on the try path. Greedy star operator (*) : - Hot path: match happens. - Fallback path: match failed. + Try path: match happens. + Backtrack path: match failed. Non-greedy star operator (*?) : - Hot path: no need to perform a match. - Fallback path: match is required. + Try path: no need to perform a match. + Backtrack path: match is required. The following example shows how the code generated for a capturing bracket with two alternatives. 
Let A, B, C, D are arbirary regular expressions, and @@ -108,34 +108,34 @@ we have the following regular expression: The generated code will be the following: - A hot path - '(' hot path (pushing arguments to the stack) - B hot path - ')' hot path (pushing arguments to the stack) - D hot path + A try path + '(' try path (pushing arguments to the stack) + B try path + ')' try path (pushing arguments to the stack) + D try path return with successful match - D fallback path - ')' fallback path (If we arrived from "C" jump to the fallback of "C") - B fallback path + D backtrack path + ')' backtrack path (If we arrived from "C" jump to the backtrack of "C") + B backtrack path C expected path - jump to D hot path - C fallback path - A fallback path + jump to D try path + C backtrack path + A backtrack path - Notice, that the order of fallback code paths are the opposite of the fast + Notice, that the order of backtrack code paths are the opposite of the fast code paths. In this way the topmost value on the stack is always belong - to the current fallback code path. The fallback code path must check + to the current backtrack code path. The backtrack path must check whether there is a next alternative. If so, it needs to jump back to - the hot path eventually. Otherwise it needs to clear out its own stack - frame and continue the execution on the fallback code paths. + the try path eventually. Otherwise it needs to clear out its own stack + frame and continue the execution on the backtrack code paths. */ /* Saved stack frames: Atomic blocks and asserts require reloading the values of local variables -when the fallback mechanism performed. Because of OP_RECURSE, the locals +when the backtrack mechanism performed. Because of OP_RECURSE, the locals are not necessarly known in compile time, thus we need a dynamic restore mechanism. 
@@ -152,7 +152,8 @@ typedef struct jit_arguments { const pcre_uchar *begin; const pcre_uchar *end; int *offsets; - pcre_uchar *ptr; + pcre_uchar *uchar_ptr; + pcre_uchar *mark_ptr; /* Everything else after. */ int offsetcount; int calllimit; @@ -162,12 +163,12 @@ typedef struct jit_arguments { pcre_uint8 notempty_atstart; } jit_arguments; -typedef struct executable_function { - void *executable_func; +typedef struct executable_functions { + void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES]; PUBL(jit_callback) callback; void *userdata; - sljit_uw executable_size; -} executable_function; + sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES]; +} executable_functions; typedef struct jump_list { struct sljit_jump *jump; @@ -187,71 +188,71 @@ typedef struct stub_list { typedef int (SLJIT_CALL *jit_function)(jit_arguments *args); /* The following structure is the key data type for the recursive -code generator. It is allocated by compile_hotpath, and contains -the aguments for compile_fallbackpath. Must be the first member +code generator. It is allocated by compile_trypath, and contains +the aguments for compile_backtrackpath. Must be the first member of its descendants. */ -typedef struct fallback_common { +typedef struct backtrack_common { /* Concatenation stack. */ - struct fallback_common *prev; - jump_list *nextfallbacks; + struct backtrack_common *prev; + jump_list *nextbacktracks; /* Internal stack (for component operators). */ - struct fallback_common *top; - jump_list *topfallbacks; + struct backtrack_common *top; + jump_list *topbacktracks; /* Opcode pointer. */ pcre_uchar *cc; -} fallback_common; +} backtrack_common; -typedef struct assert_fallback { - fallback_common common; +typedef struct assert_backtrack { + backtrack_common common; jump_list *condfailed; /* Less than 0 (-1) if a frame is not needed. */ int framesize; /* Points to our private memory word on the stack. */ int localptr; /* For iterators. 
*/ - struct sljit_label *hotpath; -} assert_fallback; + struct sljit_label *trypath; +} assert_backtrack; -typedef struct bracket_fallback { - fallback_common common; +typedef struct bracket_backtrack { + backtrack_common common; /* Where to coninue if an alternative is successfully matched. */ - struct sljit_label *althotpath; + struct sljit_label *alttrypath; /* For rmin and rmax iterators. */ - struct sljit_label *recursivehotpath; + struct sljit_label *recursivetrypath; /* For greedy ? operator. */ - struct sljit_label *zerohotpath; + struct sljit_label *zerotrypath; /* Contains the branches of a failed condition. */ union { /* Both for OP_COND, OP_SCOND. */ jump_list *condfailed; - assert_fallback *assert; + assert_backtrack *assert; /* For OP_ONCE. -1 if not needed. */ int framesize; } u; /* Points to our private memory word on the stack. */ int localptr; -} bracket_fallback; +} bracket_backtrack; -typedef struct bracketpos_fallback { - fallback_common common; +typedef struct bracketpos_backtrack { + backtrack_common common; /* Points to our private memory word on the stack. */ int localptr; /* Reverting stack is needed. */ int framesize; /* Allocated stack size. */ int stacksize; -} bracketpos_fallback; +} bracketpos_backtrack; -typedef struct braminzero_fallback { - fallback_common common; - struct sljit_label *hotpath; -} braminzero_fallback; +typedef struct braminzero_backtrack { + backtrack_common common; + struct sljit_label *trypath; +} braminzero_backtrack; -typedef struct iterator_fallback { - fallback_common common; +typedef struct iterator_backtrack { + backtrack_common common; /* Next iteration. 
*/ - struct sljit_label *hotpath; -} iterator_fallback; + struct sljit_label *trypath; +} iterator_backtrack; typedef struct recurse_entry { struct recurse_entry *next; @@ -263,30 +264,55 @@ typedef struct recurse_entry { int start; } recurse_entry; -typedef struct recurse_fallback { - fallback_common common; -} recurse_fallback; +typedef struct recurse_backtrack { + backtrack_common common; +} recurse_backtrack; typedef struct compiler_common { struct sljit_compiler *compiler; pcre_uchar *start; - int localsize; + + /* Opcode local area direct map. */ int *localptrs; + int cbraptr; + /* OVector starting point. Must be divisible by 2. */ + int ovector_start; + /* Last known position of the requested byte. */ + int req_char_ptr; + /* Head of the last recursion. */ + int recursive_head; + /* First inspected character for partial matching. */ + int start_used_ptr; + /* Starting pointer for partial soft matches. */ + int hit_start; + /* End pointer of the first line. */ + int first_line_end; + /* Points to the marked string. */ + int mark_ptr; + + /* Other */ const pcre_uint8 *fcc; sljit_w lcc; - int cbraptr; + int mode; int nltype; int newline; int bsr_nltype; int endonly; + BOOL has_set_som; sljit_w ctypes; sljit_uw name_table; sljit_w name_count; sljit_w name_entry_size; + + /* Labels and jump lists. 
*/ + struct sljit_label *partialmatchlabel; + struct sljit_label *leavelabel; struct sljit_label *acceptlabel; stub_list *stubs; recurse_entry *entries; recurse_entry *currententry; + jump_list *partialmatch; + jump_list *leave; jump_list *accept; jump_list *calllimit; jump_list *stackalloc; @@ -309,7 +335,9 @@ typedef struct compiler_common { #endif #endif /* SUPPORT_UTF */ #ifdef SUPPORT_UCP - jump_list *getucd; + jump_list *getunichartype; + jump_list *getunichartype_2; + jump_list *getunicharscript; #endif } compiler_common; @@ -349,7 +377,8 @@ typedef struct compare_context { enum { frame_end = 0, - frame_setstrbegin = -1 + frame_setstrbegin = -1, + frame_setmark = -2 }; /* Undefine sljit macros. */ @@ -376,19 +405,13 @@ enum { /* Two local variables for possessive quantifiers (char1 cannot use them). */ #define POSSESSIVE0 (2 * sizeof(sljit_w)) #define POSSESSIVE1 (3 * sizeof(sljit_w)) -/* Head of the last recursion. */ -#define RECURSIVE_HEAD (4 * sizeof(sljit_w)) /* Max limit of recursions. */ -#define CALL_LIMIT (5 * sizeof(sljit_w)) -/* Last known position of the requested byte. */ -#define REQ_CHAR_PTR (6 * sizeof(sljit_w)) -/* End pointer of the first line. */ -#define FIRSTLINE_END (7 * sizeof(sljit_w)) +#define CALL_LIMIT (4 * sizeof(sljit_w)) /* The output vector is stored on the stack, and contains pointers to characters. The vector data is divided into two groups: the first group contains the start / end character pointers, and the second is the start pointers when the end of the capturing group has not yet reached. */ -#define OVECTOR_START (8 * sizeof(sljit_w)) +#define OVECTOR_START (common->ovector_start) #define OVECTOR(i) (OVECTOR_START + (i) * sizeof(sljit_w)) #define OVECTOR_PRIV(i) (common->cbraptr + (i) * sizeof(sljit_w)) #define PRIV_DATA(cc) (common->localptrs[(cc) - common->start]) @@ -426,6 +449,8 @@ the start pointers when the end of the capturing group has not yet reached. 
*/ sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label)) #define COND_VALUE(op, dst, dstw, type) \ sljit_emit_cond_value(compiler, (op), (dst), (dstw), (type)) +#define GET_LOCAL_BASE(dst, dstw, offset) \ + sljit_get_local_base(compiler, (dst), (dstw), (offset)) static pcre_uchar* bracketend(pcre_uchar* cc) { @@ -444,8 +469,8 @@ return cc; init_frame get_localsize copy_locals - compile_hotpath - compile_fallbackpath + compile_trypath + compile_backtrackpath */ static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc) @@ -497,6 +522,7 @@ switch(*cc) case OP_BRAZERO: case OP_BRAMINZERO: case OP_BRAPOSZERO: + case OP_COMMIT: case OP_FAIL: case OP_ACCEPT: case OP_ASSERT_ACCEPT: @@ -635,6 +661,9 @@ switch(*cc) case OP_SCBRAPOS: return cc + 1 + LINK_SIZE + IMM2_SIZE; + case OP_MARK: + return cc + 1 + 2 + cc[1]; + default: return NULL; } @@ -649,6 +678,11 @@ while (cc < ccend) { switch(*cc) { + case OP_SET_SOM: + common->has_set_som = TRUE; + cc += 1; + break; + case OP_ASSERT: case OP_ASSERT_NOT: case OP_ASSERTBACK: @@ -677,6 +711,25 @@ while (cc < ccend) cc += 1 + LINK_SIZE; break; + case OP_RECURSE: + /* Set its value only once. 
*/ + if (common->recursive_head == 0) + { + common->recursive_head = common->ovector_start; + common->ovector_start += sizeof(sljit_w); + } + cc += 1 + LINK_SIZE; + break; + + case OP_MARK: + if (common->mark_ptr == 0) + { + common->mark_ptr = common->ovector_start; + common->ovector_start += sizeof(sljit_w); + } + cc += 1 + 2 + cc[1]; + break; + default: cc = next_opcode(common, cc); if (cc == NULL) @@ -742,7 +795,8 @@ static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive pcre_uchar *ccend = bracketend(cc); int length = 0; BOOL possessive = FALSE; -BOOL setsom_found = FALSE; +BOOL setsom_found = recursive; +BOOL setmark_found = recursive; if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS)) { @@ -756,13 +810,37 @@ while (cc < ccend) switch(*cc) { case OP_SET_SOM: - case OP_RECURSE: + SLJIT_ASSERT(common->has_set_som); if (!setsom_found) { length += 2; setsom_found = TRUE; } - cc += (*cc == OP_SET_SOM) ? 1 : 1 + LINK_SIZE; + cc += 1; + break; + + case OP_MARK: + SLJIT_ASSERT(common->mark_ptr != 0); + if (!setmark_found) + { + length += 2; + setmark_found = TRUE; + } + cc += 1 + 2 + cc[1]; + break; + + case OP_RECURSE: + if (common->has_set_som && !setsom_found) + { + length += 2; + setsom_found = TRUE; + } + if (common->mark_ptr != 0 && !setmark_found) + { + length += 2; + setmark_found = TRUE; + } + cc += 1 + LINK_SIZE; break; case OP_CBRA: @@ -792,7 +870,8 @@ static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, in { DEFINE_COMPILER; pcre_uchar *ccend = bracketend(cc); -BOOL setsom_found = FALSE; +BOOL setsom_found = recursive; +BOOL setmark_found = recursive; int offset; /* >= 1 + shortest item size (2) */ @@ -807,7 +886,7 @@ while (cc < ccend) switch(*cc) { case OP_SET_SOM: - case OP_RECURSE: + SLJIT_ASSERT(common->has_set_som); if (!setsom_found) { OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)); @@ -817,7 +896,43 @@ while (cc < ccend) stackpos += (int)sizeof(sljit_w); setsom_found = 
TRUE; } - cc += (*cc == OP_SET_SOM) ? 1 : 1 + LINK_SIZE; + cc += 1; + break; + + case OP_MARK: + SLJIT_ASSERT(common->mark_ptr != 0); + if (!setmark_found) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark); + stackpos += (int)sizeof(sljit_w); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); + stackpos += (int)sizeof(sljit_w); + setmark_found = TRUE; + } + cc += 1 + 2 + cc[1]; + break; + + case OP_RECURSE: + if (common->has_set_som && !setsom_found) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin); + stackpos += (int)sizeof(sljit_w); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); + stackpos += (int)sizeof(sljit_w); + setsom_found = TRUE; + } + if (common->mark_ptr != 0 && !setmark_found) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark); + stackpos += (int)sizeof(sljit_w); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); + stackpos += (int)sizeof(sljit_w); + setmark_found = TRUE; + } + cc += 1 + LINK_SIZE; break; case OP_CBRA: @@ -944,9 +1059,9 @@ while (status != end) switch(status) { case start: - SLJIT_ASSERT(save); + SLJIT_ASSERT(save && common->recursive_head != 0); count = 1; - srcw[0] = RECURSIVE_HEAD; + srcw[0] = common->recursive_head; status = loop; break; @@ -1213,7 +1328,7 @@ if (length < 8) } else { - OP2(SLJIT_ADD, SLJIT_TEMPORARY_REG2, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START - sizeof(sljit_w)); + GET_LOCAL_BASE(SLJIT_TEMPORARY_REG2, 0, OVECTOR_START - sizeof(sljit_w)); OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, length); loop = LABEL(); OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(sljit_w), SLJIT_TEMPORARY_REG1, 0); @@ -1233,10 +1348,14 @@ OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, 
SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0); OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, ARGUMENTS, 0); +if (common->mark_ptr != 0) + OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr); OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsetcount)); +if (common->mark_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_TEMPORARY_REG3, 0); OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int)); OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, begin)); -OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START); +GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START); /* Unlikely, but possible */ earlyexit = CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 0); loop = LABEL(); @@ -1254,7 +1373,7 @@ JUMPHERE(earlyexit); /* Calculate the return value, which is the maximum ovector value. */ if (topbracket > 1) { - OP2(SLJIT_ADD, SLJIT_TEMPORARY_REG1, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START + topbracket * 2 * sizeof(sljit_w)); + GET_LOCAL_BASE(SLJIT_TEMPORARY_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_w)); OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, topbracket + 1); /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */ @@ -1268,6 +1387,61 @@ else OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1); } +static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *leave) +{ +DEFINE_COMPILER; + +SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2); +SLJIT_ASSERT(common->start_used_ptr != 0 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? 
common->hit_start != 0 : common->hit_start == 0)); + +OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0); +OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL); +OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, offsetcount)); +CMPTO(SLJIT_C_LESS, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 2, leave); + +/* Store match begin and end. */ +OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, begin)); +OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, offsets)); +OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start); +OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0); +#ifdef COMPILE_PCRE16 +OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, 1); +#endif +OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(int), SLJIT_SAVED_REG2, 0); + +OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG1, 0); +#ifdef COMPILE_PCRE16 +OP2(SLJIT_ASHR, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1); +#endif +OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), 0, SLJIT_TEMPORARY_REG3, 0); + +JUMPTO(SLJIT_JUMP, leave); +} + +static SLJIT_INLINE void check_start_used_ptr(compiler_common *common) +{ +/* May destroy TMP1. */ +DEFINE_COMPILER; +struct sljit_jump *jump; + +if (common->mode == JIT_PARTIAL_SOFT_COMPILE) + { + /* The value of -1 must be kept for start_used_ptr! */ + OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1); + /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting + is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. 
*/ + jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0); + JUMPHERE(jump); + } +else if (common->mode == JIT_PARTIAL_HARD_COMPILE) + { + jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0); + JUMPHERE(jump); + } +} + static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc) { /* Detects if the character has an othercase. */ @@ -1389,10 +1563,94 @@ return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8)); #endif /* COMPILE_PCRE8 */ } -static SLJIT_INLINE void check_input_end(compiler_common *common, jump_list **fallbacks) +static void check_partial(compiler_common *common, BOOL force) +{ +/* Checks whether a partial match has occurred. Does not modify registers. */ +DEFINE_COMPILER; +struct sljit_jump *jump = NULL; + +SLJIT_ASSERT(!force || common->mode != JIT_COMPILE); + +if (common->mode == JIT_COMPILE) + return; + +if (!force) + jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0); +else if (common->mode == JIT_PARTIAL_SOFT_COMPILE) + jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1); + +if (common->mode == JIT_PARTIAL_SOFT_COMPILE) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1); +else + { + if (common->partialmatchlabel != NULL) + JUMPTO(SLJIT_JUMP, common->partialmatchlabel); + else + add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP)); + } + +if (jump != NULL) + JUMPHERE(jump); +} + +static struct sljit_jump *check_str_end(compiler_common *common) +{ +/* Does not affect registers. Usually used in a tight spot. 
*/ +DEFINE_COMPILER; +struct sljit_jump *jump; +struct sljit_jump *nohit; +struct sljit_jump *return_value; + +if (common->mode == JIT_COMPILE) + return CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + +jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0); +if (common->mode == JIT_PARTIAL_SOFT_COMPILE) + { + nohit = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1); + JUMPHERE(nohit); + return_value = JUMP(SLJIT_JUMP); + } +else + { + return_value = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0); + if (common->partialmatchlabel != NULL) + JUMPTO(SLJIT_JUMP, common->partialmatchlabel); + else + add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP)); + } +JUMPHERE(jump); +return return_value; +} + +static void detect_partial_match(compiler_common *common, jump_list **backtracks) { DEFINE_COMPILER; -add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); +struct sljit_jump *jump; + +if (common->mode == JIT_COMPILE) + { + add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + return; + } + +/* Partial matching mode. 
*/ +jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0); +add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0)); +if (common->mode == JIT_PARTIAL_SOFT_COMPILE) + { + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1); + add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); + } +else + { + if (common->partialmatchlabel != NULL) + JUMPTO(SLJIT_JUMP, common->partialmatchlabel); + else + add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP)); + } +JUMPHERE(jump); } static void read_char(compiler_common *common) @@ -1533,7 +1791,7 @@ if (common->utf) OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); } -static void check_newlinechar(compiler_common *common, int nltype, jump_list **fallbacks, BOOL jumpiftrue) +static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue) { /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */ DEFINE_COMPILER; @@ -1541,7 +1799,7 @@ DEFINE_COMPILER; if (nltype == NLTYPE_ANY) { add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL)); - add_jump(compiler, fallbacks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO)); + add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO)); } else if (nltype == NLTYPE_ANYCRLF) { @@ -1549,12 +1807,12 @@ else if (nltype == NLTYPE_ANYCRLF) COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL); COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL); - add_jump(compiler, fallbacks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO)); + add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO)); } else { SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256); - add_jump(compiler, fallbacks, CMP(jumpiftrue ? 
SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline)); + add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline)); } } @@ -1568,7 +1826,7 @@ of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */ DEFINE_COMPILER; struct sljit_jump *jump; -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize); +sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); /* Searching for the first zero. */ OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20); jump = JUMP(SLJIT_C_NOT_ZERO); @@ -1627,7 +1885,7 @@ DEFINE_COMPILER; struct sljit_jump *jump; struct sljit_jump *compare; -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize); +sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20); jump = JUMP(SLJIT_C_NOT_ZERO); @@ -1664,7 +1922,7 @@ of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */ DEFINE_COMPILER; struct sljit_jump *jump; -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize); +sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00); /* Do nothing, only return. */ sljit_emit_fast_return(compiler, RETURN_ADDR, 0); @@ -1689,28 +1947,55 @@ sljit_emit_fast_return(compiler, RETURN_ADDR, 0); #ifdef SUPPORT_UCP -/* UCD_BLOCK_SIZE must be 128 (see the assert below). */ -#define UCD_BLOCK_MASK 127 -#define UCD_BLOCK_SHIFT 7 - -static void do_getucd(compiler_common *common) +static sljit_w SLJIT_CALL getunichartype(sljit_w c) { -/* Search the UCD record for the character comes in TMP1. -Returns chartype in TMP1 and UCD offset in TMP2. 
*/ + return (sljit_w)(unsigned int)UCD_CHARTYPE((unsigned int)c); +} + +static sljit_w SLJIT_CALL getunicharscript(sljit_w c) +{ + return (sljit_w)(unsigned int)UCD_SCRIPT((unsigned int)c); +} + +static void do_getunichartype(compiler_common *common) +{ +/* Character comes in TMP1. Returns chartype in TMP1 */ DEFINE_COMPILER; -SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8); +sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); +/* Save registers */ +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0); +sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(getunichartype)); +/* Restore registers */ +OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0); +sljit_emit_fast_return(compiler, RETURN_ADDR, 0); +} -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize); -OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT); -OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(ucd_stage1)); -OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK); -OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT); -OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); -OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_stage2)); -OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1); -OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype)); -OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3); +static void do_getunichartype_2(compiler_common *common) +{ +/* Character comes in TMP1. 
Returns chartype in TMP1 */ +DEFINE_COMPILER; + +sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); +/* Save registers */ +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STACK_TOP, 0); +sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(getunichartype)); +/* Restore registers */ +OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0); +sljit_emit_fast_return(compiler, RETURN_ADDR, 0); +} + +static void do_getunicharscript(compiler_common *common) +{ +/* Character comes in TMP1. Returns script in TMP1 */ +DEFINE_COMPILER; + +sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); +/* Save registers */ +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0); +sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(getunicharscript)); +/* Restore registers */ +OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0); sljit_emit_fast_return(compiler, RETURN_ADDR, 0); } #endif @@ -1737,8 +2022,9 @@ if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY || if (firstline) { /* Search for the end of the first line. 
*/ + SLJIT_ASSERT(common->first_line_end != 0); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STR_PTR, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_END, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_END, 0); if (common->nltype == NLTYPE_FIXED && common->newline > 255) { @@ -1749,18 +2035,18 @@ if (firstline) OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop); CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop); - OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); } else { end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); mainloop = LABEL(); /* Continual stores does not cause data dependency. */ - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0); read_char(common); check_newlinechar(common, common->nltype, &newline, TRUE); CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0); set_jumps(newline, LABEL()); } @@ -1843,7 +2129,7 @@ pcre_uchar oc, bit; if (firstline) { OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0); - OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END); + OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end); } start = LABEL(); @@ -1921,7 +2207,7 @@ jump_list *newline = NULL; if (firstline) { OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0); - OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END); + OP1(SLJIT_MOV, STR_END, 0, 
SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end); } if (common->nltype == NLTYPE_FIXED && common->newline > 255) @@ -2005,7 +2291,7 @@ struct sljit_jump *jump; if (firstline) { OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0); - OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END); + OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end); } start = LABEL(); @@ -2070,7 +2356,8 @@ struct sljit_jump *foundoc = NULL; struct sljit_jump *notfound; pcre_uchar oc, bit; -OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_CHAR_PTR); +SLJIT_ASSERT(common->req_char_ptr != 0); +OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr); OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX); toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0); alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0); @@ -2115,7 +2402,7 @@ JUMPTO(SLJIT_JUMP, loop); JUMPHERE(found); if (foundoc) JUMPHERE(foundoc); -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_CHAR_PTR, TMP1, 0); +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0); JUMPHERE(alreadyfound); JUMPHERE(toolong); return notfound; @@ -2127,14 +2414,15 @@ DEFINE_COMPILER; struct sljit_jump *jump; struct sljit_label *mainloop; -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize); +sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0); +GET_LOCAL_BASE(TMP3, 0, 0); /* Drop frames until we reach STACK_TOP. 
*/ mainloop = LABEL(); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0); jump = CMP(SLJIT_C_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, frame_end); -OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_LOCALS_REG, 0); +OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0); OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_w)); OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_w), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_w)); OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_w)); @@ -2154,6 +2442,17 @@ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0); JUMPTO(SLJIT_JUMP, mainloop); JUMPHERE(jump); +if (common->mark_ptr != 0) + { + jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setmark); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w)); + OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0); + JUMPTO(SLJIT_JUMP, mainloop); + + JUMPHERE(jump); + } + /* Unknown command. */ OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w)); JUMPTO(SLJIT_JUMP, mainloop); @@ -2162,20 +2461,21 @@ JUMPTO(SLJIT_JUMP, mainloop); static void check_wordboundary(compiler_common *common) { DEFINE_COMPILER; -struct sljit_jump *beginend; +struct sljit_jump *skipread; #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF struct sljit_jump *jump; #endif SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16); -sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, 1, 5, 5, common->localsize); +sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0); /* Get type of the previous char, and put it to LOCALS1. 
*/ OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0); -beginend = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0); +skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0); skip_char_back(common); +check_start_used_ptr(common); read_char(common); /* Testing char type. */ @@ -2184,7 +2484,7 @@ if (common->use_ucp) { OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1); jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE); - add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL)); + add_jump(compiler, &common->getunichartype_2, JUMP(SLJIT_FAST_CALL)); OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll); OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll); COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL); @@ -2216,10 +2516,10 @@ else JUMPHERE(jump); #endif /* COMPILE_PCRE8 */ } -JUMPHERE(beginend); +JUMPHERE(skipread); OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0); -beginend = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); +skipread = check_str_end(common); peek_char(common); /* Testing char type. This is a code duplication. 
*/ @@ -2228,7 +2528,7 @@ if (common->use_ucp) { OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1); jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE); - add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL)); + add_jump(compiler, &common->getunichartype_2, JUMP(SLJIT_FAST_CALL)); OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll); OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll); COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL); @@ -2260,7 +2560,7 @@ else JUMPHERE(jump); #endif /* COMPILE_PCRE8 */ } -JUMPHERE(beginend); +JUMPHERE(skipread); OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1); sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0); @@ -2271,7 +2571,7 @@ static void check_anynewline(compiler_common *common) /* Check whether TMP1 contains a newline character. TMP2 destroyed. */ DEFINE_COMPILER; -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize); +sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a); OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a); @@ -2298,7 +2598,7 @@ static void check_hspace(compiler_common *common) /* Check whether TMP1 contains a newline character. TMP2 destroyed. */ DEFINE_COMPILER; -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize); +sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09); COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL); @@ -2337,7 +2637,7 @@ static void check_vspace(compiler_common *common) /* Check whether TMP1 contains a newline character. TMP2 destroyed. 
*/ DEFINE_COMPILER; -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize); +sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a); OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a); @@ -2369,7 +2669,7 @@ DEFINE_COMPILER; struct sljit_jump *jump; struct sljit_label *label; -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize); +sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0); @@ -2398,7 +2698,7 @@ DEFINE_COMPILER; struct sljit_jump *jump; struct sljit_label *label; -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize); +sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0); @@ -2445,16 +2745,16 @@ static const pcre_uchar *SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arg { /* This function would be ineffective to do in JIT level. 
*/ int c1, c2; -const pcre_uchar *src2 = args->ptr; +const pcre_uchar *src2 = args->uchar_ptr; const pcre_uchar *end2 = args->end; while (src1 < end1) { if (src2 >= end2) - return 0; + return (pcre_uchar*)1; GETCHARINC(c1, src1); GETCHARINC(c2, src2); - if (c1 != c2 && c1 != UCD_OTHERCASE(c2)) return 0; + if (c1 != c2 && c1 != UCD_OTHERCASE(c2)) return NULL; } return src2; } @@ -2462,7 +2762,7 @@ return src2; #endif /* SUPPORT_UTF && SUPPORT_UCP */ static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc, - compare_context* context, jump_list **fallbacks) + compare_context* context, jump_list **backtracks) { DEFINE_COMPILER; unsigned int othercasebit = 0; @@ -2563,20 +2863,20 @@ do case 4 / sizeof(pcre_uchar): if (context->oc.asint != 0) OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint); - add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint)); + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint)); break; case 2 / sizeof(pcre_uchar): if (context->oc.asushort != 0) OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort); - add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort)); + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort)); break; #ifdef COMPILE_PCRE8 case 1: if (context->oc.asbyte != 0) OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte); - add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte)); + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte)); break; 
#endif @@ -2602,10 +2902,10 @@ do if (othercasebit != 0 && othercasechar == cc) { OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit); - add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit)); + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit)); } else - add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc)); + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc)); #endif @@ -2641,11 +2941,11 @@ return cc; } \ charoffset = (value); -static void compile_xclass_hotpath(compiler_common *common, pcre_uchar *cc, jump_list **fallbacks) +static void compile_xclass_trypath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks) { DEFINE_COMPILER; jump_list *found = NULL; -jump_list **list = (*cc & XCL_NOT) == 0 ? &found : fallbacks; +jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks; unsigned int c; int compares; struct sljit_jump *jump = NULL; @@ -2660,7 +2960,7 @@ int invertcmp, numberofcmps; unsigned int charoffset; /* Although SUPPORT_UTF must be defined, we are not necessary in utf mode. */ -check_input_end(common, fallbacks); +detect_partial_match(common, backtracks); read_char(common); if ((*cc++ & XCL_MAP) != 0) @@ -2764,39 +3064,45 @@ while (*cc != XCL_END) /* Simple register allocation. TMP1 is preferred if possible. */ if (needstype || needsscript) { - if (needschar && !charsaved) + if ((needschar || needsscript) && !charsaved) OP1(SLJIT_MOV, TMP3, 0, TMP1, 0); - add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL)); + + /* Needed to save important temporary registers. 
*/ + SLJIT_ASSERT(TMP1 == SLJIT_TEMPORARY_REG1 && + STACK_TOP == SLJIT_TEMPORARY_REG2 && + TMP2 == SLJIT_TEMPORARY_REG3); + if (needschar) { if (needstype) - { - OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0); typereg = RETURN_ADDR; - } - if (needsscript) scriptreg = TMP3; - OP1(SLJIT_MOV, TMP1, 0, TMP3, 0); } else if (needstype && needsscript) scriptreg = TMP3; /* In all other cases only one of them was specified, and that can goes to TMP1. */ + if (needstype) + { + add_jump(compiler, &common->getunichartype, JUMP(SLJIT_FAST_CALL)); + if (typereg != TMP1) + OP1(SLJIT_MOV, typereg, 0, TMP1, 0); + } + if (needsscript) { - if (scriptreg == TMP1) - { - OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script)); - OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3); - } - else - { - OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3); - OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script)); - OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0); - } + /* Get the char again */ + if (needstype) + OP1(SLJIT_MOV, TMP1, 0, TMP3, 0); + + add_jump(compiler, &common->getunicharscript, JUMP(SLJIT_FAST_CALL)); + if (scriptreg != TMP1) + OP1(SLJIT_MOV, scriptreg, 0, TMP1, 0); } + + if (needschar) + OP1(SLJIT_MOV, TMP1, 0, TMP3, 0); } #endif @@ -2811,7 +3117,7 @@ typeoffset = 0; while (*cc != XCL_END) { compares--; - invertcmp = (compares == 0 && list != fallbacks); + invertcmp = (compares == 0 && list != backtracks); jump = NULL; if (*cc == XCL_SINGLE) @@ -2893,7 +3199,7 @@ while (*cc != XCL_END) switch(*cc) { case PT_ANY: - if (list != fallbacks) + if (list != backtracks) { if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0)) continue; @@ -2966,7 +3272,7 @@ while (*cc != XCL_END) #endif if (jump != NULL) - add_jump(compiler, compares > 0 ? list : fallbacks, jump); + add_jump(compiler, compares > 0 ? 
list : backtracks, jump); } if (found != NULL) @@ -2978,7 +3284,7 @@ if (found != NULL) #endif -static pcre_uchar *compile_char1_hotpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **fallbacks) +static pcre_uchar *compile_char1_trypath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks) { DEFINE_COMPILER; int length; @@ -2997,63 +3303,68 @@ switch(type) case OP_SOD: OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); - add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0)); + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0)); return cc; case OP_SOM: OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); - add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0)); + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0)); return cc; case OP_NOT_WORD_BOUNDARY: case OP_WORD_BOUNDARY: add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL)); - add_jump(compiler, fallbacks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO)); + add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO)); return cc; case OP_NOT_DIGIT: case OP_DIGIT: - check_input_end(common, fallbacks); + detect_partial_match(common, backtracks); read_char8_type(common); OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit); - add_jump(compiler, fallbacks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO)); + add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? 
SLJIT_C_ZERO : SLJIT_C_NOT_ZERO)); return cc; case OP_NOT_WHITESPACE: case OP_WHITESPACE: - check_input_end(common, fallbacks); + detect_partial_match(common, backtracks); read_char8_type(common); OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space); - add_jump(compiler, fallbacks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO)); + add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO)); return cc; case OP_NOT_WORDCHAR: case OP_WORDCHAR: - check_input_end(common, fallbacks); + detect_partial_match(common, backtracks); read_char8_type(common); OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word); - add_jump(compiler, fallbacks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO)); + add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO)); return cc; case OP_ANY: - check_input_end(common, fallbacks); + detect_partial_match(common, backtracks); read_char(common); if (common->nltype == NLTYPE_FIXED && common->newline > 255) { jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff); - jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + if (common->mode != JIT_PARTIAL_HARD_COMPILE) + jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + else + jump[1] = check_str_end(common); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); - add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff)); - JUMPHERE(jump[1]); + add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff)); + if (jump[1] != NULL) + JUMPHERE(jump[1]); JUMPHERE(jump[0]); } else - check_newlinechar(common, common->nltype, fallbacks, TRUE); + check_newlinechar(common, common->nltype, backtracks, TRUE); return cc; case OP_ALLANY: - check_input_end(common, fallbacks); + detect_partial_match(common, backtracks); #ifdef SUPPORT_UTF if (common->utf) { @@ 
-3081,7 +3392,7 @@ switch(type) return cc; case OP_ANYBYTE: - check_input_end(common, fallbacks); + detect_partial_match(common, backtracks); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); return cc; @@ -3094,22 +3405,26 @@ switch(type) propdata[2] = cc[0]; propdata[3] = cc[1]; propdata[4] = XCL_END; - compile_xclass_hotpath(common, propdata, fallbacks); + compile_xclass_trypath(common, propdata, backtracks); return cc + 2; #endif #endif case OP_ANYNL: - check_input_end(common, fallbacks); + detect_partial_match(common, backtracks); read_char(common); jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR); - jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + /* We don't need to handle soft partial matching case. */ + if (common->mode != JIT_PARTIAL_HARD_COMPILE) + jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + else + jump[1] = check_str_end(common); OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); jump[2] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); jump[3] = JUMP(SLJIT_JUMP); JUMPHERE(jump[0]); - check_newlinechar(common, common->bsr_nltype, fallbacks, FALSE); + check_newlinechar(common, common->bsr_nltype, backtracks, FALSE); JUMPHERE(jump[1]); JUMPHERE(jump[2]); JUMPHERE(jump[3]); @@ -3117,58 +3432,79 @@ switch(type) case OP_NOT_HSPACE: case OP_HSPACE: - check_input_end(common, fallbacks); + detect_partial_match(common, backtracks); read_char(common); add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL)); - add_jump(compiler, fallbacks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO)); + add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? 
SLJIT_C_NOT_ZERO : SLJIT_C_ZERO)); return cc; case OP_NOT_VSPACE: case OP_VSPACE: - check_input_end(common, fallbacks); + detect_partial_match(common, backtracks); read_char(common); add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL)); - add_jump(compiler, fallbacks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO)); + add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO)); return cc; #ifdef SUPPORT_UCP case OP_EXTUNI: - check_input_end(common, fallbacks); + detect_partial_match(common, backtracks); read_char(common); - add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL)); + add_jump(compiler, &common->getunichartype, JUMP(SLJIT_FAST_CALL)); OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Mc); - add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Mn - ucp_Mc)); + add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Mn - ucp_Mc)); label = LABEL(); jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0); read_char(common); - add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL)); + add_jump(compiler, &common->getunichartype, JUMP(SLJIT_FAST_CALL)); OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Mc); CMPTO(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Mn - ucp_Mc, label); OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0); JUMPHERE(jump[0]); + if (common->mode == JIT_PARTIAL_HARD_COMPILE) + { + jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0); + /* Since we successfully read a char above, partial matching must occure. */ + check_partial(common, TRUE); + JUMPHERE(jump[0]); + } return cc; #endif case OP_EODN: + /* Requires rather complex checks. 
*/ jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); if (common->nltype == NLTYPE_FIXED && common->newline > 255) { OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); - add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0)); + if (common->mode == JIT_COMPILE) + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0)); + else + { + jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0); + OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0); + COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS); + OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff); + COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_NOT_EQUAL); + add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL)); + check_partial(common, TRUE); + add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); + JUMPHERE(jump[1]); + } OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); - add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); - add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff)); + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff)); } else if (common->nltype == NLTYPE_FIXED) { OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); - add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0)); - add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline)); + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0)); + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline)); } else { @@ -3177,44 +3513,46 @@ switch(type) 
OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0); jump[2] = JUMP(SLJIT_C_GREATER); - add_jump(compiler, fallbacks, JUMP(SLJIT_C_LESS)); + add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS)); /* Equal. */ OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL); - add_jump(compiler, fallbacks, JUMP(SLJIT_JUMP)); + add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); JUMPHERE(jump[1]); if (common->nltype == NLTYPE_ANYCRLF) { OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0)); - add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL)); + add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0)); + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL)); } else { OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0); read_char(common); - add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0)); + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0)); add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL)); - add_jump(compiler, fallbacks, JUMP(SLJIT_C_ZERO)); + add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO)); OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1); } JUMPHERE(jump[2]); JUMPHERE(jump[3]); } JUMPHERE(jump[0]); + check_partial(common, FALSE); return cc; case OP_EOD: - add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0)); + add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0)); + check_partial(common, FALSE); return cc; case OP_CIRC: OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin)); - add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0)); + add_jump(compiler, 
backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0)); OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol)); - add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); return cc; case OP_CIRCM: @@ -3222,25 +3560,25 @@ switch(type) OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin)); jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0); OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol)); - add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); jump[0] = JUMP(SLJIT_JUMP); JUMPHERE(jump[1]); - add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, STR_PTR, 0, STR_END, 0)); + add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); if (common->nltype == NLTYPE_FIXED && common->newline > 255) { OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); - add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0)); + add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0)); OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2)); OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); - add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); - add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff)); + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff)); } else { skip_char_back(common); read_char(common); - check_newlinechar(common, common->nltype, fallbacks, FALSE); + check_newlinechar(common, common->nltype, backtracks, FALSE); } JUMPHERE(jump[0]); return cc; @@ -3248,35 +3586,50 @@ 
switch(type) case OP_DOLL: OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol)); - add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); if (!common->endonly) - compile_char1_hotpath(common, OP_EODN, cc, fallbacks); + compile_char1_trypath(common, OP_EODN, cc, backtracks); else - add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0)); + { + add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0)); + check_partial(common, FALSE); + } return cc; case OP_DOLLM: jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0); OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol)); - add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); + check_partial(common, FALSE); jump[0] = JUMP(SLJIT_JUMP); JUMPHERE(jump[1]); if (common->nltype == NLTYPE_FIXED && common->newline > 255) { OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); - add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0)); OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + if (common->mode == JIT_COMPILE) + add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0)); + else + { + jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0); + /* STR_PTR = STR_END - IN_UCHARS(1) */ + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); + check_partial(common, TRUE); + add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); + JUMPHERE(jump[1]); + } + OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); - add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); - add_jump(compiler, fallbacks, 
CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff)); + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff)); } else { peek_char(common); - check_newlinechar(common, common->nltype, fallbacks, FALSE); + check_newlinechar(common, common->nltype, backtracks, FALSE); } JUMPHERE(jump[0]); return cc; @@ -3287,19 +3640,19 @@ switch(type) #ifdef SUPPORT_UTF if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc); #endif - if (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0) + if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0)) { OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length)); - add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0)); + add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0)); context.length = IN_UCHARS(length); context.sourcereg = -1; #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED context.ucharptr = 0; #endif - return byte_sequence_compare(common, type == OP_CHARI, cc, &context, fallbacks); + return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks); } - check_input_end(common, fallbacks); + detect_partial_match(common, backtracks); read_char(common); #ifdef SUPPORT_UTF if (common->utf) @@ -3309,16 +3662,29 @@ switch(type) else #endif c = *cc; + if (type == OP_CHAR || !char_has_othercase(common, cc)) + { + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c)); + return cc + length; + } + oc = char_othercase(common, c); + bit = c ^ oc; + if (ispowerof2(bit)) + { + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit); + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit)); + return cc + length; + } OP2(SLJIT_SUB | 
SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c); COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char_othercase(common, c)); COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL); - add_jump(compiler, fallbacks, JUMP(SLJIT_C_ZERO)); + add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO)); return cc + length; case OP_NOT: case OP_NOTI: - check_input_end(common, fallbacks); + detect_partial_match(common, backtracks); length = 1; #ifdef SUPPORT_UTF if (common->utf) @@ -3329,12 +3695,12 @@ switch(type) { OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); if (type == OP_NOT || !char_has_othercase(common, cc)) - add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c)); + add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c)); else { /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */ OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20); - add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20)); + add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20)); } /* Skip the variable-length character. 
*/ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); @@ -3359,7 +3725,7 @@ switch(type) } if (type == OP_NOT || !char_has_othercase(common, cc)) - add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c)); + add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c)); else { oc = char_othercase(common, c); @@ -3367,19 +3733,19 @@ switch(type) if (ispowerof2(bit)) { OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit); - add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit)); + add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit)); } else { - add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c)); - add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc)); + add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c)); + add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc)); } } - return cc + 1; + return cc + length; case OP_CLASS: case OP_NCLASS: - check_input_end(common, fallbacks); + detect_partial_match(common, backtracks); read_char(common); #if defined SUPPORT_UTF || !defined COMPILE_PCRE8 jump[0] = NULL; @@ -3392,7 +3758,7 @@ switch(type) jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255); if (type == OP_CLASS) { - add_jump(compiler, fallbacks, jump[0]); + add_jump(compiler, backtracks, jump[0]); jump[0] = NULL; } } @@ -3402,7 +3768,7 @@ switch(type) OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc); OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); - add_jump(compiler, fallbacks, JUMP(SLJIT_C_ZERO)); + add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO)); #if defined SUPPORT_UTF || !defined COMPILE_PCRE8 if (jump[0] != NULL) JUMPHERE(jump[0]); @@ -3411,13 +3777,14 @@ switch(type) #if defined SUPPORT_UTF || defined COMPILE_PCRE16 case OP_XCLASS: - compile_xclass_hotpath(common, cc + LINK_SIZE, fallbacks); + 
compile_xclass_trypath(common, cc + LINK_SIZE, backtracks); return cc + GET(cc, 0) - 1; #endif case OP_REVERSE: length = GET(cc, 0); - SLJIT_ASSERT(length > 0); + if (length == 0) + return cc + LINK_SIZE; OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); #ifdef SUPPORT_UTF if (common->utf) @@ -3425,23 +3792,26 @@ switch(type) OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length); label = LABEL(); - add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0)); + add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0)); skip_char_back(common); OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1); JUMPTO(SLJIT_C_NOT_ZERO, label); - return cc + LINK_SIZE; } + else #endif - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); - OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length)); - add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0)); + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length)); + add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0)); + } + check_start_used_ptr(common); return cc + LINK_SIZE; } SLJIT_ASSERT_STOP(); return cc; } -static SLJIT_INLINE pcre_uchar *compile_charn_hotpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **fallbacks) +static SLJIT_INLINE pcre_uchar *compile_charn_trypath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks) { /* This function consumes at least one input character. */ /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */ @@ -3493,21 +3863,21 @@ if (context.length > 0) { /* We have a fixed-length byte sequence. 
*/ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length); - add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0)); + add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0)); context.sourcereg = -1; #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED context.ucharptr = 0; #endif - do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, fallbacks); while (context.length > 0); + do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0); return cc; } /* A non-fixed length character will be checked if length == 0. */ -return compile_char1_hotpath(common, *cc, cc + 1, fallbacks); +return compile_char1_trypath(common, *cc, cc + 1, backtracks); } -static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **fallbacks) +static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **backtracks) { DEFINE_COMPILER; int offset = GET2(cc, 1) << 1; @@ -3515,60 +3885,64 @@ int offset = GET2(cc, 1) << 1; OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset)); if (!common->jscript_compat) { - if (fallbacks == NULL) + if (backtracks == NULL) { + /* OVECTOR(1) contains the "string begin - 1" constant. */ OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)); COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1)); COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL); return JUMP(SLJIT_C_NOT_ZERO); } - add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1))); + add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1))); } return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1)); } /* Forward definitions. 
*/ -static void compile_hotpath(compiler_common *, pcre_uchar *, pcre_uchar *, fallback_common *); -static void compile_fallbackpath(compiler_common *, struct fallback_common *); +static void compile_trypath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *); +static void compile_backtrackpath(compiler_common *, struct backtrack_common *); -#define PUSH_FALLBACK(size, ccstart, error) \ +#define PUSH_BACKTRACK(size, ccstart, error) \ do \ { \ - fallback = sljit_alloc_memory(compiler, (size)); \ + backtrack = sljit_alloc_memory(compiler, (size)); \ if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \ return error; \ - memset(fallback, 0, size); \ - fallback->prev = parent->top; \ - fallback->cc = (ccstart); \ - parent->top = fallback; \ + memset(backtrack, 0, size); \ + backtrack->prev = parent->top; \ + backtrack->cc = (ccstart); \ + parent->top = backtrack; \ } \ while (0) -#define PUSH_FALLBACK_NOVALUE(size, ccstart) \ +#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \ do \ { \ - fallback = sljit_alloc_memory(compiler, (size)); \ + backtrack = sljit_alloc_memory(compiler, (size)); \ if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \ return; \ - memset(fallback, 0, size); \ - fallback->prev = parent->top; \ - fallback->cc = (ccstart); \ - parent->top = fallback; \ + memset(backtrack, 0, size); \ + backtrack->prev = parent->top; \ + backtrack->cc = (ccstart); \ + parent->top = backtrack; \ } \ while (0) -#define FALLBACK_AS(type) ((type*)fallback) +#define BACKTRACK_AS(type) ((type *)backtrack) -static pcre_uchar *compile_ref_hotpath(compiler_common *common, pcre_uchar *cc, jump_list **fallbacks, BOOL withchecks, BOOL emptyfail) +static pcre_uchar *compile_ref_trypath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail) { DEFINE_COMPILER; int offset = GET2(cc, 1) << 1; struct sljit_jump *jump = NULL; +struct sljit_jump *partial; +struct sljit_jump *nopartial; OP1(SLJIT_MOV, TMP1, 0, 
SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset)); +/* OVECTOR(1) contains the "string begin - 1" constant. */ if (withchecks && !common->jscript_compat) - add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1))); + add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1))); #if defined SUPPORT_UTF && defined SUPPORT_UCP if (common->utf && *cc == OP_REFI) @@ -3581,10 +3955,19 @@ if (common->utf && *cc == OP_REFI) /* Needed to save important temporary registers. */ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0); OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, ptr), STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0); sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp)); OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0); - add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0)); + if (common->mode == JIT_COMPILE) + add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1)); + else + { + add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0)); + nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1); + check_partial(common, FALSE); + add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); + JUMPHERE(nopartial); + } OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0); } else @@ -3593,27 +3976,47 @@ else OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0); if (withchecks) jump = JUMP(SLJIT_C_ZERO); - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); - add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0)); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); + partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, 
STR_END, 0); + if (common->mode == JIT_COMPILE) + add_jump(compiler, backtracks, partial); + add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL)); - add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); + + if (common->mode != JIT_COMPILE) + { + nopartial = JUMP(SLJIT_JUMP); + JUMPHERE(partial); + /* TMP2 -= STR_END - STR_PTR */ + OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0); + OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0); + partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0); + OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0); + add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL)); + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); + JUMPHERE(partial); + check_partial(common, FALSE); + add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); + JUMPHERE(nopartial); + } } if (jump != NULL) { if (emptyfail) - add_jump(compiler, fallbacks, jump); + add_jump(compiler, backtracks, jump); else JUMPHERE(jump); } return cc + 1 + IMM2_SIZE; } -static SLJIT_INLINE pcre_uchar *compile_ref_iterator_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent) +static SLJIT_INLINE pcre_uchar *compile_ref_iterator_trypath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent) { DEFINE_COMPILER; -fallback_common *fallback; +backtrack_common *backtrack; pcre_uchar type; struct sljit_label *label; struct sljit_jump *zerolength; @@ -3622,7 +4025,7 @@ pcre_uchar *ccbegin = cc; int min = 0, max = 0; BOOL minimize; -PUSH_FALLBACK(sizeof(iterator_fallback), cc, NULL); +PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL); type = cc[1 + IMM2_SIZE]; minimize = (type & 0x1) != 0; @@ -3674,14 +4077,14 @@ if (!minimize) { allocate_stack(common, 1); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); - zerolength = compile_ref_checks(common, 
ccbegin, &fallback->topfallbacks); + zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks); } if (min > 1 || max > 1) OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0); label = LABEL(); - compile_ref_hotpath(common, ccbegin, &fallback->topfallbacks, FALSE, FALSE); + compile_ref_trypath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE); if (min > 1 || max > 1) { @@ -3709,7 +4112,7 @@ if (!minimize) } JUMPHERE(zerolength); - FALLBACK_AS(iterator_fallback)->hotpath = LABEL(); + BACKTRACK_AS(iterator_backtrack)->trypath = LABEL(); decrease_call_count(common); return cc; @@ -3727,13 +4130,13 @@ if (min == 0) jump = JUMP(SLJIT_JUMP); } else - zerolength = compile_ref_checks(common, ccbegin, &fallback->topfallbacks); + zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks); -FALLBACK_AS(iterator_fallback)->hotpath = LABEL(); +BACKTRACK_AS(iterator_backtrack)->trypath = LABEL(); if (max > 0) - add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max)); + add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max)); -compile_ref_hotpath(common, ccbegin, &fallback->topfallbacks, TRUE, TRUE); +compile_ref_trypath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); if (min > 1) @@ -3741,7 +4144,7 @@ if (min > 1) OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0); - CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, FALLBACK_AS(iterator_fallback)->hotpath); + CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->trypath); } else if (max > 0) OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1); @@ -3754,15 +4157,15 @@ decrease_call_count(common); 
return cc; } -static SLJIT_INLINE pcre_uchar *compile_recurse_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent) +static SLJIT_INLINE pcre_uchar *compile_recurse_trypath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent) { DEFINE_COMPILER; -fallback_common *fallback; +backtrack_common *backtrack; recurse_entry *entry = common->entries; recurse_entry *prev = NULL; int start = GET(cc, 1); -PUSH_FALLBACK(sizeof(recurse_fallback), cc, NULL); +PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL); while (entry != NULL) { if (entry->start == start) @@ -3787,36 +4190,49 @@ if (entry == NULL) common->entries = entry; } -OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)); -allocate_stack(common, 1); -OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); +if (common->has_set_som && common->mark_ptr != 0) + { + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)); + allocate_stack(common, 2); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0); + } +else if (common->has_set_som || common->mark_ptr != 0) + { + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr); + allocate_stack(common, 1); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); + } if (entry->entry == NULL) add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL)); else JUMPTO(SLJIT_FAST_CALL, entry->entry); /* Leave if the match is failed. 
*/ -add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0)); +add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0)); return cc + 1 + LINK_SIZE; } -static pcre_uchar *compile_assert_hotpath(compiler_common *common, pcre_uchar *cc, assert_fallback *fallback, BOOL conditional) +static pcre_uchar *compile_assert_trypath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional) { DEFINE_COMPILER; int framesize; int localptr; -fallback_common altfallback; +backtrack_common altbacktrack; pcre_uchar *ccbegin; pcre_uchar opcode; pcre_uchar bra = OP_BRA; jump_list *tmp = NULL; -jump_list **target = (conditional) ? &fallback->condfailed : &fallback->common.topfallbacks; +jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks; jump_list **found; /* Saving previous accept variables. */ +struct sljit_label *save_leavelabel = common->leavelabel; struct sljit_label *save_acceptlabel = common->acceptlabel; +jump_list *save_leave = common->leave; +jump_list *save_accept = common->accept; struct sljit_jump *jump; struct sljit_jump *brajump = NULL; -jump_list *save_accept = common->accept; if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO) { @@ -3827,8 +4243,8 @@ if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO) localptr = PRIV_DATA(cc); SLJIT_ASSERT(localptr != 0); framesize = get_framesize(common, cc, FALSE); -fallback->framesize = framesize; -fallback->localptr = localptr; +backtrack->framesize = framesize; +backtrack->localptr = localptr; opcode = *cc; SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT); found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target; @@ -3837,7 +4253,7 @@ cc += GET(cc, 1); if (bra == OP_BRAMINZERO) { - /* This is a braminzero fallback path. */ + /* This is a braminzero backtrack path. 
*/ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); free_stack(common, 1); brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); @@ -3860,22 +4276,26 @@ else init_frame(common, ccbegin, framesize + 1, 2, FALSE); } -memset(&altfallback, 0, sizeof(fallback_common)); +memset(&altbacktrack, 0, sizeof(backtrack_common)); +common->leavelabel = NULL; +common->leave = NULL; while (1) { common->acceptlabel = NULL; common->accept = NULL; - altfallback.top = NULL; - altfallback.topfallbacks = NULL; + altbacktrack.top = NULL; + altbacktrack.topbacktracks = NULL; if (*ccbegin == OP_ALT) OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); - altfallback.cc = ccbegin; - compile_hotpath(common, ccbegin + 1 + LINK_SIZE, cc, &altfallback); + altbacktrack.cc = ccbegin; + compile_trypath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack); if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) { + common->leavelabel = save_leavelabel; common->acceptlabel = save_acceptlabel; + common->leave = save_leave; common->accept = save_accept; return NULL; } @@ -3925,14 +4345,16 @@ while (1) } add_jump(compiler, found, JUMP(SLJIT_JUMP)); - compile_fallbackpath(common, altfallback.top); + compile_backtrackpath(common, altbacktrack.top); if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) { + common->leavelabel = save_leavelabel; common->acceptlabel = save_acceptlabel; + common->leave = save_leave; common->accept = save_accept; return NULL; } - set_jumps(altfallback.topfallbacks, LABEL()); + set_jumps(altbacktrack.topbacktracks, LABEL()); if (*cc != OP_ALT) break; @@ -3941,6 +4363,8 @@ while (1) cc += GET(cc, 1); } /* None of them matched. 
*/ +if (common->leave != NULL) + set_jumps(common->leave, LABEL()); if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) { @@ -4007,12 +4431,12 @@ if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) if (bra == OP_BRAZERO) { - fallback->hotpath = LABEL(); - sljit_set_label(jump, fallback->hotpath); + backtrack->trypath = LABEL(); + sljit_set_label(jump, backtrack->trypath); } else if (bra == OP_BRAMINZERO) { - JUMPTO(SLJIT_JUMP, fallback->hotpath); + JUMPTO(SLJIT_JUMP, backtrack->trypath); JUMPHERE(brajump); if (framesize >= 0) { @@ -4020,7 +4444,7 @@ if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w)); } - set_jumps(fallback->common.topfallbacks, LABEL()); + set_jumps(backtrack->common.topbacktracks, LABEL()); } } else @@ -4050,22 +4474,24 @@ else } if (bra == OP_BRAZERO) - fallback->hotpath = LABEL(); + backtrack->trypath = LABEL(); else if (bra == OP_BRAMINZERO) { - JUMPTO(SLJIT_JUMP, fallback->hotpath); + JUMPTO(SLJIT_JUMP, backtrack->trypath); JUMPHERE(brajump); } if (bra != OP_BRA) { - SLJIT_ASSERT(found == &fallback->common.topfallbacks); - set_jumps(fallback->common.topfallbacks, LABEL()); - fallback->common.topfallbacks = NULL; + SLJIT_ASSERT(found == &backtrack->common.topbacktracks); + set_jumps(backtrack->common.topbacktracks, LABEL()); + backtrack->common.topbacktracks = NULL; } } +common->leavelabel = save_leavelabel; common->acceptlabel = save_acceptlabel; +common->leave = save_leave; common->accept = save_accept; return cc + 1 + LINK_SIZE; } @@ -4080,7 +4506,8 @@ sljit_w name_entry_size = locals[LOCALS1 / sizeof(sljit_w)]; sljit_w no_capture; int i; -locals += OVECTOR_START / sizeof(sljit_w); +locals += refno & 0xff; +refno >>= 8; no_capture = locals[1]; for (i = 0; i < name_count; i++) @@ -4233,26 +4660,26 @@ return condition; Or nothing, if trace is unnecessary */ -static 
pcre_uchar *compile_bracket_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent) +static pcre_uchar *compile_bracket_trypath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent) { DEFINE_COMPILER; -fallback_common *fallback; +backtrack_common *backtrack; pcre_uchar opcode; int localptr = 0; int offset = 0; int stacksize; pcre_uchar *ccbegin; -pcre_uchar *hotpath; +pcre_uchar *trypath; pcre_uchar bra = OP_BRA; pcre_uchar ket; -assert_fallback *assert; +assert_backtrack *assert; BOOL has_alternatives; struct sljit_jump *jump; struct sljit_jump *skip; struct sljit_label *rmaxlabel = NULL; struct sljit_jump *braminzerojump = NULL; -PUSH_FALLBACK(sizeof(bracket_fallback), cc, NULL); +PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL); if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO) { @@ -4263,12 +4690,12 @@ if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO) opcode = *cc; ccbegin = cc; -hotpath = ccbegin + 1 + LINK_SIZE; +trypath = ccbegin + 1 + LINK_SIZE; if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF) { - /* Drop this bracket_fallback. */ - parent->top = fallback->prev; + /* Drop this bracket_backtrack. */ + parent->top = backtrack->prev; return bracketend(cc); } @@ -4280,10 +4707,10 @@ cc += GET(cc, 1); has_alternatives = *cc == OP_ALT; if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND)) { - has_alternatives = (*hotpath == OP_RREF) ? FALSE : TRUE; - if (*hotpath == OP_NRREF) + has_alternatives = (*trypath == OP_RREF) ? 
FALSE : TRUE; + if (*trypath == OP_NRREF) { - stacksize = GET2(hotpath, 1); + stacksize = GET2(trypath, 1); if (common->currententry == NULL || stacksize == RREF_ANY) has_alternatives = FALSE; else if (common->currententry->start == 0) @@ -4304,17 +4731,17 @@ if (opcode == OP_CBRA || opcode == OP_SCBRA) offset = GET2(ccbegin, 1 + LINK_SIZE); localptr = OVECTOR_PRIV(offset); offset <<= 1; - FALLBACK_AS(bracket_fallback)->localptr = localptr; - hotpath += IMM2_SIZE; + BACKTRACK_AS(bracket_backtrack)->localptr = localptr; + trypath += IMM2_SIZE; } else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND) { /* Other brackets simply allocate the next entry. */ localptr = PRIV_DATA(ccbegin); SLJIT_ASSERT(localptr != 0); - FALLBACK_AS(bracket_fallback)->localptr = localptr; + BACKTRACK_AS(bracket_backtrack)->localptr = localptr; if (opcode == OP_ONCE) - FALLBACK_AS(bracket_fallback)->u.framesize = get_framesize(common, ccbegin, FALSE); + BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, FALSE); } /* Instructions before the first alternative. */ @@ -4339,7 +4766,7 @@ if (bra == OP_BRAZERO) if (bra == OP_BRAMINZERO) { - /* This is a fallback path! (Since the hot-path of OP_BRAMINZERO matches to the empty string) */ + /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); if (ket != OP_KETRMIN) { @@ -4356,7 +4783,7 @@ if (bra == OP_BRAMINZERO) skip = JUMP(SLJIT_JUMP); JUMPHERE(jump); /* Checking zero-length iteration. */ - if (opcode != OP_ONCE || FALLBACK_AS(bracket_fallback)->u.framesize < 0) + if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0) { /* When we come from outside, localptr contains the previous STR_PTR. */ braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr); @@ -4365,7 +4792,7 @@ if (bra == OP_BRAMINZERO) { /* Except when the whole stack frame must be saved. 
*/ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr); - braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (FALLBACK_AS(bracket_fallback)->u.framesize + 1) * sizeof(sljit_w)); + braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_w)); } JUMPHERE(skip); } @@ -4379,19 +4806,19 @@ if (bra == OP_BRAMINZERO) } if (ket == OP_KETRMIN) - FALLBACK_AS(bracket_fallback)->recursivehotpath = LABEL(); + BACKTRACK_AS(bracket_backtrack)->recursivetrypath = LABEL(); if (ket == OP_KETRMAX) { rmaxlabel = LABEL(); if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA) - FALLBACK_AS(bracket_fallback)->althotpath = rmaxlabel; + BACKTRACK_AS(bracket_backtrack)->alttrypath = rmaxlabel; } /* Handling capturing brackets and alternatives. */ if (opcode == OP_ONCE) { - if (FALLBACK_AS(bracket_fallback)->u.framesize < 0) + if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0) { /* Neither capturing brackets nor recursions are not found in the block. 
*/ if (ket == OP_KETRMIN) @@ -4415,22 +4842,22 @@ if (opcode == OP_ONCE) { if (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives) { - allocate_stack(common, FALLBACK_AS(bracket_fallback)->u.framesize + 2); + allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 2); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr); - OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(FALLBACK_AS(bracket_fallback)->u.framesize + 1)); + OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize + 1)); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0); - init_frame(common, ccbegin, FALLBACK_AS(bracket_fallback)->u.framesize + 1, 2, FALSE); + init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1, 2, FALSE); } else { - allocate_stack(common, FALLBACK_AS(bracket_fallback)->u.framesize + 1); + allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr); - OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(FALLBACK_AS(bracket_fallback)->u.framesize)); + OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize)); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0); - init_frame(common, ccbegin, FALLBACK_AS(bracket_fallback)->u.framesize, 1, FALSE); + init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize, 1, FALSE); } } } @@ -4464,38 +4891,38 @@ else if (has_alternatives) /* Generating code for the first alternative. 
*/ if (opcode == OP_COND || opcode == OP_SCOND) { - if (*hotpath == OP_CREF) + if (*trypath == OP_CREF) { SLJIT_ASSERT(has_alternatives); - add_jump(compiler, &(FALLBACK_AS(bracket_fallback)->u.condfailed), - CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(hotpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1))); - hotpath += 1 + IMM2_SIZE; + add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), + CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(trypath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1))); + trypath += 1 + IMM2_SIZE; } - else if (*hotpath == OP_NCREF) + else if (*trypath == OP_NCREF) { SLJIT_ASSERT(has_alternatives); - stacksize = GET2(hotpath, 1); + stacksize = GET2(trypath, 1); jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size); - OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, stacksize); - OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_LOCALS_REG, 0); + OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, (stacksize << 8) | (common->ovector_start / sizeof(sljit_w))); + GET_LOCAL_BASE(SLJIT_TEMPORARY_REG2, 0, 0); OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, common->name_table); sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector)); OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1); - add_jump(compiler, &(FALLBACK_AS(bracket_fallback)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0)); + add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0)); JUMPHERE(jump); - hotpath += 1 + IMM2_SIZE; + trypath += 1 + IMM2_SIZE; } - else 
if (*hotpath == OP_RREF || *hotpath == OP_NRREF) + else if (*trypath == OP_RREF || *trypath == OP_NRREF) { /* Never has other case. */ - FALLBACK_AS(bracket_fallback)->u.condfailed = NULL; + BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL; - stacksize = GET2(hotpath, 1); + stacksize = GET2(trypath, 1); if (common->currententry == NULL) stacksize = 0; else if (stacksize == RREF_ANY) @@ -4505,61 +4932,61 @@ if (opcode == OP_COND || opcode == OP_SCOND) else stacksize = stacksize == GET2(common->start, common->currententry->start + 1 + LINK_SIZE); - if (*hotpath == OP_RREF || stacksize || common->currententry == NULL) + if (*trypath == OP_RREF || stacksize || common->currententry == NULL) { SLJIT_ASSERT(!has_alternatives); if (stacksize != 0) - hotpath += 1 + IMM2_SIZE; + trypath += 1 + IMM2_SIZE; else { if (*cc == OP_ALT) { - hotpath = cc + 1 + LINK_SIZE; + trypath = cc + 1 + LINK_SIZE; cc += GET(cc, 1); } else - hotpath = cc; + trypath = cc; } } else { SLJIT_ASSERT(has_alternatives); - stacksize = GET2(hotpath, 1); + stacksize = GET2(trypath, 1); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE)); OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, stacksize); - OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_LOCALS_REG, 0); + GET_LOCAL_BASE(SLJIT_TEMPORARY_REG2, 0, 0); OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, common->name_table); sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups)); OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1); - add_jump(compiler, &(FALLBACK_AS(bracket_fallback)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0)); - hotpath += 
1 + IMM2_SIZE; + add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0)); + trypath += 1 + IMM2_SIZE; } } else { - SLJIT_ASSERT(has_alternatives && *hotpath >= OP_ASSERT && *hotpath <= OP_ASSERTBACK_NOT); - /* Similar code as PUSH_FALLBACK macro. */ - assert = sljit_alloc_memory(compiler, sizeof(assert_fallback)); + SLJIT_ASSERT(has_alternatives && *trypath >= OP_ASSERT && *trypath <= OP_ASSERTBACK_NOT); + /* Similar code as PUSH_BACKTRACK macro. */ + assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack)); if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) return NULL; - memset(assert, 0, sizeof(assert_fallback)); - assert->common.cc = hotpath; - FALLBACK_AS(bracket_fallback)->u.assert = assert; - hotpath = compile_assert_hotpath(common, hotpath, assert, TRUE); + memset(assert, 0, sizeof(assert_backtrack)); + assert->common.cc = trypath; + BACKTRACK_AS(bracket_backtrack)->u.assert = assert; + trypath = compile_assert_trypath(common, trypath, assert, TRUE); } } -compile_hotpath(common, hotpath, cc, fallback); +compile_trypath(common, trypath, cc, backtrack); if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) return NULL; if (opcode == OP_ONCE) { - if (FALLBACK_AS(bracket_fallback)->u.framesize < 0) + if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0) { OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr); /* TMP2 which is set here used by OP_KETRMAX below. */ @@ -4574,7 +5001,7 @@ if (opcode == OP_ONCE) else { stacksize = (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives) ? 
2 : 1; - OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (FALLBACK_AS(bracket_fallback)->u.framesize + stacksize) * sizeof(sljit_w)); + OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize) * sizeof(sljit_w)); if (ket == OP_KETRMAX) { /* TMP2 which is set here used by OP_KETRMAX below. */ @@ -4609,10 +5036,10 @@ if (has_alternatives) if (opcode != OP_ONCE) OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0); if (ket != OP_KETRMAX) - FALLBACK_AS(bracket_fallback)->althotpath = LABEL(); + BACKTRACK_AS(bracket_backtrack)->alttrypath = LABEL(); } -/* Must be after the hotpath label. */ +/* Must be after the trypath label. */ if (offset != 0) { OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr); @@ -4625,33 +5052,38 @@ if (ket == OP_KETRMAX) if (opcode == OP_ONCE || opcode >= OP_SBRA) { if (has_alternatives) - FALLBACK_AS(bracket_fallback)->althotpath = LABEL(); + BACKTRACK_AS(bracket_backtrack)->alttrypath = LABEL(); /* Checking zero-length iteration. */ if (opcode != OP_ONCE) + { CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0, rmaxlabel); + /* Drop STR_PTR for greedy plus quantifier. */ + if (bra != OP_BRAZERO) + free_stack(common, 1); + } else /* TMP2 must contain the starting STR_PTR. */ CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmaxlabel); } else JUMPTO(SLJIT_JUMP, rmaxlabel); - FALLBACK_AS(bracket_fallback)->recursivehotpath = LABEL(); + BACKTRACK_AS(bracket_backtrack)->recursivetrypath = LABEL(); } if (bra == OP_BRAZERO) - FALLBACK_AS(bracket_fallback)->zerohotpath = LABEL(); + BACKTRACK_AS(bracket_backtrack)->zerotrypath = LABEL(); if (bra == OP_BRAMINZERO) { - /* This is a fallback path! (From the viewpoint of OP_BRAMINZERO) */ - JUMPTO(SLJIT_JUMP, ((braminzero_fallback*)parent)->hotpath); + /* This is a backtrack path! 
(From the viewpoint of OP_BRAMINZERO) */ + JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->trypath); if (braminzerojump != NULL) { JUMPHERE(braminzerojump); /* We need to release the end pointer to perform the - fallback for the zero-length iteration. When + backtrack for the zero-length iteration. When framesize is < 0, OP_ONCE will do the release itself. */ - if (opcode == OP_ONCE && FALLBACK_AS(bracket_fallback)->u.framesize >= 0) + if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0) { OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr); add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); @@ -4659,7 +5091,7 @@ if (bra == OP_BRAMINZERO) else if (ket == OP_KETRMIN && opcode != OP_ONCE) free_stack(common, 1); } - /* Continue to the normal fallback. */ + /* Continue to the normal backtrack. */ } if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO) @@ -4672,10 +5104,10 @@ cc += 1 + LINK_SIZE; return cc; } -static pcre_uchar *compile_bracketpos_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent) +static pcre_uchar *compile_bracketpos_trypath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent) { DEFINE_COMPILER; -fallback_common *fallback; +backtrack_common *backtrack; pcre_uchar opcode; int localptr; int cbraprivptr = 0; @@ -4688,7 +5120,7 @@ int stack; struct sljit_label *loop = NULL; struct jump_list *emptymatch = NULL; -PUSH_FALLBACK(sizeof(bracketpos_fallback), cc, NULL); +PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL); if (*cc == OP_BRAPOSZERO) { zero = TRUE; @@ -4698,7 +5130,7 @@ if (*cc == OP_BRAPOSZERO) opcode = *cc; localptr = PRIV_DATA(cc); SLJIT_ASSERT(localptr != 0); -FALLBACK_AS(bracketpos_fallback)->localptr = localptr; +BACKTRACK_AS(bracketpos_backtrack)->localptr = localptr; switch(opcode) { case OP_BRAPOS: @@ -4720,13 +5152,13 @@ switch(opcode) } framesize = get_framesize(common, cc, FALSE); -FALLBACK_AS(bracketpos_fallback)->framesize 
= framesize; +BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize; if (framesize < 0) { stacksize = (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS) ? 2 : 1; if (!zero) stacksize++; - FALLBACK_AS(bracketpos_fallback)->stacksize = stacksize; + BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize; allocate_stack(common, stacksize); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0); @@ -4750,7 +5182,7 @@ else stacksize++; if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS) stacksize++; - FALLBACK_AS(bracketpos_fallback)->stacksize = stacksize; + BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize; allocate_stack(common, stacksize); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr); @@ -4777,11 +5209,11 @@ if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS) loop = LABEL(); while (*cc != OP_KETRPOS) { - fallback->top = NULL; - fallback->topfallbacks = NULL; + backtrack->top = NULL; + backtrack->topbacktracks = NULL; cc += GET(cc, 1); - compile_hotpath(common, ccbegin, cc, fallback); + compile_trypath(common, ccbegin, cc, backtrack); if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) return NULL; @@ -4842,10 +5274,10 @@ while (*cc != OP_KETRPOS) JUMPTO(SLJIT_JUMP, loop); flush_stubs(common); - compile_fallbackpath(common, fallback->top); + compile_backtrackpath(common, backtrack->top); if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) return NULL; - set_jumps(fallback->topfallbacks, LABEL()); + set_jumps(backtrack->topbacktracks, LABEL()); if (framesize < 0) { @@ -4875,13 +5307,13 @@ while (*cc != OP_KETRPOS) ccbegin = cc + 1 + LINK_SIZE; } -fallback->topfallbacks = NULL; +backtrack->topbacktracks = NULL; if (!zero) { if (framesize < 0) - add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0)); + add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0)); else /* TMP2 is set to 
[localptr] above. */ - add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_w), SLJIT_IMM, 0)); + add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_w), SLJIT_IMM, 0)); } /* None of them matched. */ @@ -4982,10 +5414,10 @@ if (end != NULL) return cc; } -static pcre_uchar *compile_iterator_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent) +static pcre_uchar *compile_iterator_trypath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent) { DEFINE_COMPILER; -fallback_common *fallback; +backtrack_common *backtrack; pcre_uchar opcode; pcre_uchar type; int arg1 = -1, arg2 = -1; @@ -4994,7 +5426,7 @@ jump_list *nomatch = NULL; struct sljit_jump *jump = NULL; struct sljit_label *label; -PUSH_FALLBACK(sizeof(iterator_fallback), cc, NULL); +PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL); cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, &end); @@ -5021,7 +5453,7 @@ switch(opcode) OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0); label = LABEL(); - compile_char1_hotpath(common, type, cc, &fallback->topfallbacks); + compile_char1_trypath(common, type, cc, &backtrack->topbacktracks); if (opcode == OP_UPTO || opcode == OP_CRRANGE) { OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0); @@ -5041,11 +5473,13 @@ switch(opcode) } else { + if (opcode == OP_PLUS) + compile_char1_trypath(common, type, cc, &backtrack->topbacktracks); allocate_stack(common, 2); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1); label = LABEL(); - compile_char1_hotpath(common, type, cc, &nomatch); + compile_char1_trypath(common, type, cc, &nomatch); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); if (opcode <= OP_PLUS || (opcode == OP_CRRANGE && arg1 == 0)) { @@ -5060,21 +5494,20 @@ 
switch(opcode) CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label); } set_jumps(nomatch, LABEL()); - if (opcode == OP_PLUS || opcode == OP_CRRANGE) - add_jump(compiler, &fallback->topfallbacks, - CMP(SLJIT_C_LESS, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, opcode == OP_PLUS ? 2 : arg2 + 1)); + if (opcode == OP_CRRANGE) + add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_LESS, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, arg2 + 1)); OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); } - FALLBACK_AS(iterator_fallback)->hotpath = LABEL(); + BACKTRACK_AS(iterator_backtrack)->trypath = LABEL(); break; case OP_MINSTAR: case OP_MINPLUS: + if (opcode == OP_MINPLUS) + compile_char1_trypath(common, type, cc, &backtrack->topbacktracks); allocate_stack(common, 1); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); - if (opcode == OP_MINPLUS) - add_jump(compiler, &fallback->topfallbacks, JUMP(SLJIT_JUMP)); - FALLBACK_AS(iterator_fallback)->hotpath = LABEL(); + BACKTRACK_AS(iterator_backtrack)->trypath = LABEL(); break; case OP_MINUPTO: @@ -5083,8 +5516,8 @@ switch(opcode) OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1); if (opcode == OP_CRMINRANGE) - add_jump(compiler, &fallback->topfallbacks, JUMP(SLJIT_JUMP)); - FALLBACK_AS(iterator_fallback)->hotpath = LABEL(); + add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP)); + BACKTRACK_AS(iterator_backtrack)->trypath = LABEL(); break; case OP_QUERY: @@ -5092,14 +5525,14 @@ switch(opcode) allocate_stack(common, 1); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); if (opcode == OP_QUERY) - compile_char1_hotpath(common, type, cc, &fallback->topfallbacks); - FALLBACK_AS(iterator_fallback)->hotpath = LABEL(); + compile_char1_trypath(common, type, cc, &backtrack->topbacktracks); + BACKTRACK_AS(iterator_backtrack)->trypath = LABEL(); break; case OP_EXACT: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), 
POSSESSIVE0, SLJIT_IMM, 1); label = LABEL(); - compile_char1_hotpath(common, type, cc, &fallback->topfallbacks); + compile_char1_trypath(common, type, cc, &backtrack->topbacktracks); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0); OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0); @@ -5113,7 +5546,7 @@ switch(opcode) OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 1); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0); label = LABEL(); - compile_char1_hotpath(common, type, cc, &nomatch); + compile_char1_trypath(common, type, cc, &nomatch); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0); if (opcode != OP_POSUPTO) { @@ -5130,13 +5563,13 @@ switch(opcode) } set_jumps(nomatch, LABEL()); if (opcode == OP_POSPLUS) - add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_LESS, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 2)); + add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_LESS, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 2)); OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1); break; case OP_POSQUERY: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0); - compile_char1_hotpath(common, type, cc, &nomatch); + compile_char1_trypath(common, type, cc, &nomatch); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0); set_jumps(nomatch, LABEL()); OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1); @@ -5151,16 +5584,16 @@ decrease_call_count(common); return end; } -static SLJIT_INLINE pcre_uchar *compile_fail_accept_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent) +static SLJIT_INLINE pcre_uchar *compile_fail_accept_trypath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent) { DEFINE_COMPILER; -fallback_common *fallback; +backtrack_common *backtrack; -PUSH_FALLBACK(sizeof(bracket_fallback), 
cc, NULL); +PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL); if (*cc == OP_FAIL) { - add_jump(compiler, &fallback->topfallbacks, JUMP(SLJIT_JUMP)); + add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP)); return cc + 1; } @@ -5180,7 +5613,7 @@ else CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), common->acceptlabel); OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty)); -add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); +add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart)); if (common->acceptlabel == NULL) add_jump(compiler, &common->accept, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0)); @@ -5191,11 +5624,11 @@ if (common->acceptlabel == NULL) add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0)); else CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->acceptlabel); -add_jump(compiler, &fallback->topfallbacks, JUMP(SLJIT_JUMP)); +add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP)); return cc + 1; } -static SLJIT_INLINE pcre_uchar *compile_close_hotpath(compiler_common *common, pcre_uchar *cc) +static SLJIT_INLINE pcre_uchar *compile_close_trypath(compiler_common *common, pcre_uchar *cc) { DEFINE_COMPILER; int offset = GET2(cc, 1); @@ -5211,10 +5644,10 @@ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0); return cc + 1 + IMM2_SIZE; } -static void compile_hotpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, fallback_common *parent) +static void compile_trypath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent) { DEFINE_COMPILER; -fallback_common *fallback; +backtrack_common *backtrack; while (cc < ccend) { @@ -5250,21 +5683,24 @@ while (cc < ccend) case OP_NOT: case 
OP_NOTI: case OP_REVERSE: - cc = compile_char1_hotpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks); + cc = compile_char1_trypath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks); break; case OP_SET_SOM: - PUSH_FALLBACK_NOVALUE(sizeof(fallback_common), cc); + PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)); allocate_stack(common, 1); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); cc++; break; case OP_CHAR: case OP_CHARI: - cc = compile_charn_hotpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks); + if (common->mode == JIT_COMPILE) + cc = compile_charn_trypath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks); + else + cc = compile_char1_trypath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks); break; case OP_STAR: @@ -5332,48 +5768,48 @@ while (cc < ccend) case OP_TYPEPOSPLUS: case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO: - cc = compile_iterator_hotpath(common, cc, parent); + cc = compile_iterator_trypath(common, cc, parent); break; case OP_CLASS: case OP_NCLASS: if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRMINRANGE) - cc = compile_iterator_hotpath(common, cc, parent); + cc = compile_iterator_trypath(common, cc, parent); else - cc = compile_char1_hotpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks); + cc = compile_char1_trypath(common, *cc, cc + 1, parent->top != NULL ? 
&parent->top->nextbacktracks : &parent->topbacktracks); break; #if defined SUPPORT_UTF || defined COMPILE_PCRE16 case OP_XCLASS: if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRMINRANGE) - cc = compile_iterator_hotpath(common, cc, parent); + cc = compile_iterator_trypath(common, cc, parent); else - cc = compile_char1_hotpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks); + cc = compile_char1_trypath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks); break; #endif case OP_REF: case OP_REFI: if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRMINRANGE) - cc = compile_ref_iterator_hotpath(common, cc, parent); + cc = compile_ref_iterator_trypath(common, cc, parent); else - cc = compile_ref_hotpath(common, cc, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks, TRUE, FALSE); + cc = compile_ref_trypath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE); break; case OP_RECURSE: - cc = compile_recurse_hotpath(common, cc, parent); + cc = compile_recurse_trypath(common, cc, parent); break; case OP_ASSERT: case OP_ASSERT_NOT: case OP_ASSERTBACK: case OP_ASSERTBACK_NOT: - PUSH_FALLBACK_NOVALUE(sizeof(assert_fallback), cc); - cc = compile_assert_hotpath(common, cc, FALLBACK_AS(assert_fallback), FALSE); + PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc); + cc = compile_assert_trypath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE); break; case OP_BRAMINZERO: - PUSH_FALLBACK_NOVALUE(sizeof(braminzero_fallback), cc); + PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc); cc = bracketend(cc + 1); if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN) { @@ -5386,7 +5822,7 @@ while (cc < ccend) OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0); } - FALLBACK_AS(braminzero_fallback)->hotpath = LABEL(); + 
BACKTRACK_AS(braminzero_backtrack)->trypath = LABEL(); if (cc[1] > OP_ASSERTBACK_NOT) decrease_call_count(common); break; @@ -5399,16 +5835,16 @@ while (cc < ccend) case OP_SBRA: case OP_SCBRA: case OP_SCOND: - cc = compile_bracket_hotpath(common, cc, parent); + cc = compile_bracket_trypath(common, cc, parent); break; case OP_BRAZERO: if (cc[1] > OP_ASSERTBACK_NOT) - cc = compile_bracket_hotpath(common, cc, parent); + cc = compile_bracket_trypath(common, cc, parent); else { - PUSH_FALLBACK_NOVALUE(sizeof(assert_fallback), cc); - cc = compile_assert_hotpath(common, cc, FALLBACK_AS(assert_fallback), FALSE); + PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc); + cc = compile_assert_trypath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE); } break; @@ -5417,17 +5853,35 @@ while (cc < ccend) case OP_SBRAPOS: case OP_SCBRAPOS: case OP_BRAPOSZERO: - cc = compile_bracketpos_hotpath(common, cc, parent); + cc = compile_bracketpos_trypath(common, cc, parent); + break; + + case OP_MARK: + PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc); + SLJIT_ASSERT(common->mark_ptr != 0); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr); + allocate_stack(common, 1); + OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_w)(cc + 2)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0); + cc += 1 + 2 + cc[1]; + break; + + case OP_COMMIT: + PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc); + cc += 1; break; case OP_FAIL: case OP_ACCEPT: case OP_ASSERT_ACCEPT: - cc = compile_fail_accept_hotpath(common, cc, parent); + cc = compile_fail_accept_trypath(common, cc, parent); break; case OP_CLOSE: - cc = compile_close_hotpath(common, cc); + cc = compile_close_trypath(common, cc); break; case OP_SKIPZERO: @@ -5444,22 +5898,22 @@ while (cc < ccend) SLJIT_ASSERT(cc == 
ccend); } -#undef PUSH_FALLBACK -#undef PUSH_FALLBACK_NOVALUE -#undef FALLBACK_AS +#undef PUSH_BACKTRACK +#undef PUSH_BACKTRACK_NOVALUE +#undef BACKTRACK_AS -#define COMPILE_FALLBACKPATH(current) \ +#define COMPILE_BACKTRACKPATH(current) \ do \ { \ - compile_fallbackpath(common, (current)); \ + compile_backtrackpath(common, (current)); \ if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \ return; \ } \ while (0) -#define CURRENT_AS(type) ((type*)current) +#define CURRENT_AS(type) ((type *)current) -static void compile_iterator_fallbackpath(compiler_common *common, struct fallback_common *current) +static void compile_iterator_backtrackpath(compiler_common *common, struct backtrack_common *current) { DEFINE_COMPILER; pcre_uchar *cc = current->cc; @@ -5468,6 +5922,7 @@ pcre_uchar type; int arg1 = -1, arg2 = -1; struct sljit_label *label = NULL; struct sljit_jump *jump = NULL; +jump_list *jumplist = NULL; cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, NULL); @@ -5479,55 +5934,52 @@ switch(opcode) case OP_CRRANGE: if (type == OP_ANYNL || type == OP_EXTUNI) { - set_jumps(current->topfallbacks, LABEL()); + set_jumps(current->topbacktracks, LABEL()); OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); free_stack(common, 1); - CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_fallback)->hotpath); + CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->trypath); } else { - if (opcode == OP_STAR || opcode == OP_UPTO) + if (opcode <= OP_PLUS || opcode == OP_UPTO) arg2 = 0; - else if (opcode == OP_PLUS) - arg2 = 1; - jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, arg2 + 1); - OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); + jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, arg2 + 1); + OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0, SLJIT_IMM, 
1); OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); skip_char_back(common); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); - JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath); - if (opcode == OP_PLUS || opcode == OP_CRRANGE) - set_jumps(current->topfallbacks, LABEL()); + JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->trypath); + if (opcode == OP_CRRANGE) + set_jumps(current->topbacktracks, LABEL()); JUMPHERE(jump); free_stack(common, 2); + if (opcode == OP_PLUS) + set_jumps(current->topbacktracks, LABEL()); } break; case OP_MINSTAR: case OP_MINPLUS: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); - if (opcode == OP_MINPLUS) - { - set_jumps(current->topfallbacks, LABEL()); - current->topfallbacks = NULL; - } - compile_char1_hotpath(common, type, cc, ¤t->topfallbacks); + compile_char1_trypath(common, type, cc, &jumplist); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); - JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath); - set_jumps(current->topfallbacks, LABEL()); + JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->trypath); + set_jumps(jumplist, LABEL()); free_stack(common, 1); + if (opcode == OP_MINPLUS) + set_jumps(current->topbacktracks, LABEL()); break; case OP_MINUPTO: case OP_CRMINRANGE: if (opcode == OP_CRMINRANGE) { - set_jumps(current->topfallbacks, LABEL()); - current->topfallbacks = NULL; label = LABEL(); + set_jumps(current->topbacktracks, label); } OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); - compile_char1_hotpath(common, type, cc, ¤t->topfallbacks); + compile_char1_trypath(common, type, cc, &jumplist); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); @@ -5538,23 +5990,23 @@ switch(opcode) CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2 + 1, label); if (opcode == OP_CRMINRANGE && arg1 == 0) - JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath); + JUMPTO(SLJIT_JUMP, 
CURRENT_AS(iterator_backtrack)->trypath); else - CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 2, CURRENT_AS(iterator_fallback)->hotpath); + CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 2, CURRENT_AS(iterator_backtrack)->trypath); - set_jumps(current->topfallbacks, LABEL()); + set_jumps(jumplist, LABEL()); free_stack(common, 2); break; case OP_QUERY: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); - CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_fallback)->hotpath); + CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->trypath); jump = JUMP(SLJIT_JUMP); - set_jumps(current->topfallbacks, LABEL()); + set_jumps(current->topbacktracks, LABEL()); OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); - JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath); + JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->trypath); JUMPHERE(jump); free_stack(common, 1); break; @@ -5563,16 +6015,16 @@ switch(opcode) OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); jump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); - compile_char1_hotpath(common, type, cc, ¤t->topfallbacks); - JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath); - set_jumps(current->topfallbacks, LABEL()); + compile_char1_trypath(common, type, cc, &jumplist); + JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->trypath); + set_jumps(jumplist, LABEL()); JUMPHERE(jump); free_stack(common, 1); break; case OP_EXACT: case OP_POSPLUS: - set_jumps(current->topfallbacks, LABEL()); + set_jumps(current->topbacktracks, LABEL()); break; case OP_POSSTAR: @@ -5586,7 +6038,7 @@ switch(opcode) } } -static void compile_ref_iterator_fallbackpath(compiler_common *common, struct fallback_common *current) +static void 
compile_ref_iterator_backtrackpath(compiler_common *common, struct backtrack_common *current) { DEFINE_COMPILER; pcre_uchar *cc = current->cc; @@ -5595,30 +6047,42 @@ pcre_uchar type; type = cc[1 + IMM2_SIZE]; if ((type & 0x1) == 0) { - set_jumps(current->topfallbacks, LABEL()); + set_jumps(current->topbacktracks, LABEL()); OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); free_stack(common, 1); - CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_fallback)->hotpath); + CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->trypath); return; } OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); -CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_fallback)->hotpath); -set_jumps(current->topfallbacks, LABEL()); +CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->trypath); +set_jumps(current->topbacktracks, LABEL()); free_stack(common, 2); } -static void compile_recurse_fallbackpath(compiler_common *common, struct fallback_common *current) +static void compile_recurse_backtrackpath(compiler_common *common, struct backtrack_common *current) { DEFINE_COMPILER; -set_jumps(current->topfallbacks, LABEL()); -OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); -free_stack(common, 1); -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0); +set_jumps(current->topbacktracks, LABEL()); + +if (common->has_set_som && common->mark_ptr != 0) + { + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); + free_stack(common, 2); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP1, 0); + } +else if (common->has_set_som || common->mark_ptr != 0) + { + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + free_stack(common, 1); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? 
(int)(OVECTOR(0)) : common->mark_ptr, TMP2, 0); + } } -static void compile_assert_fallbackpath(compiler_common *common, struct fallback_common *current) +static void compile_assert_backtrackpath(compiler_common *common, struct backtrack_common *current) { DEFINE_COMPILER; pcre_uchar *cc = current->cc; @@ -5634,18 +6098,18 @@ if (*cc == OP_BRAZERO) if (bra == OP_BRAZERO) { - SLJIT_ASSERT(current->topfallbacks == NULL); + SLJIT_ASSERT(current->topbacktracks == NULL); OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); } -if (CURRENT_AS(assert_fallback)->framesize < 0) +if (CURRENT_AS(assert_backtrack)->framesize < 0) { - set_jumps(current->topfallbacks, LABEL()); + set_jumps(current->topbacktracks, LABEL()); if (bra == OP_BRAZERO) { OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); - CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_fallback)->hotpath); + CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->trypath); free_stack(common, 1); } return; @@ -5656,7 +6120,7 @@ if (bra == OP_BRAZERO) if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT) { OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); - CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_fallback)->hotpath); + CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->trypath); free_stack(common, 1); return; } @@ -5666,31 +6130,31 @@ if (bra == OP_BRAZERO) if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK) { - OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(assert_fallback)->localptr); + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(assert_backtrack)->localptr); add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(assert_fallback)->localptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(assert_fallback)->framesize * sizeof(sljit_w)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), 
CURRENT_AS(assert_backtrack)->localptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(assert_backtrack)->framesize * sizeof(sljit_w)); - set_jumps(current->topfallbacks, LABEL()); + set_jumps(current->topbacktracks, LABEL()); } else - set_jumps(current->topfallbacks, LABEL()); + set_jumps(current->topbacktracks, LABEL()); if (bra == OP_BRAZERO) { /* We know there is enough place on the stack. */ OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w)); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); - JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_fallback)->hotpath); + JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_backtrack)->trypath); JUMPHERE(brajump); } } -static void compile_bracket_fallbackpath(compiler_common *common, struct fallback_common *current) +static void compile_bracket_backtrackpath(compiler_common *common, struct backtrack_common *current) { DEFINE_COMPILER; int opcode; int offset = 0; -int localptr = CURRENT_AS(bracket_fallback)->localptr; +int localptr = CURRENT_AS(bracket_backtrack)->localptr; int stacksize; int count; pcre_uchar *cc = current->cc; @@ -5700,7 +6164,7 @@ jump_list *jumplist = NULL; jump_list *jumplistitem = NULL; pcre_uchar bra = OP_BRA; pcre_uchar ket; -assert_fallback *assert; +assert_backtrack *assert; BOOL has_alternatives; struct sljit_jump *brazero = NULL; struct sljit_jump *once = NULL; @@ -5719,7 +6183,7 @@ ket = *(bracketend(ccbegin) - 1 - LINK_SIZE); cc += GET(cc, 1); has_alternatives = *cc == OP_ALT; if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND)) - has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_fallback)->u.condfailed != NULL; + has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL; if (opcode == OP_CBRA || opcode == OP_SCBRA) offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1; if (SLJIT_UNLIKELY(opcode == OP_COND) 
&& (*cc == OP_KETRMAX || *cc == OP_KETRMIN)) @@ -5729,9 +6193,7 @@ if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC)) if (ket == OP_KETRMAX) { - if (bra != OP_BRAZERO) - free_stack(common, 1); - else + if (bra == OP_BRAZERO) { OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); free_stack(common, 1); @@ -5746,18 +6208,18 @@ else if (ket == OP_KETRMIN) if (opcode >= OP_SBRA || opcode == OP_ONCE) { /* Checking zero-length iteration. */ - if (opcode != OP_ONCE || CURRENT_AS(bracket_fallback)->u.framesize < 0) - CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, CURRENT_AS(bracket_fallback)->recursivehotpath); + if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0) + CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, CURRENT_AS(bracket_backtrack)->recursivetrypath); else { OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr); - CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (CURRENT_AS(bracket_fallback)->u.framesize + 1) * sizeof(sljit_w), CURRENT_AS(bracket_fallback)->recursivehotpath); + CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (CURRENT_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_w), CURRENT_AS(bracket_backtrack)->recursivetrypath); } if (opcode != OP_ONCE) free_stack(common, 1); } else - JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_fallback)->recursivehotpath); + JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursivetrypath); } rminlabel = LABEL(); } @@ -5770,7 +6232,7 @@ else if (bra == OP_BRAZERO) if (SLJIT_UNLIKELY(opcode == OP_ONCE)) { - if (CURRENT_AS(bracket_fallback)->u.framesize >= 0) + if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0) { OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr); add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); @@ -5825,9 +6287,9 @@ else if (*cc == OP_ALT) cc = ccbegin + GET(ccbegin, 1); } -COMPILE_FALLBACKPATH(current->top); -if (current->topfallbacks) - set_jumps(current->topfallbacks, 
LABEL()); +COMPILE_BACKTRACKPATH(current->top); +if (current->topbacktracks) + set_jumps(current->topbacktracks, LABEL()); if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND)) { @@ -5835,7 +6297,7 @@ if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND)) if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) { SLJIT_ASSERT(has_alternatives); - assert = CURRENT_AS(bracket_fallback)->u.assert; + assert = CURRENT_AS(bracket_backtrack)->u.assert; if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK)) { OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr); @@ -5843,13 +6305,13 @@ if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND)) OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_w)); } cond = JUMP(SLJIT_JUMP); - set_jumps(CURRENT_AS(bracket_fallback)->u.assert->condfailed, LABEL()); + set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL()); } - else if (CURRENT_AS(bracket_fallback)->u.condfailed != NULL) + else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL) { SLJIT_ASSERT(has_alternatives); cond = JUMP(SLJIT_JUMP); - set_jumps(CURRENT_AS(bracket_fallback)->u.condfailed, LABEL()); + set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL()); } else SLJIT_ASSERT(!has_alternatives); @@ -5861,8 +6323,8 @@ if (has_alternatives) do { current->top = NULL; - current->topfallbacks = NULL; - current->nextfallbacks = NULL; + current->topbacktracks = NULL; + current->nextbacktracks = NULL; if (*cc == OP_ALT) { ccprev = cc + 1 + LINK_SIZE; @@ -5874,16 +6336,16 @@ if (has_alternatives) else OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); } - compile_hotpath(common, ccprev, cc, current); + compile_trypath(common, ccprev, cc, current); if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) 
return; } /* Instructions after the current alternative is succesfully matched. */ - /* There is a similar code in compile_bracket_hotpath. */ + /* There is a similar code in compile_bracket_trypath. */ if (opcode == OP_ONCE) { - if (CURRENT_AS(bracket_fallback)->u.framesize < 0) + if (CURRENT_AS(bracket_backtrack)->u.framesize < 0) { OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr); /* TMP2 which is set here used by OP_KETRMAX below. */ @@ -5897,7 +6359,7 @@ if (has_alternatives) } else { - OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (CURRENT_AS(bracket_fallback)->u.framesize + 2) * sizeof(sljit_w)); + OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (CURRENT_AS(bracket_backtrack)->u.framesize + 2) * sizeof(sljit_w)); if (ket == OP_KETRMAX) { /* TMP2 which is set here used by OP_KETRMAX below. */ @@ -5913,7 +6375,7 @@ if (has_alternatives) stacksize++; if (stacksize > 0) { - if (opcode != OP_ONCE || CURRENT_AS(bracket_fallback)->u.framesize >= 0) + if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize >= 0) allocate_stack(common, stacksize); else { @@ -5943,7 +6405,7 @@ if (has_alternatives) OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 0), TMP1, 0); } - JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_fallback)->althotpath); + JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alttrypath); if (opcode != OP_ONCE) { @@ -5952,10 +6414,10 @@ if (has_alternatives) jumplist = jumplist->next; } - COMPILE_FALLBACKPATH(current->top); - if (current->topfallbacks) - set_jumps(current->topfallbacks, LABEL()); - SLJIT_ASSERT(!current->nextfallbacks); + COMPILE_BACKTRACKPATH(current->top); + if (current->topbacktracks) + set_jumps(current->topbacktracks, LABEL()); + SLJIT_ASSERT(!current->nextbacktracks); } while (*cc == OP_ALT); SLJIT_ASSERT(!jumplist); @@ -5963,7 +6425,7 @@ if (has_alternatives) if (cond != NULL) { SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND); - 
assert = CURRENT_AS(bracket_fallback)->u.assert; + assert = CURRENT_AS(bracket_backtrack)->u.assert; if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0) { @@ -5997,11 +6459,11 @@ else if (opcode == OP_SBRA || opcode == OP_SCOND) else if (opcode == OP_ONCE) { cc = ccbegin + GET(ccbegin, 1); - if (CURRENT_AS(bracket_fallback)->u.framesize >= 0) + if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0) { /* Reset head and drop saved frame. */ stacksize = (ket == OP_KETRMAX || ket == OP_KETRMIN || *cc == OP_ALT) ? 2 : 1; - free_stack(common, CURRENT_AS(bracket_fallback)->u.framesize + stacksize); + free_stack(common, CURRENT_AS(bracket_backtrack)->u.framesize + stacksize); } else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN)) { @@ -6011,8 +6473,8 @@ else if (opcode == OP_ONCE) JUMPHERE(once); /* Restore previous localptr */ - if (CURRENT_AS(bracket_fallback)->u.framesize >= 0) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracket_fallback)->u.framesize * sizeof(sljit_w)); + if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracket_backtrack)->u.framesize * sizeof(sljit_w)); else if (ket == OP_KETRMIN) { OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); @@ -6025,20 +6487,22 @@ else if (opcode == OP_ONCE) if (ket == OP_KETRMAX) { OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); - CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_fallback)->recursivehotpath); + if (bra != OP_BRAZERO) + free_stack(common, 1); + CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursivetrypath); if (bra == OP_BRAZERO) { OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); - JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_fallback)->zerohotpath); + JUMPTO(SLJIT_JUMP, 
CURRENT_AS(bracket_backtrack)->zerotrypath); JUMPHERE(brazero); + free_stack(common, 1); } - free_stack(common, 1); } else if (ket == OP_KETRMIN) { OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); - /* OP_ONCE removes everything in case of a fallback, so we don't + /* OP_ONCE removes everything in case of a backtrack, so we don't need to explicitly release the STR_PTR. The extra release would affect badly the free_stack(2) above. */ if (opcode != OP_ONCE) @@ -6052,18 +6516,18 @@ else if (ket == OP_KETRMIN) else if (bra == OP_BRAZERO) { OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); - JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_fallback)->zerohotpath); + JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zerotrypath); JUMPHERE(brazero); } } -static void compile_bracketpos_fallbackpath(compiler_common *common, struct fallback_common *current) +static void compile_bracketpos_backtrackpath(compiler_common *common, struct backtrack_common *current) { DEFINE_COMPILER; int offset; struct sljit_jump *jump; -if (CURRENT_AS(bracketpos_fallback)->framesize < 0) +if (CURRENT_AS(bracketpos_backtrack)->framesize < 0) { if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS) { @@ -6073,57 +6537,57 @@ if (CURRENT_AS(bracketpos_fallback)->framesize < 0) OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP2, 0); } - set_jumps(current->topfallbacks, LABEL()); - free_stack(common, CURRENT_AS(bracketpos_fallback)->stacksize); + set_jumps(current->topbacktracks, LABEL()); + free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize); return; } -OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(bracketpos_fallback)->localptr); +OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(bracketpos_backtrack)->localptr); add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); -if (current->topfallbacks) +if (current->topbacktracks) { 
jump = JUMP(SLJIT_JUMP); - set_jumps(current->topfallbacks, LABEL()); + set_jumps(current->topbacktracks, LABEL()); /* Drop the stack frame. */ - free_stack(common, CURRENT_AS(bracketpos_fallback)->stacksize); + free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize); JUMPHERE(jump); } -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(bracketpos_fallback)->localptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracketpos_fallback)->framesize * sizeof(sljit_w)); +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(bracketpos_backtrack)->localptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracketpos_backtrack)->framesize * sizeof(sljit_w)); } -static void compile_braminzero_fallbackpath(compiler_common *common, struct fallback_common *current) +static void compile_braminzero_backtrackpath(compiler_common *common, struct backtrack_common *current) { -assert_fallback fallback; +assert_backtrack backtrack; current->top = NULL; -current->topfallbacks = NULL; -current->nextfallbacks = NULL; +current->topbacktracks = NULL; +current->nextbacktracks = NULL; if (current->cc[1] > OP_ASSERTBACK_NOT) { - /* Manual call of compile_bracket_hotpath and compile_bracket_fallbackpath. */ - compile_bracket_hotpath(common, current->cc, current); - compile_bracket_fallbackpath(common, current->top); + /* Manual call of compile_bracket_trypath and compile_bracket_backtrackpath. */ + compile_bracket_trypath(common, current->cc, current); + compile_bracket_backtrackpath(common, current->top); } else { - memset(&fallback, 0, sizeof(fallback)); - fallback.common.cc = current->cc; - fallback.hotpath = CURRENT_AS(braminzero_fallback)->hotpath; - /* Manual call of compile_assert_hotpath. */ - compile_assert_hotpath(common, current->cc, &fallback, FALSE); + memset(&backtrack, 0, sizeof(backtrack)); + backtrack.common.cc = current->cc; + backtrack.trypath = CURRENT_AS(braminzero_backtrack)->trypath; + /* Manual call of compile_assert_trypath. 
*/ + compile_assert_trypath(common, current->cc, &backtrack, FALSE); } -SLJIT_ASSERT(!current->nextfallbacks && !current->topfallbacks); +SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks); } -static void compile_fallbackpath(compiler_common *common, struct fallback_common *current) +static void compile_backtrackpath(compiler_common *common, struct backtrack_common *current) { DEFINE_COMPILER; while (current) { - if (current->nextfallbacks != NULL) - set_jumps(current->nextfallbacks, LABEL()); + if (current->nextbacktracks != NULL) + set_jumps(current->nextbacktracks, LABEL()); switch(*current->cc) { case OP_SET_SOM: @@ -6202,23 +6666,23 @@ while (current) #if defined SUPPORT_UTF || !defined COMPILE_PCRE8 case OP_XCLASS: #endif - compile_iterator_fallbackpath(common, current); + compile_iterator_backtrackpath(common, current); break; case OP_REF: case OP_REFI: - compile_ref_iterator_fallbackpath(common, current); + compile_ref_iterator_backtrackpath(common, current); break; case OP_RECURSE: - compile_recurse_fallbackpath(common, current); + compile_recurse_backtrackpath(common, current); break; case OP_ASSERT: case OP_ASSERT_NOT: case OP_ASSERTBACK: case OP_ASSERTBACK_NOT: - compile_assert_fallbackpath(common, current); + compile_assert_backtrackpath(common, current); break; case OP_ONCE: @@ -6229,14 +6693,14 @@ while (current) case OP_SBRA: case OP_SCBRA: case OP_SCOND: - compile_bracket_fallbackpath(common, current); + compile_bracket_backtrackpath(common, current); break; case OP_BRAZERO: if (current->cc[1] > OP_ASSERTBACK_NOT) - compile_bracket_fallbackpath(common, current); + compile_bracket_backtrackpath(common, current); else - compile_assert_fallbackpath(common, current); + compile_assert_backtrackpath(common, current); break; case OP_BRAPOS: @@ -6244,17 +6708,31 @@ while (current) case OP_SBRAPOS: case OP_SCBRAPOS: case OP_BRAPOSZERO: - compile_bracketpos_fallbackpath(common, current); + compile_bracketpos_backtrackpath(common, current); 
break; case OP_BRAMINZERO: - compile_braminzero_fallbackpath(common, current); + compile_braminzero_backtrackpath(common, current); + break; + + case OP_MARK: + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + free_stack(common, 1); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP1, 0); + break; + + case OP_COMMIT: + OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH); + if (common->leavelabel == NULL) + add_jump(compiler, &common->leave, JUMP(SLJIT_JUMP)); + else + JUMPTO(SLJIT_JUMP, common->leavelabel); break; case OP_FAIL: case OP_ACCEPT: case OP_ASSERT_ACCEPT: - set_jumps(current->topfallbacks, LABEL()); + set_jumps(current->topbacktracks, LABEL()); break; default: @@ -6275,7 +6753,9 @@ int localsize = get_localsize(common, ccbegin, ccend); int framesize = get_framesize(common, cc, TRUE); int alternativesize; BOOL needsframe; -fallback_common altfallback; +backtrack_common altbacktrack; +struct sljit_label *save_leavelabel = common->leavelabel; +jump_list *save_leave = common->leave; struct sljit_jump *jump; SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS); @@ -6284,64 +6764,75 @@ if (!needsframe) framesize = 0; alternativesize = *(cc + GET(cc, 1)) == OP_ALT ? 
1 : 0; -SLJIT_ASSERT(common->currententry->entry == NULL); +SLJIT_ASSERT(common->currententry->entry == NULL && common->recursive_head != 0); common->currententry->entry = LABEL(); set_jumps(common->currententry->calls, common->currententry->entry); -sljit_emit_fast_enter(compiler, TMP2, 0, 1, 5, 5, common->localsize); +sljit_emit_fast_enter(compiler, TMP2, 0); allocate_stack(common, localsize + framesize + alternativesize); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(localsize + framesize + alternativesize - 1), TMP2, 0); copy_locals(common, ccbegin, ccend, TRUE, localsize + framesize + alternativesize, framesize + alternativesize); -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), RECURSIVE_HEAD, STACK_TOP, 0); +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head, STACK_TOP, 0); if (needsframe) - init_frame(common, cc, framesize + alternativesize - 1, alternativesize, FALSE); + init_frame(common, cc, framesize + alternativesize - 1, alternativesize, TRUE); if (alternativesize > 0) OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); -memset(&altfallback, 0, sizeof(fallback_common)); +memset(&altbacktrack, 0, sizeof(backtrack_common)); +common->leavelabel = NULL; common->acceptlabel = NULL; +common->leave = NULL; common->accept = NULL; -altfallback.cc = ccbegin; +altbacktrack.cc = ccbegin; cc += GET(cc, 1); while (1) { - altfallback.top = NULL; - altfallback.topfallbacks = NULL; + altbacktrack.top = NULL; + altbacktrack.topbacktracks = NULL; - if (altfallback.cc != ccbegin) + if (altbacktrack.cc != ccbegin) OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); - compile_hotpath(common, altfallback.cc, cc, &altfallback); + compile_trypath(common, altbacktrack.cc, cc, &altbacktrack); if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) + { + common->leavelabel = save_leavelabel; + common->leave = save_leave; return; + } add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP)); - compile_fallbackpath(common, altfallback.top); + 
compile_backtrackpath(common, altbacktrack.top); if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) + { + common->leavelabel = save_leavelabel; + common->leave = save_leave; return; - set_jumps(altfallback.topfallbacks, LABEL()); + } + set_jumps(altbacktrack.topbacktracks, LABEL()); if (*cc != OP_ALT) break; - altfallback.cc = cc + 1 + LINK_SIZE; + altbacktrack.cc = cc + 1 + LINK_SIZE; cc += GET(cc, 1); } /* None of them matched. */ +if (common->leave != NULL) + set_jumps(common->leave, LABEL()); + OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0); jump = JUMP(SLJIT_JUMP); set_jumps(common->accept, LABEL()); -OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), RECURSIVE_HEAD); +OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head); if (needsframe) { - OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)); OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_w)); add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_w)); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP3, 0); } OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1); @@ -6350,31 +6841,34 @@ copy_locals(common, ccbegin, ccend, FALSE, localsize + framesize + alternativesi free_stack(common, localsize + framesize + alternativesize); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_w)); OP1(SLJIT_MOV, TMP1, 0, TMP3, 0); -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), RECURSIVE_HEAD, TMP2, 0); +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head, TMP2, 0); sljit_emit_fast_return(compiler, SLJIT_MEM1(STACK_TOP), 0); + +common->leavelabel = save_leavelabel; +common->leave = save_leave; } -#undef COMPILE_FALLBACKPATH +#undef COMPILE_BACKTRACKPATH #undef CURRENT_AS void -PRIV(jit_compile)(const REAL_PCRE *re, PUBL(extra) *extra) +PRIV(jit_compile)(const REAL_PCRE *re, PUBL(extra) *extra, int 
mode) { struct sljit_compiler *compiler; -fallback_common rootfallback; +backtrack_common rootbacktrack; compiler_common common_data; compiler_common *common = &common_data; const pcre_uint8 *tables = re->tables; pcre_study_data *study; +int localsize; pcre_uchar *ccend; -executable_function *function; +executable_functions *functions; void *executable_func; sljit_uw executable_size; -struct sljit_label *leave; struct sljit_label *mainloop = NULL; struct sljit_label *empty_match_found; -struct sljit_label *empty_match_fallback; -struct sljit_jump *alloc_error; +struct sljit_label *empty_match_backtrack; +struct sljit_jump *jump; struct sljit_jump *reqbyte_notfound = NULL; struct sljit_jump *empty_match; @@ -6384,14 +6878,14 @@ study = extra->study_data; if (!tables) tables = PRIV(default_tables); -memset(&rootfallback, 0, sizeof(fallback_common)); -rootfallback.cc = (pcre_uchar *)re + re->name_table_offset + re->name_count * re->name_entry_size; +memset(&rootbacktrack, 0, sizeof(backtrack_common)); +memset(common, 0, sizeof(compiler_common)); +rootbacktrack.cc = (pcre_uchar *)re + re->name_table_offset + re->name_count * re->name_entry_size; -common->compiler = NULL; -common->start = rootfallback.cc; -common->cbraptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_w); +common->start = rootbacktrack.cc; common->fcc = tables + fcc_offset; common->lcc = (sljit_w)(tables + lcc_offset); +common->mode = mode; common->nltype = NLTYPE_FIXED; switch(re->options & PCRE_NEWLINE_BITS) { @@ -6429,20 +6923,6 @@ common->ctypes = (sljit_w)(tables + ctypes_offset); common->name_table = (sljit_w)((pcre_uchar *)re + re->name_table_offset); common->name_count = re->name_count; common->name_entry_size = re->name_entry_size; -common->acceptlabel = NULL; -common->stubs = NULL; -common->entries = NULL; -common->currententry = NULL; -common->accept = NULL; -common->calllimit = NULL; -common->stackalloc = NULL; -common->revertframes = NULL; -common->wordboundary = NULL; 
-common->anynewline = NULL; -common->hspace = NULL; -common->vspace = NULL; -common->casefulcmp = NULL; -common->caselesscmp = NULL; common->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0; #ifdef SUPPORT_UTF /* PCRE_UTF16 has the same value as PCRE_UTF8. */ @@ -6450,26 +6930,52 @@ common->utf = (re->options & PCRE_UTF8) != 0; #ifdef SUPPORT_UCP common->use_ucp = (re->options & PCRE_UCP) != 0; #endif -common->utfreadchar = NULL; -#ifdef COMPILE_PCRE8 -common->utfreadtype8 = NULL; -#endif #endif /* SUPPORT_UTF */ -#ifdef SUPPORT_UCP -common->getucd = NULL; -#endif -ccend = bracketend(rootfallback.cc); -SLJIT_ASSERT(*rootfallback.cc == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET); -common->localsize = get_localspace(common, rootfallback.cc, ccend); -if (common->localsize < 0) +ccend = bracketend(rootbacktrack.cc); + +/* Calculate the local space size on the stack. */ +common->ovector_start = CALL_LIMIT + sizeof(sljit_w); + +SLJIT_ASSERT(*rootbacktrack.cc == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET); +localsize = get_localspace(common, rootbacktrack.cc, ccend); +if (localsize < 0) return; -common->localsize += common->cbraptr + (re->top_bracket + 1) * sizeof(sljit_w); -if (common->localsize > SLJIT_MAX_LOCAL_SIZE) + +/* Checking flags and updating ovector_start. */ +if (mode == JIT_COMPILE && (re->flags & PCRE_REQCHSET) != 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0) + { + common->req_char_ptr = common->ovector_start; + common->ovector_start += sizeof(sljit_w); + } +if (mode != JIT_COMPILE) + { + common->start_used_ptr = common->ovector_start; + common->ovector_start += sizeof(sljit_w); + if (mode == JIT_PARTIAL_SOFT_COMPILE) + { + common->hit_start = common->ovector_start; + common->ovector_start += sizeof(sljit_w); + } + } +if ((re->options & PCRE_FIRSTLINE) != 0) + { + common->first_line_end = common->ovector_start; + common->ovector_start += sizeof(sljit_w); + } + +/* Aligning ovector to even number of sljit words. 
*/ +if ((common->ovector_start & sizeof(sljit_w)) != 0) + common->ovector_start += sizeof(sljit_w); + +SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0)); +common->cbraptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_w); +localsize += common->cbraptr + (re->top_bracket + 1) * sizeof(sljit_w); +if (localsize > SLJIT_MAX_LOCAL_SIZE) return; -common->localptrs = (int*)SLJIT_MALLOC((ccend - rootfallback.cc) * sizeof(int)); +common->localptrs = (int *)SLJIT_MALLOC((ccend - rootbacktrack.cc) * sizeof(int)); if (!common->localptrs) return; -memset(common->localptrs, 0, (ccend - rootfallback.cc) * sizeof(int)); +memset(common->localptrs, 0, (ccend - rootbacktrack.cc) * sizeof(int)); set_localptrs(common, common->cbraptr + (re->top_bracket + 1) * sizeof(sljit_w), ccend); compiler = sljit_create_compiler(); @@ -6481,12 +6987,12 @@ if (!compiler) common->compiler = compiler; /* Main pcre_jit_exec entry. */ -sljit_emit_enter(compiler, 1, 5, 5, common->localsize); +sljit_emit_enter(compiler, 1, 5, 5, localsize); /* Register init. 
*/ reset_ovector(common, (re->top_bracket + 1) * 2); -if ((re->flags & PCRE_REQCHSET) != 0) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_CHAR_PTR, SLJIT_TEMPORARY_REG1, 0); +if (common->req_char_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, SLJIT_TEMPORARY_REG1, 0); OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_SAVED_REG1, 0); OP1(SLJIT_MOV, TMP1, 0, SLJIT_SAVED_REG1, 0); @@ -6498,27 +7004,44 @@ OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, limit)); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CALL_LIMIT, TMP1, 0); +if (mode == JIT_PARTIAL_SOFT_COMPILE) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0); + /* Main part of the matching */ if ((re->options & PCRE_ANCHORED) == 0) { mainloop = mainloop_entry(common, (re->flags & PCRE_HASCRORLF) != 0, (re->options & PCRE_FIRSTLINE) != 0); /* Forward search if possible. 
*/ - if ((re->flags & PCRE_FIRSTSET) != 0) - fast_forward_first_char(common, re->first_char, (re->flags & PCRE_FCH_CASELESS) != 0, (re->options & PCRE_FIRSTLINE) != 0); - else if ((re->flags & PCRE_STARTLINE) != 0) - fast_forward_newline(common, (re->options & PCRE_FIRSTLINE) != 0); - else if ((re->flags & PCRE_STARTLINE) == 0 && study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0) - fast_forward_start_bits(common, (sljit_uw)study->start_bits, (re->options & PCRE_FIRSTLINE) != 0); + if ((re->options & PCRE_NO_START_OPTIMIZE) == 0) + { + if ((re->flags & PCRE_FIRSTSET) != 0) + fast_forward_first_char(common, (pcre_uchar)re->first_char, (re->flags & PCRE_FCH_CASELESS) != 0, (re->options & PCRE_FIRSTLINE) != 0); + else if ((re->flags & PCRE_STARTLINE) != 0) + fast_forward_newline(common, (re->options & PCRE_FIRSTLINE) != 0); + else if ((re->flags & PCRE_STARTLINE) == 0 && study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0) + fast_forward_start_bits(common, (sljit_uw)study->start_bits, (re->options & PCRE_FIRSTLINE) != 0); + } } -if ((re->flags & PCRE_REQCHSET) != 0) - reqbyte_notfound = search_requested_char(common, re->req_char, (re->flags & PCRE_RCH_CASELESS) != 0, (re->flags & PCRE_FIRSTSET) != 0); +if (common->req_char_ptr != 0) + reqbyte_notfound = search_requested_char(common, (pcre_uchar)re->req_char, (re->flags & PCRE_RCH_CASELESS) != 0, (re->flags & PCRE_FIRSTSET) != 0); /* Store the current STR_PTR in OVECTOR(0). */ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0); /* Copy the limit of allowed recursions. */ OP1(SLJIT_MOV, CALL_COUNT, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CALL_LIMIT); +if (common->mark_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0); +/* Copy the beginning of the string. 
*/ +if (mode == JIT_PARTIAL_SOFT_COMPILE) + { + jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0); + JUMPHERE(jump); + } +else if (mode == JIT_PARTIAL_HARD_COMPILE) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0); -compile_hotpath(common, rootfallback.cc, ccend, &rootfallback); +compile_trypath(common, rootbacktrack.cc, ccend, &rootbacktrack); if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) { sljit_free_compiler(compiler); @@ -6535,11 +7058,20 @@ if (common->accept != NULL) /* This means we have a match. Update the ovector. */ copy_ovector(common, re->top_bracket + 1); -leave = LABEL(); +common->leavelabel = LABEL(); +if (common->leave != NULL) + set_jumps(common->leave, common->leavelabel); sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0); -empty_match_fallback = LABEL(); -compile_fallbackpath(common, rootfallback.top); +if (mode != JIT_COMPILE) + { + common->partialmatchlabel = LABEL(); + set_jumps(common->partialmatch, common->partialmatchlabel); + return_with_partial_match(common, common->leavelabel); + } + +empty_match_backtrack = LABEL(); +compile_backtrackpath(common, rootbacktrack.top); if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) { sljit_free_compiler(compiler); @@ -6547,7 +7079,17 @@ if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) return; } -SLJIT_ASSERT(rootfallback.prev == NULL); +SLJIT_ASSERT(rootbacktrack.prev == NULL); + +if (mode == JIT_PARTIAL_SOFT_COMPILE) + { + /* Update hit_start only in the first time. 
*/ + jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, TMP1, 0); + JUMPHERE(jump); + } /* Check we have remaining characters. */ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)); @@ -6556,9 +7098,9 @@ if ((re->options & PCRE_ANCHORED) == 0) { if ((re->options & PCRE_FIRSTLINE) == 0) { - if (study != NULL && study->minlength > 1) + if (mode == JIT_COMPILE && study != NULL && study->minlength > 1 && (re->options & PCRE_NO_START_OPTIMIZE) == 0) { - OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength)); + OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength + 1)); CMPTO(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_END, 0, mainloop); } else @@ -6566,38 +7108,42 @@ if ((re->options & PCRE_ANCHORED) == 0) } else { - if (study != NULL && study->minlength > 1) + SLJIT_ASSERT(common->first_line_end != 0); + if (mode == JIT_COMPILE && study != NULL && study->minlength > 1 && (re->options & PCRE_NO_START_OPTIMIZE) == 0) { - OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength)); + OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength + 1)); OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, STR_END, 0); COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_GREATER); - OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END); + OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end); COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_GREATER_EQUAL); JUMPTO(SLJIT_C_ZERO, mainloop); } else - CMPTO(SLJIT_C_LESS, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, mainloop); + CMPTO(SLJIT_C_LESS, STR_PTR, 0, 
SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, mainloop); } } +/* No more remaining characters. */ if (reqbyte_notfound != NULL) JUMPHERE(reqbyte_notfound); -/* Copy OVECTOR(1) to OVECTOR(0) */ -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)); + +if (mode == JIT_PARTIAL_SOFT_COMPILE) + CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0, common->partialmatchlabel); + OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH); -JUMPTO(SLJIT_JUMP, leave); +JUMPTO(SLJIT_JUMP, common->leavelabel); flush_stubs(common); JUMPHERE(empty_match); OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty)); -CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_fallback); +CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_backtrack); OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart)); CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_found); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found); -JUMPTO(SLJIT_JUMP, empty_match_fallback); +JUMPTO(SLJIT_JUMP, empty_match_backtrack); common->currententry = common->entries; while (common->currententry != NULL) @@ -6618,7 +7164,7 @@ while (common->currententry != NULL) /* This is a (really) rare case. */ set_jumps(common->stackalloc, LABEL()); /* RETURN_ADDR is not a saved register. 
*/ -sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, 1, 5, 5, common->localsize); +sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0); OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack)); @@ -6626,7 +7172,7 @@ OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top), STACK_ OP2(SLJIT_ADD, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit), SLJIT_IMM, STACK_GROWTH_RATE); sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize)); -alloc_error = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0); +jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0); OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack)); OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top)); @@ -6635,15 +7181,15 @@ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1); sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0); /* Allocation failed. */ -JUMPHERE(alloc_error); +JUMPHERE(jump); /* We break the return address cache here, but this is a really rare case. */ OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_JIT_STACKLIMIT); -JUMPTO(SLJIT_JUMP, leave); +JUMPTO(SLJIT_JUMP, common->leavelabel); /* Call limit reached. 
*/ set_jumps(common->calllimit, LABEL()); OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_MATCHLIMIT); -JUMPTO(SLJIT_JUMP, leave); +JUMPTO(SLJIT_JUMP, common->leavelabel); if (common->revertframes != NULL) { @@ -6695,10 +7241,20 @@ if (common->utfreadtype8 != NULL) #endif #endif /* COMPILE_PCRE8 */ #ifdef SUPPORT_UCP -if (common->getucd != NULL) +if (common->getunichartype != NULL) { - set_jumps(common->getucd, LABEL()); - do_getucd(common); + set_jumps(common->getunichartype, LABEL()); + do_getunichartype(common); + } +if (common->getunichartype_2 != NULL) + { + set_jumps(common->getunichartype_2, LABEL()); + do_getunichartype_2(common); + } +if (common->getunicharscript != NULL) + { + set_jumps(common->getunicharscript, LABEL()); + do_getunicharscript(common); } #endif @@ -6709,24 +7265,29 @@ sljit_free_compiler(compiler); if (executable_func == NULL) return; -function = SLJIT_MALLOC(sizeof(executable_function)); -if (function == NULL) +/* Reuse the function descriptor if possible. */ +if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && extra->executable_jit != NULL) + functions = (executable_functions *)extra->executable_jit; +else { - /* This case is highly unlikely since we just recently - freed a lot of memory. Although not impossible. */ - sljit_free_code(executable_func); - return; + functions = SLJIT_MALLOC(sizeof(executable_functions)); + if (functions == NULL) + { + /* This case is highly unlikely since we just recently + freed a lot of memory. Although not impossible. 
*/ + sljit_free_code(executable_func); + return; + } + memset(functions, 0, sizeof(executable_functions)); + extra->executable_jit = functions; + extra->flags |= PCRE_EXTRA_EXECUTABLE_JIT; } -function->executable_func = executable_func; -function->executable_size = executable_size; -function->callback = NULL; -function->userdata = NULL; -extra->executable_jit = function; -extra->flags |= PCRE_EXTRA_EXECUTABLE_JIT; +functions->executable_funcs[mode] = executable_func; +functions->executable_sizes[mode] = executable_size; } -static int jit_machine_stack_exec(jit_arguments *arguments, executable_function *function) +static int jit_machine_stack_exec(jit_arguments *arguments, void* executable_func) { union { void* executable_func; @@ -6740,16 +7301,15 @@ local_stack.base = local_stack.top; local_stack.limit = local_stack.base + LOCAL_SPACE_SIZE; local_stack.max_limit = local_stack.limit; arguments->stack = &local_stack; -convert_executable_func.executable_func = function->executable_func; +convert_executable_func.executable_func = executable_func; return convert_executable_func.call_executable_func(arguments); } int -PRIV(jit_exec)(const REAL_PCRE *re, void *executable_func, - const pcre_uchar *subject, int length, int start_offset, int options, - int match_limit, int *offsets, int offsetcount) +PRIV(jit_exec)(const REAL_PCRE *re, const PUBL(extra) *extra_data, const pcre_uchar *subject, + int length, int start_offset, int options, int *offsets, int offsetcount) { -executable_function *function = (executable_function*)executable_func; +executable_functions *functions = (executable_functions *)extra_data->executable_jit; union { void* executable_func; jit_function call_executable_func; @@ -6757,13 +7317,24 @@ union { jit_arguments arguments; int maxoffsetcount; int retval; +int mode = JIT_COMPILE; + +if ((options & PCRE_PARTIAL_HARD) != 0) + mode = JIT_PARTIAL_HARD_COMPILE; +else if ((options & PCRE_PARTIAL_SOFT) != 0) + mode = JIT_PARTIAL_SOFT_COMPILE; + +if 
(functions->executable_funcs[mode] == NULL) + return PCRE_ERROR_NULL; /* Sanity checks should be handled by pcre_exec. */ arguments.stack = NULL; arguments.str = subject + start_offset; arguments.begin = subject; arguments.end = subject + length; -arguments.calllimit = match_limit; /* JIT decreases this value less times. */ +arguments.mark_ptr = NULL; +/* JIT decreases this value less frequently than the interpreter. */ +arguments.calllimit = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : extra_data->match_limit; arguments.notbol = (options & PCRE_NOTBOL) != 0; arguments.noteol = (options & PCRE_NOTEOL) != 0; arguments.notempty = (options & PCRE_NOTEMPTY) != 0; @@ -6783,36 +7354,49 @@ if (offsetcount > maxoffsetcount) offsetcount = maxoffsetcount; arguments.offsetcount = offsetcount; -if (function->callback) - arguments.stack = (struct sljit_stack*)function->callback(function->userdata); +if (functions->callback) + arguments.stack = (struct sljit_stack *)functions->callback(functions->userdata); else - arguments.stack = (struct sljit_stack*)function->userdata; + arguments.stack = (struct sljit_stack *)functions->userdata; if (arguments.stack == NULL) - retval = jit_machine_stack_exec(&arguments, function); + retval = jit_machine_stack_exec(&arguments, functions->executable_funcs[mode]); else { - convert_executable_func.executable_func = function->executable_func; + convert_executable_func.executable_func = functions->executable_funcs[mode]; retval = convert_executable_func.call_executable_func(&arguments); } if (retval * 2 > offsetcount) retval = 0; +if ((extra_data->flags & PCRE_EXTRA_MARK) != 0) + *(extra_data->mark) = arguments.mark_ptr; + return retval; } void -PRIV(jit_free)(void *executable_func) +PRIV(jit_free)(void *executable_funcs) { -executable_function *function = (executable_function*)executable_func; -sljit_free_code(function->executable_func); -SLJIT_FREE(function); +int i; +executable_functions *functions = (executable_functions 
*)executable_funcs; +for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++) + { + if (functions->executable_funcs[i] != NULL) + sljit_free_code(functions->executable_funcs[i]); + } +SLJIT_FREE(functions); } int -PRIV(jit_get_size)(void *executable_func) +PRIV(jit_get_size)(void *executable_funcs) { -return ((executable_function*)executable_func)->executable_size; +int i; +sljit_uw size = 0; +sljit_uw *executable_sizes = ((executable_functions *)executable_funcs)->executable_sizes; +for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++) + size += executable_sizes[i]; +return (int)size; } const char* @@ -6846,7 +7430,7 @@ PCRE_EXP_DECL void pcre16_jit_stack_free(pcre16_jit_stack *stack) #endif { -sljit_free_stack((struct sljit_stack*)stack); +sljit_free_stack((struct sljit_stack *)stack); } #ifdef COMPILE_PCRE8 @@ -6857,14 +7441,14 @@ PCRE_EXP_DECL void pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata) #endif { -executable_function *function; +executable_functions *functions; if (extra != NULL && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && extra->executable_jit != NULL) { - function = (executable_function*)extra->executable_jit; - function->callback = callback; - function->userdata = userdata; + functions = (executable_functions *)extra->executable_jit; + functions->callback = callback; + functions->userdata = userdata; } } diff --git a/glib/pcre/pcre_study.c b/glib/pcre/pcre_study.c index 61deb56f0..85cb514fb 100644 --- a/glib/pcre/pcre_study.c +++ b/glib/pcre/pcre_study.c @@ -1123,7 +1123,7 @@ do case OP_HSPACE: SET_BIT(0x09); SET_BIT(0x20); -#ifdef COMPILE_PCRE8 +#ifdef SUPPORT_UTF if (utf) { #ifdef COMPILE_PCRE8 @@ -1148,7 +1148,7 @@ do SET_BIT(0x0B); SET_BIT(0x0C); SET_BIT(0x0D); -#ifdef COMPILE_PCRE8 +#ifdef SUPPORT_UTF if (utf) { #ifdef COMPILE_PCRE8 @@ -1418,7 +1418,8 @@ we don't have to change that code. 
*/ if (bits_set || min > 0 #ifdef SUPPORT_JIT - || (options & PCRE_STUDY_JIT_COMPILE) != 0 + || (options & (PCRE_STUDY_JIT_COMPILE | PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)) != 0 #endif ) { @@ -1478,7 +1479,13 @@ if (bits_set || min > 0 #ifdef SUPPORT_JIT extra->executable_jit = NULL; - if ((options & PCRE_STUDY_JIT_COMPILE) != 0) PRIV(jit_compile)(re, extra); + if ((options & PCRE_STUDY_JIT_COMPILE) != 0) + PRIV(jit_compile)(re, extra, JIT_COMPILE); + if ((options & PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE) != 0) + PRIV(jit_compile)(re, extra, JIT_PARTIAL_SOFT_COMPILE); + if ((options & PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE) != 0) + PRIV(jit_compile)(re, extra, JIT_PARTIAL_HARD_COMPILE); + if (study->flags == 0 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) == 0) { #ifdef COMPILE_PCRE8 diff --git a/glib/pcre/pcre_tables.c b/glib/pcre/pcre_tables.c index ddbf950a9..b580a4a73 100644 --- a/glib/pcre/pcre_tables.c +++ b/glib/pcre/pcre_tables.c @@ -435,151 +435,151 @@ const char PRIV(utt_names)[] = STRING_Zs0; const ucp_type_table PRIV(utt)[] = { - { 0, PT_ANY, 0 }, - { 4, PT_SC, ucp_Arabic }, - { 11, PT_SC, ucp_Armenian }, - { 20, PT_SC, ucp_Avestan }, - { 28, PT_SC, ucp_Balinese }, - { 37, PT_SC, ucp_Bamum }, - { 43, PT_SC, ucp_Batak }, - { 49, PT_SC, ucp_Bengali }, - { 57, PT_SC, ucp_Bopomofo }, - { 66, PT_SC, ucp_Brahmi }, - { 73, PT_SC, ucp_Braille }, - { 81, PT_SC, ucp_Buginese }, - { 90, PT_SC, ucp_Buhid }, - { 96, PT_GC, ucp_C }, - { 98, PT_SC, ucp_Canadian_Aboriginal }, - { 118, PT_SC, ucp_Carian }, - { 125, PT_PC, ucp_Cc }, - { 128, PT_PC, ucp_Cf }, - { 131, PT_SC, ucp_Chakma }, - { 138, PT_SC, ucp_Cham }, - { 143, PT_SC, ucp_Cherokee }, - { 152, PT_PC, ucp_Cn }, - { 155, PT_PC, ucp_Co }, - { 158, PT_SC, ucp_Common }, - { 165, PT_SC, ucp_Coptic }, - { 172, PT_PC, ucp_Cs }, - { 175, PT_SC, ucp_Cuneiform }, - { 185, PT_SC, ucp_Cypriot }, - { 193, PT_SC, ucp_Cyrillic }, - { 202, PT_SC, ucp_Deseret }, - { 210, PT_SC, ucp_Devanagari }, 
- { 221, PT_SC, ucp_Egyptian_Hieroglyphs }, - { 242, PT_SC, ucp_Ethiopic }, - { 251, PT_SC, ucp_Georgian }, - { 260, PT_SC, ucp_Glagolitic }, - { 271, PT_SC, ucp_Gothic }, - { 278, PT_SC, ucp_Greek }, - { 284, PT_SC, ucp_Gujarati }, - { 293, PT_SC, ucp_Gurmukhi }, - { 302, PT_SC, ucp_Han }, - { 306, PT_SC, ucp_Hangul }, - { 313, PT_SC, ucp_Hanunoo }, - { 321, PT_SC, ucp_Hebrew }, - { 328, PT_SC, ucp_Hiragana }, - { 337, PT_SC, ucp_Imperial_Aramaic }, - { 354, PT_SC, ucp_Inherited }, - { 364, PT_SC, ucp_Inscriptional_Pahlavi }, - { 386, PT_SC, ucp_Inscriptional_Parthian }, - { 409, PT_SC, ucp_Javanese }, - { 418, PT_SC, ucp_Kaithi }, - { 425, PT_SC, ucp_Kannada }, - { 433, PT_SC, ucp_Katakana }, - { 442, PT_SC, ucp_Kayah_Li }, - { 451, PT_SC, ucp_Kharoshthi }, - { 462, PT_SC, ucp_Khmer }, - { 468, PT_GC, ucp_L }, - { 470, PT_LAMP, 0 }, - { 473, PT_SC, ucp_Lao }, - { 477, PT_SC, ucp_Latin }, - { 483, PT_SC, ucp_Lepcha }, - { 490, PT_SC, ucp_Limbu }, - { 496, PT_SC, ucp_Linear_B }, - { 505, PT_SC, ucp_Lisu }, - { 510, PT_PC, ucp_Ll }, - { 513, PT_PC, ucp_Lm }, - { 516, PT_PC, ucp_Lo }, - { 519, PT_PC, ucp_Lt }, - { 522, PT_PC, ucp_Lu }, - { 525, PT_SC, ucp_Lycian }, - { 532, PT_SC, ucp_Lydian }, - { 539, PT_GC, ucp_M }, - { 541, PT_SC, ucp_Malayalam }, - { 551, PT_SC, ucp_Mandaic }, - { 559, PT_PC, ucp_Mc }, - { 562, PT_PC, ucp_Me }, - { 565, PT_SC, ucp_Meetei_Mayek }, - { 578, PT_SC, ucp_Meroitic_Cursive }, - { 595, PT_SC, ucp_Meroitic_Hieroglyphs }, - { 616, PT_SC, ucp_Miao }, - { 621, PT_PC, ucp_Mn }, - { 624, PT_SC, ucp_Mongolian }, - { 634, PT_SC, ucp_Myanmar }, - { 642, PT_GC, ucp_N }, - { 644, PT_PC, ucp_Nd }, - { 647, PT_SC, ucp_New_Tai_Lue }, - { 659, PT_SC, ucp_Nko }, - { 663, PT_PC, ucp_Nl }, - { 666, PT_PC, ucp_No }, - { 669, PT_SC, ucp_Ogham }, - { 675, PT_SC, ucp_Ol_Chiki }, - { 684, PT_SC, ucp_Old_Italic }, - { 695, PT_SC, ucp_Old_Persian }, - { 707, PT_SC, ucp_Old_South_Arabian }, - { 725, PT_SC, ucp_Old_Turkic }, - { 736, PT_SC, ucp_Oriya }, - { 742, 
PT_SC, ucp_Osmanya }, - { 750, PT_GC, ucp_P }, - { 752, PT_PC, ucp_Pc }, - { 755, PT_PC, ucp_Pd }, - { 758, PT_PC, ucp_Pe }, - { 761, PT_PC, ucp_Pf }, - { 764, PT_SC, ucp_Phags_Pa }, - { 773, PT_SC, ucp_Phoenician }, - { 784, PT_PC, ucp_Pi }, - { 787, PT_PC, ucp_Po }, - { 790, PT_PC, ucp_Ps }, - { 793, PT_SC, ucp_Rejang }, - { 800, PT_SC, ucp_Runic }, - { 806, PT_GC, ucp_S }, - { 808, PT_SC, ucp_Samaritan }, - { 818, PT_SC, ucp_Saurashtra }, - { 829, PT_PC, ucp_Sc }, - { 832, PT_SC, ucp_Sharada }, - { 840, PT_SC, ucp_Shavian }, - { 848, PT_SC, ucp_Sinhala }, - { 856, PT_PC, ucp_Sk }, - { 859, PT_PC, ucp_Sm }, - { 862, PT_PC, ucp_So }, - { 865, PT_SC, ucp_Sora_Sompeng }, - { 878, PT_SC, ucp_Sundanese }, - { 888, PT_SC, ucp_Syloti_Nagri }, - { 901, PT_SC, ucp_Syriac }, - { 908, PT_SC, ucp_Tagalog }, - { 916, PT_SC, ucp_Tagbanwa }, - { 925, PT_SC, ucp_Tai_Le }, - { 932, PT_SC, ucp_Tai_Tham }, - { 941, PT_SC, ucp_Tai_Viet }, - { 950, PT_SC, ucp_Takri }, - { 956, PT_SC, ucp_Tamil }, - { 962, PT_SC, ucp_Telugu }, - { 969, PT_SC, ucp_Thaana }, - { 976, PT_SC, ucp_Thai }, - { 981, PT_SC, ucp_Tibetan }, - { 989, PT_SC, ucp_Tifinagh }, - { 998, PT_SC, ucp_Ugaritic }, - { 1007, PT_SC, ucp_Vai }, - { 1011, PT_ALNUM, 0 }, - { 1015, PT_PXSPACE, 0 }, - { 1019, PT_SPACE, 0 }, - { 1023, PT_WORD, 0 }, - { 1027, PT_SC, ucp_Yi }, - { 1030, PT_GC, ucp_Z }, - { 1032, PT_PC, ucp_Zl }, - { 1035, PT_PC, ucp_Zp }, - { 1038, PT_PC, ucp_Zs } + { 0, PT_ANY, 0 }, + { 4, PT_SC, ucp_Arabic }, + { 11, PT_SC, ucp_Armenian }, + { 20, PT_SC, ucp_Avestan }, + { 28, PT_SC, ucp_Balinese }, + { 37, PT_SC, ucp_Bamum }, + { 43, PT_SC, ucp_Batak }, + { 49, PT_SC, ucp_Bengali }, + { 57, PT_SC, ucp_Bopomofo }, + { 66, PT_SC, ucp_Brahmi }, + { 73, PT_SC, ucp_Braille }, + { 81, PT_SC, ucp_Buginese }, + { 90, PT_SC, ucp_Buhid }, + { 96, PT_GC, ucp_C }, + { 98, PT_SC, ucp_Canadian_Aboriginal }, + { 118, PT_SC, ucp_Carian }, + { 125, PT_PC, ucp_Cc }, + { 128, PT_PC, ucp_Cf }, + { 131, PT_SC, ucp_Chakma }, + { 138, 
PT_SC, ucp_Cham }, + { 143, PT_SC, ucp_Cherokee }, + { 152, PT_PC, ucp_Cn }, + { 155, PT_PC, ucp_Co }, + { 158, PT_SC, ucp_Common }, + { 165, PT_SC, ucp_Coptic }, + { 172, PT_PC, ucp_Cs }, + { 175, PT_SC, ucp_Cuneiform }, + { 185, PT_SC, ucp_Cypriot }, + { 193, PT_SC, ucp_Cyrillic }, + { 202, PT_SC, ucp_Deseret }, + { 210, PT_SC, ucp_Devanagari }, + { 221, PT_SC, ucp_Egyptian_Hieroglyphs }, + { 242, PT_SC, ucp_Ethiopic }, + { 251, PT_SC, ucp_Georgian }, + { 260, PT_SC, ucp_Glagolitic }, + { 271, PT_SC, ucp_Gothic }, + { 278, PT_SC, ucp_Greek }, + { 284, PT_SC, ucp_Gujarati }, + { 293, PT_SC, ucp_Gurmukhi }, + { 302, PT_SC, ucp_Han }, + { 306, PT_SC, ucp_Hangul }, + { 313, PT_SC, ucp_Hanunoo }, + { 321, PT_SC, ucp_Hebrew }, + { 328, PT_SC, ucp_Hiragana }, + { 337, PT_SC, ucp_Imperial_Aramaic }, + { 354, PT_SC, ucp_Inherited }, + { 364, PT_SC, ucp_Inscriptional_Pahlavi }, + { 386, PT_SC, ucp_Inscriptional_Parthian }, + { 409, PT_SC, ucp_Javanese }, + { 418, PT_SC, ucp_Kaithi }, + { 425, PT_SC, ucp_Kannada }, + { 433, PT_SC, ucp_Katakana }, + { 442, PT_SC, ucp_Kayah_Li }, + { 451, PT_SC, ucp_Kharoshthi }, + { 462, PT_SC, ucp_Khmer }, + { 468, PT_GC, ucp_L }, + { 470, PT_LAMP, 0 }, + { 473, PT_SC, ucp_Lao }, + { 477, PT_SC, ucp_Latin }, + { 483, PT_SC, ucp_Lepcha }, + { 490, PT_SC, ucp_Limbu }, + { 496, PT_SC, ucp_Linear_B }, + { 505, PT_SC, ucp_Lisu }, + { 510, PT_PC, ucp_Ll }, + { 513, PT_PC, ucp_Lm }, + { 516, PT_PC, ucp_Lo }, + { 519, PT_PC, ucp_Lt }, + { 522, PT_PC, ucp_Lu }, + { 525, PT_SC, ucp_Lycian }, + { 532, PT_SC, ucp_Lydian }, + { 539, PT_GC, ucp_M }, + { 541, PT_SC, ucp_Malayalam }, + { 551, PT_SC, ucp_Mandaic }, + { 559, PT_PC, ucp_Mc }, + { 562, PT_PC, ucp_Me }, + { 565, PT_SC, ucp_Meetei_Mayek }, + { 578, PT_SC, ucp_Meroitic_Cursive }, + { 595, PT_SC, ucp_Meroitic_Hieroglyphs }, + { 616, PT_SC, ucp_Miao }, + { 621, PT_PC, ucp_Mn }, + { 624, PT_SC, ucp_Mongolian }, + { 634, PT_SC, ucp_Myanmar }, + { 642, PT_GC, ucp_N }, + { 644, PT_PC, ucp_Nd }, + { 
647, PT_SC, ucp_New_Tai_Lue }, + { 659, PT_SC, ucp_Nko }, + { 663, PT_PC, ucp_Nl }, + { 666, PT_PC, ucp_No }, + { 669, PT_SC, ucp_Ogham }, + { 675, PT_SC, ucp_Ol_Chiki }, + { 684, PT_SC, ucp_Old_Italic }, + { 695, PT_SC, ucp_Old_Persian }, + { 707, PT_SC, ucp_Old_South_Arabian }, + { 725, PT_SC, ucp_Old_Turkic }, + { 736, PT_SC, ucp_Oriya }, + { 742, PT_SC, ucp_Osmanya }, + { 750, PT_GC, ucp_P }, + { 752, PT_PC, ucp_Pc }, + { 755, PT_PC, ucp_Pd }, + { 758, PT_PC, ucp_Pe }, + { 761, PT_PC, ucp_Pf }, + { 764, PT_SC, ucp_Phags_Pa }, + { 773, PT_SC, ucp_Phoenician }, + { 784, PT_PC, ucp_Pi }, + { 787, PT_PC, ucp_Po }, + { 790, PT_PC, ucp_Ps }, + { 793, PT_SC, ucp_Rejang }, + { 800, PT_SC, ucp_Runic }, + { 806, PT_GC, ucp_S }, + { 808, PT_SC, ucp_Samaritan }, + { 818, PT_SC, ucp_Saurashtra }, + { 829, PT_PC, ucp_Sc }, + { 832, PT_SC, ucp_Sharada }, + { 840, PT_SC, ucp_Shavian }, + { 848, PT_SC, ucp_Sinhala }, + { 856, PT_PC, ucp_Sk }, + { 859, PT_PC, ucp_Sm }, + { 862, PT_PC, ucp_So }, + { 865, PT_SC, ucp_Sora_Sompeng }, + { 878, PT_SC, ucp_Sundanese }, + { 888, PT_SC, ucp_Syloti_Nagri }, + { 901, PT_SC, ucp_Syriac }, + { 908, PT_SC, ucp_Tagalog }, + { 916, PT_SC, ucp_Tagbanwa }, + { 925, PT_SC, ucp_Tai_Le }, + { 932, PT_SC, ucp_Tai_Tham }, + { 941, PT_SC, ucp_Tai_Viet }, + { 950, PT_SC, ucp_Takri }, + { 956, PT_SC, ucp_Tamil }, + { 962, PT_SC, ucp_Telugu }, + { 969, PT_SC, ucp_Thaana }, + { 976, PT_SC, ucp_Thai }, + { 981, PT_SC, ucp_Tibetan }, + { 989, PT_SC, ucp_Tifinagh }, + { 998, PT_SC, ucp_Ugaritic }, + { 1007, PT_SC, ucp_Vai }, + { 1011, PT_ALNUM, 0 }, + { 1015, PT_PXSPACE, 0 }, + { 1019, PT_SPACE, 0 }, + { 1023, PT_WORD, 0 }, + { 1027, PT_SC, ucp_Yi }, + { 1030, PT_GC, ucp_Z }, + { 1032, PT_PC, ucp_Zl }, + { 1035, PT_PC, ucp_Zp }, + { 1038, PT_PC, ucp_Zs } }; const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table); @@ -587,7 +587,7 @@ const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table); unsigned int _pcre_ucp_othercase(const 
unsigned int c) { - unsigned int oc; + unsigned int oc = NOTACHAR; if ((oc = g_unichar_toupper(c)) != c) return oc; diff --git a/glib/pcre/ucp.h b/glib/pcre/ucp.h index 53a48c915..f1c14be5d 100644 --- a/glib/pcre/ucp.h +++ b/glib/pcre/ucp.h @@ -10,7 +10,11 @@ the UCD access macros. New values that are added for new releases of Unicode should always be at the end of each enum, for backwards compatibility. */ /* These are the general character categories. */ +#ifdef GLIB_COMPILATION #include "gunicode.h" +#else +#include +#endif enum { ucp_C, /* Other */ @@ -60,6 +64,9 @@ enum { /* These are the script identifications. */ enum { + ucp_Common = G_UNICODE_SCRIPT_COMMON, + ucp_Inherited = G_UNICODE_SCRIPT_INHERITED, + ucp_Arabic = G_UNICODE_SCRIPT_ARABIC, ucp_Armenian = G_UNICODE_SCRIPT_ARMENIAN, ucp_Bengali = G_UNICODE_SCRIPT_BENGALI, @@ -69,7 +76,6 @@ enum { ucp_Buhid = G_UNICODE_SCRIPT_BUHID, ucp_Canadian_Aboriginal = G_UNICODE_SCRIPT_CANADIAN_ABORIGINAL, ucp_Cherokee = G_UNICODE_SCRIPT_CHEROKEE, - ucp_Common = G_UNICODE_SCRIPT_COMMON, ucp_Coptic = G_UNICODE_SCRIPT_COPTIC, ucp_Cypriot = G_UNICODE_SCRIPT_CYPRIOT, ucp_Cyrillic = G_UNICODE_SCRIPT_CYRILLIC, @@ -87,7 +93,6 @@ enum { ucp_Hanunoo = G_UNICODE_SCRIPT_HANUNOO, ucp_Hebrew = G_UNICODE_SCRIPT_HEBREW, ucp_Hiragana = G_UNICODE_SCRIPT_HIRAGANA, - ucp_Inherited = G_UNICODE_SCRIPT_INHERITED, ucp_Kannada = G_UNICODE_SCRIPT_KANNADA, ucp_Katakana = G_UNICODE_SCRIPT_KATAKANA, ucp_Kharoshthi = G_UNICODE_SCRIPT_KHAROSHTHI, diff --git a/glib/update-pcre/ucp.patch b/glib/update-pcre/ucp.patch index 1b82f27b6..402020fa8 100644 --- a/glib/update-pcre/ucp.patch +++ b/glib/update-pcre/ucp.patch @@ -1,11 +1,10 @@ -From 384879be07418fc6224b6603a2e8ca6f11e178fc Mon Sep 17 00:00:00 2001 +From 23d48c5fc7aa889dc7798f9c64acd43d9cb34683 Mon Sep 17 00:00:00 2001 From: Christian Persch Date: Sun, 12 Feb 2012 21:20:33 +0100 Subject: [PATCH] regex: Use glib for unicode data Use g_unichar_type() and g_unichar_get_script() instead of pcre 
tables. --- - glib/pcre/Makefile.am | 1 - glib/pcre/pcre_compile.c | 26 +++--- glib/pcre/pcre_dfa_exec.c | 96 ++++++++-------- glib/pcre/pcre_exec.c | 26 +++--- @@ -13,25 +12,13 @@ Use g_unichar_type() and g_unichar_get_script() instead of pcre tables. glib/pcre/pcre_tables.c | 16 +++ glib/pcre/pcre_xclass.c | 24 ++-- glib/pcre/ucp.h | 265 +++++++++++++++++++++++---------------------- - 8 files changed, 239 insertions(+), 226 deletions(-) + 7 files changed, 239 insertions(+), 225 deletions(-) -diff --git a/glib/pcre/Makefile.am b/glib/pcre/Makefile.am -index 21da5c5..1981953 100644 ---- a/glib/pcre/Makefile.am -+++ b/glib/pcre/Makefile.am -@@ -51,7 +51,6 @@ libpcre_la_SOURCES = \ - pcre_string_utils.c \ - pcre_study.c \ - pcre_tables.c \ -- pcre_ucd.c \ - pcre_valid_utf8.c \ - pcre_version.c \ - pcre_xclass.c \ diff --git a/glib/pcre/pcre_compile.c b/glib/pcre/pcre_compile.c -index eb985df..b44055a 100644 +index 21bef80..a6c84e1 100644 --- a/glib/pcre/pcre_compile.c +++ b/glib/pcre/pcre_compile.c -@@ -2890,43 +2890,43 @@ Returns: TRUE if auto-possessifying is OK +@@ -2920,43 +2920,43 @@ Returns: TRUE if auto-possessifying is OK static BOOL check_char_prop(int c, int ptype, int pdata, BOOL negated) { @@ -89,10 +76,10 @@ index eb985df..b44055a 100644 } return FALSE; diff --git a/glib/pcre/pcre_dfa_exec.c b/glib/pcre/pcre_dfa_exec.c -index 21d7be6..41ff65b 100644 +index 9565d46..3f913ce 100644 --- a/glib/pcre/pcre_dfa_exec.c +++ b/glib/pcre/pcre_dfa_exec.c -@@ -1015,7 +1015,7 @@ for (;;) +@@ -1060,7 +1060,7 @@ for (;;) if (clen > 0) { BOOL OK; @@ -101,7 +88,7 @@ index 21d7be6..41ff65b 100644 switch(code[1]) { case PT_ANY: -@@ -1023,43 +1023,43 @@ for (;;) +@@ -1068,43 +1068,43 @@ for (;;) break; case PT_LAMP: @@ -156,7 +143,7 @@ index 21d7be6..41ff65b 100644 c == CHAR_UNDERSCORE; break; -@@ -1209,7 +1209,7 @@ for (;;) +@@ -1294,7 +1294,7 @@ for (;;) if (clen > 0) { BOOL OK; @@ -165,7 +152,7 @@ index 21d7be6..41ff65b 100644 switch(code[2]) { case PT_ANY: -@@ -1217,43 
+1217,43 @@ for (;;) +@@ -1302,43 +1302,43 @@ for (;;) break; case PT_LAMP: @@ -220,7 +207,7 @@ index 21d7be6..41ff65b 100644 c == CHAR_UNDERSCORE; break; -@@ -1456,7 +1456,7 @@ for (;;) +@@ -1541,7 +1541,7 @@ for (;;) if (clen > 0) { BOOL OK; @@ -229,7 +216,7 @@ index 21d7be6..41ff65b 100644 switch(code[2]) { case PT_ANY: -@@ -1464,43 +1464,43 @@ for (;;) +@@ -1549,43 +1549,43 @@ for (;;) break; case PT_LAMP: @@ -284,7 +271,7 @@ index 21d7be6..41ff65b 100644 c == CHAR_UNDERSCORE; break; -@@ -1728,7 +1728,7 @@ for (;;) +@@ -1813,7 +1813,7 @@ for (;;) if (clen > 0) { BOOL OK; @@ -293,7 +280,7 @@ index 21d7be6..41ff65b 100644 switch(code[1 + IMM2_SIZE + 1]) { case PT_ANY: -@@ -1736,43 +1736,43 @@ for (;;) +@@ -1821,43 +1821,43 @@ for (;;) break; case PT_LAMP: @@ -349,10 +336,10 @@ index 21d7be6..41ff65b 100644 break; diff --git a/glib/pcre/pcre_exec.c b/glib/pcre/pcre_exec.c -index b715353..8eb3162 100644 +index 830b8b5..c89a3f9 100644 --- a/glib/pcre/pcre_exec.c +++ b/glib/pcre/pcre_exec.c -@@ -2507,7 +2507,7 @@ for (;;) +@@ -2565,7 +2565,7 @@ for (;;) } GETCHARINCTEST(c, eptr); { @@ -361,7 +348,7 @@ index b715353..8eb3162 100644 switch(ecode[1]) { -@@ -2516,44 +2516,44 @@ for (;;) +@@ -2574,44 +2574,44 @@ for (;;) break; case PT_LAMP: @@ -416,7 +403,7 @@ index b715353..8eb3162 100644 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || c == CHAR_FF || c == CHAR_CR) == (op == OP_NOTPROP)) -@@ -2561,8 +2561,8 @@ for (;;) +@@ -2619,8 +2619,8 @@ for (;;) break; case PT_WORD: @@ -428,10 +415,10 @@ index b715353..8eb3162 100644 RRETURN(MATCH_NOMATCH); break; diff --git a/glib/pcre/pcre_internal.h b/glib/pcre/pcre_internal.h -index e5a4b6a..41c7ee3 100644 +index 181c312..234af1b 100644 --- a/glib/pcre/pcre_internal.h +++ b/glib/pcre/pcre_internal.h -@@ -2315,15 +2315,12 @@ extern const int PRIV(ucp_typerange)[]; +@@ -2329,15 +2329,12 @@ extern const int PRIV(ucp_typerange)[]; #ifdef SUPPORT_UCP /* UCD access macros */ @@ -452,10 +439,10 @@ index e5a4b6a..41c7ee3 100644 #endif 
/* SUPPORT_UCP */ diff --git a/glib/pcre/pcre_tables.c b/glib/pcre/pcre_tables.c -index c8134ec..47becc7 100644 +index 7ac2d89..e401974 100644 --- a/glib/pcre/pcre_tables.c +++ b/glib/pcre/pcre_tables.c -@@ -563,6 +563,22 @@ const ucp_type_table PRIV(utt)[] = { +@@ -584,6 +584,22 @@ const ucp_type_table PRIV(utt)[] = { const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);