regex: Import PCRE 8.31

https://bugzilla.gnome.org/show_bug.cgi?id=679193
2025-01-23 20:46:14 +01:00 · 2012-06-14 22:15:27 +02:00 · 2012-06-14 22:15:27 +02:00 · 9457833010
commit 9457833010
parent f66052fc87
12 changed files with 2090 additions and 1127 deletions
--- a/glib/pcre/pcre.h
+++ b/glib/pcre/pcre.h
@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
 /* The current PCRE version information. */

 #define PCRE_MAJOR          8
-#define PCRE_MINOR          30
+#define PCRE_MINOR          31
 #define PCRE_PRERELEASE     
-#define PCRE_DATE           2012-02-04
+#define PCRE_DATE           2012-07-06

 /* When an application links to a PCRE DLL in Windows, the symbols that are
 imported have to be identified as such. When building PCRE, the appropriate
@ -179,6 +179,7 @@ compiling). */
 #define PCRE_ERROR_JIT_STACKLIMIT  (-27)
 #define PCRE_ERROR_BADMODE         (-28)
 #define PCRE_ERROR_BADENDIANNESS   (-29)
+#define PCRE_ERROR_DFA_BADRESTART  (-30)

 /* Specific error codes for UTF-8 validity checks */

@ -234,6 +235,7 @@ compiling). */
 #define PCRE_INFO_MINLENGTH         15
 #define PCRE_INFO_JIT               16
 #define PCRE_INFO_JITSIZE           17
+#define PCRE_INFO_MAXLOOKBEHIND     18

 /* Request types for pcre_config(). Do not re-arrange, in order to remain
 compatible. */
@ -255,6 +257,8 @@ compatible. */
 compatible. */

 #define PCRE_STUDY_JIT_COMPILE                0x0001
+#define PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE   0x0002
+#define PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE   0x0004

 /* Bit flags for the pcre[16]_extra structure. Do not re-arrange or redefine
 these bits, just add new ones on the end, in order to remain compatible. */
--- a/glib/pcre/pcre_compile.c
+++ b/glib/pcre/pcre_compile.c
@ -52,7 +52,11 @@ supporting internal functions that are not used by other modules. */

 #include "pcre_internal.h"

+#ifdef GLIB_COMPILATION
 #include "gstrfuncs.h"
+#else
+#include <glib.h>
+#endif

 /* When PCRE_DEBUG is defined, we need the pcre(16)_printint() function, which
 is also used by pcretest. PCRE_DEBUG is not defined when building a production
@ -490,6 +494,9 @@ static const char error_texts[] =
  "too many forward references\0"
  "disallowed Unicode code point (>= 0xd800 && <= 0xdfff)\0"
  "invalid UTF-16 string\0"
+  /* 75 */
+  "name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"
+  "character value in \\u.... sequence is too large\0"
  ;

 /* Table to identify digits and hex digits. This is used when compiling
@ -831,6 +838,18 @@ else
          c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
 #endif
          }
+
+#ifdef COMPILE_PCRE8
+        if (c > (utf ? 0x10ffff : 0xff))
+#else
+#ifdef COMPILE_PCRE16
+        if (c > (utf ? 0x10ffff : 0xffff))
+#endif
+#endif
+          {
+          *errorcodeptr = ERR76;
+          }
+        else if (utf && c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73;
        }
      }
    else
@ -2227,32 +2246,60 @@ for (;;)
      {
      case OP_CHAR:
      case OP_CHARI:
+      case OP_NOT:
+      case OP_NOTI:
      case OP_EXACT:
      case OP_EXACTI:
+      case OP_NOTEXACT:
+      case OP_NOTEXACTI:
      case OP_UPTO:
      case OP_UPTOI:
+      case OP_NOTUPTO:
+      case OP_NOTUPTOI:
      case OP_MINUPTO:
      case OP_MINUPTOI:
+      case OP_NOTMINUPTO:
+      case OP_NOTMINUPTOI:
      case OP_POSUPTO:
      case OP_POSUPTOI:
+      case OP_NOTPOSUPTO:
+      case OP_NOTPOSUPTOI:
      case OP_STAR:
      case OP_STARI:
+      case OP_NOTSTAR:
+      case OP_NOTSTARI:
      case OP_MINSTAR:
      case OP_MINSTARI:
+      case OP_NOTMINSTAR:
+      case OP_NOTMINSTARI:
      case OP_POSSTAR:
      case OP_POSSTARI:
+      case OP_NOTPOSSTAR:
+      case OP_NOTPOSSTARI:
      case OP_PLUS:
      case OP_PLUSI:
+      case OP_NOTPLUS:
+      case OP_NOTPLUSI:
      case OP_MINPLUS:
      case OP_MINPLUSI:
+      case OP_NOTMINPLUS:
+      case OP_NOTMINPLUSI:
      case OP_POSPLUS:
      case OP_POSPLUSI:
+      case OP_NOTPOSPLUS:
+      case OP_NOTPOSPLUSI:
      case OP_QUERY:
      case OP_QUERYI:
+      case OP_NOTQUERY:
+      case OP_NOTQUERYI:
      case OP_MINQUERY:
      case OP_MINQUERYI:
+      case OP_NOTMINQUERY:
+      case OP_NOTMINQUERYI:
      case OP_POSQUERY:
      case OP_POSQUERYI:
+      case OP_NOTPOSQUERY:
+      case OP_NOTPOSQUERYI:
      if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
      break;
      }
@ -3071,22 +3118,28 @@ if (next >= 0) switch(op_code)
 #endif  /* SUPPORT_UTF */
  return (c != TABLE_GET((unsigned int)next, cd->fcc, next));  /* Non-UTF-8 mode */

-  /* For OP_NOT and OP_NOTI, the data is always a single-byte character. These
-  opcodes are not used for multi-byte characters, because they are coded using
-  an XCLASS instead. */
-
  case OP_NOT:
-  return (c = *previous) == next;
+#ifdef SUPPORT_UTF
+  GETCHARTEST(c, previous);
+#else
+  c = *previous;
+#endif
+  return c == next;

  case OP_NOTI:
-  if ((c = *previous) == next) return TRUE;
+#ifdef SUPPORT_UTF
+  GETCHARTEST(c, previous);
+#else
+  c = *previous;
+#endif
+  if (c == next) return TRUE;
 #ifdef SUPPORT_UTF
  if (utf)
    {
    unsigned int othercase;
    if (next < 128) othercase = cd->fcc[next]; else
 #ifdef SUPPORT_UCP
-    othercase = UCD_OTHERCASE(next);
+    othercase = UCD_OTHERCASE((unsigned int)next);
 #else
    othercase = NOTACHAR;
 #endif
@ -3094,28 +3147,28 @@ if (next >= 0) switch(op_code)
    }
  else
 #endif  /* SUPPORT_UTF */
-  return (c == (int)(TABLE_GET((unsigned int)next, cd->fcc, next)));  /* Non-UTF-8 mode */
+  return (c == TABLE_GET((unsigned int)next, cd->fcc, next));  /* Non-UTF-8 mode */

  /* Note that OP_DIGIT etc. are generated only when PCRE_UCP is *not* set.
  When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */

  case OP_DIGIT:
-  return next > 127 || (cd->ctypes[next] & ctype_digit) == 0;
+  return next > 255 || (cd->ctypes[next] & ctype_digit) == 0;

  case OP_NOT_DIGIT:
-  return next <= 127 && (cd->ctypes[next] & ctype_digit) != 0;
+  return next <= 255 && (cd->ctypes[next] & ctype_digit) != 0;

  case OP_WHITESPACE:
-  return next > 127 || (cd->ctypes[next] & ctype_space) == 0;
+  return next > 255 || (cd->ctypes[next] & ctype_space) == 0;

  case OP_NOT_WHITESPACE:
-  return next <= 127 && (cd->ctypes[next] & ctype_space) != 0;
+  return next <= 255 && (cd->ctypes[next] & ctype_space) != 0;

  case OP_WORDCHAR:
-  return next > 127 || (cd->ctypes[next] & ctype_word) == 0;
+  return next > 255 || (cd->ctypes[next] & ctype_word) == 0;

  case OP_NOT_WORDCHAR:
-  return next <= 127 && (cd->ctypes[next] & ctype_word) != 0;
+  return next <= 255 && (cd->ctypes[next] & ctype_word) != 0;

  case OP_HSPACE:
  case OP_NOT_HSPACE:
@ -3193,22 +3246,22 @@ switch(op_code)
  switch(-next)
    {
    case ESC_d:
-    return c > 127 || (cd->ctypes[c] & ctype_digit) == 0;
+    return c > 255 || (cd->ctypes[c] & ctype_digit) == 0;

    case ESC_D:
-    return c <= 127 && (cd->ctypes[c] & ctype_digit) != 0;
+    return c <= 255 && (cd->ctypes[c] & ctype_digit) != 0;

    case ESC_s:
-    return c > 127 || (cd->ctypes[c] & ctype_space) == 0;
+    return c > 255 || (cd->ctypes[c] & ctype_space) == 0;

    case ESC_S:
-    return c <= 127 && (cd->ctypes[c] & ctype_space) != 0;
+    return c <= 255 && (cd->ctypes[c] & ctype_space) != 0;

    case ESC_w:
-    return c > 127 || (cd->ctypes[c] & ctype_word) == 0;
+    return c > 255 || (cd->ctypes[c] & ctype_word) == 0;

    case ESC_W:
-    return c <= 127 && (cd->ctypes[c] & ctype_word) != 0;
+    return c <= 255 && (cd->ctypes[c] & ctype_word) != 0;

    case ESC_h:
    case ESC_H:
@ -3317,10 +3370,10 @@ switch(op_code)
  return next == -ESC_d;

  case OP_WHITESPACE:
-  return next == -ESC_S || next == -ESC_d || next == -ESC_w || next == -ESC_R;
+  return next == -ESC_S || next == -ESC_d || next == -ESC_w;

  case OP_NOT_WHITESPACE:
-  return next == -ESC_s || next == -ESC_h || next == -ESC_v;
+  return next == -ESC_s || next == -ESC_h || next == -ESC_v || next == -ESC_R;

  case OP_HSPACE:
  return next == -ESC_S || next == -ESC_H || next == -ESC_d ||
@ -4484,41 +4537,34 @@ for (;; ptr++)
      LONE_SINGLE_CHARACTER:

      /* Only the value of 1 matters for class_single_char. */
+
      if (class_single_char < 2) class_single_char++;

      /* If class_charcount is 1, we saw precisely one character. As long as
-      there were no negated characters >= 128 and there was no use of \p or \P,
-      in other words, no use of any XCLASS features, we can optimize.
-
-      In UTF-8 mode, we can optimize the negative case only if there were no
-      characters >= 128 because OP_NOT and the related opcodes like OP_NOTSTAR
-      operate on single-bytes characters only. This is an historical hangover.
-      Maybe one day we can tidy these opcodes to handle multi-byte characters.
+      there was no use of \p or \P, in other words, no use of any XCLASS
+      features, we can optimize.

      The optimization throws away the bit map. We turn the item into a
      1-character OP_CHAR[I] if it's positive, or OP_NOT[I] if it's negative.
-      Note that OP_NOT[I] does not support multibyte characters. In the positive
-      case, it can cause firstchar to be set. Otherwise, there can be no first
-      char if this item is first, whatever repeat count may follow. In the case
-      of reqchar, save the previous value for reinstating. */
+      In the positive case, it can cause firstchar to be set. Otherwise, there
+      can be no first char if this item is first, whatever repeat count may
+      follow. In the case of reqchar, save the previous value for reinstating. */

-#ifdef SUPPORT_UTF
-      if (class_single_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET
-        && (!utf || !negate_class || c < (MAX_VALUE_FOR_SINGLE_CHAR + 1)))
-#else
      if (class_single_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
-#endif
        {
        ptr++;
        zeroreqchar = reqchar;

-        /* The OP_NOT[I] opcodes work on single characters only. */
-
        if (negate_class)
          {
          if (firstchar == REQ_UNSET) firstchar = REQ_NONE;
          zerofirstchar = firstchar;
          *code++ = ((options & PCRE_CASELESS) != 0)? OP_NOTI: OP_NOT;
+#ifdef SUPPORT_UTF
+          if (utf && c > MAX_VALUE_FOR_SINGLE_CHAR)
+            code += PRIV(ord2utf)(c, code);
+          else
+#endif
            *code++ = c;
          goto NOT_CHAR;
          }
@ -4777,15 +4823,23 @@ for (;; ptr++)

    /* Now handle repetition for the different types of item. */

-    /* If previous was a character match, abolish the item and generate a
-    repeat item instead. If a char item has a minumum of more than one, ensure
-    that it is set in reqchar - it might not be if a sequence such as x{3} is
-    the first thing in a branch because the x will have gone into firstchar
-    instead.  */
+    /* If previous was a character or negated character match, abolish the item
+    and generate a repeat item instead. If a char item has a minimum of more
+    than one, ensure that it is set in reqchar - it might not be if a sequence
+    such as x{3} is the first thing in a branch because the x will have gone
+    into firstchar instead.  */

-    if (*previous == OP_CHAR || *previous == OP_CHARI)
+    if (*previous == OP_CHAR || *previous == OP_CHARI
+        || *previous == OP_NOT || *previous == OP_NOTI)
      {
-      op_type = (*previous == OP_CHAR)? 0 : OP_STARI - OP_STAR;
+      switch (*previous)
+        {
+        default: /* Make compiler happy. */
+        case OP_CHAR:  op_type = OP_STAR - OP_STAR; break;
+        case OP_CHARI: op_type = OP_STARI - OP_STAR; break;
+        case OP_NOT:   op_type = OP_NOTSTAR - OP_STAR; break;
+        case OP_NOTI:  op_type = OP_NOTSTARI - OP_STAR; break;
+        }

      /* Deal with UTF characters that take up more than one character. It's
      easier to write this out separately than try to macrify it. Use c to
@ -4808,7 +4862,8 @@ for (;; ptr++)
      with UTF disabled, or for a single character UTF character. */
        {
        c = code[-1];
-        if (repeat_min > 1) reqchar = c | req_caseopt | cd->req_varyopt;
+        if (*previous <= OP_CHARI && repeat_min > 1)
+          reqchar = c | req_caseopt | cd->req_varyopt;
        }

      /* If the repetition is unlimited, it pays to see if the next thing on
@ -4827,26 +4882,6 @@ for (;; ptr++)
      goto OUTPUT_SINGLE_REPEAT;   /* Code shared with single character types */
      }

-    /* If previous was a single negated character ([^a] or similar), we use
-    one of the special opcodes, replacing it. The code is shared with single-
-    character repeats by setting opt_type to add a suitable offset into
-    repeat_type. We can also test for auto-possessification. OP_NOT and OP_NOTI
-    are currently used only for single-byte chars. */
-
-    else if (*previous == OP_NOT || *previous == OP_NOTI)
-      {
-      op_type = ((*previous == OP_NOT)? OP_NOTSTAR : OP_NOTSTARI) - OP_STAR;
-      c = previous[1];
-      if (!possessive_quantifier &&
-          repeat_max < 0 &&
-          check_auto_possessive(previous, utf, ptr + 1, options, cd))
-        {
-        repeat_type = 0;    /* Force greedy */
-        possessive_quantifier = TRUE;
-        }
-      goto OUTPUT_SINGLE_REPEAT;
-      }
-
    /* If previous was a character type match (\d or similar), abolish it and
    create a suitable repeat item. The code is shared with single-character
    repeats by setting op_type to add a suitable offset into repeat_type. Note
@ -5587,6 +5622,11 @@ for (;; ptr++)
        arg = ++ptr;
        while (*ptr != 0 && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
        arglen = (int)(ptr - arg);
+        if (arglen > (int)MAX_MARK)
+          {
+          *errorcodeptr = ERR75;
+          goto FAILED;
+          }
        }

      if (*ptr != CHAR_RIGHT_PARENTHESIS)
@ -6838,10 +6878,13 @@ for (;; ptr++)
      /* For the rest (including \X when Unicode properties are supported), we
      can obtain the OP value by negating the escape value in the default
      situation when PCRE_UCP is not set. When it *is* set, we substitute
-      Unicode property tests. */
+      Unicode property tests. Note that \b and \B do a one-character
+      lookbehind. */

      else
        {
+        if ((-c == ESC_b || -c == ESC_B) && cd->max_lookbehind == 0)
+          cd->max_lookbehind = 1;
 #ifdef SUPPORT_UCP
        if (-c >= ESC_DU && -c <= ESC_wu)
          {
@ -7149,7 +7192,12 @@ for (;;)
        *ptrptr = ptr;
        return FALSE;
        }
-      else { PUT(reverse_count, 0, fixed_length); }
+      else
+        {
+        if (fixed_length > cd->max_lookbehind)
+          cd->max_lookbehind = fixed_length;
+        PUT(reverse_count, 0, fixed_length);
+        }
      }
    }

@ -7819,6 +7867,7 @@ cd->start_pattern = (const pcre_uchar *)pattern;
 cd->end_pattern = (const pcre_uchar *)(pattern + STRLEN_UC((const pcre_uchar *)pattern));
 cd->req_varyopt = 0;
 cd->assert_depth = 0;
+cd->max_lookbehind = 0;
 cd->external_options = options;
 cd->external_flags = 0;
 cd->open_caps = NULL;
@ -7869,7 +7918,6 @@ re->magic_number = MAGIC_NUMBER;
 re->size = (int)size;
 re->options = cd->external_options;
 re->flags = cd->external_flags;
-re->dummy1 = 0;
 re->first_char = 0;
 re->req_char = 0;
 re->name_table_offset = sizeof(REAL_PCRE) / sizeof(pcre_uchar);
@ -7889,6 +7937,7 @@ field; this time it's used for remembering forward references to subpatterns.
 cd->final_bracount = cd->bracount;  /* Save for checking forward references */
 cd->assert_depth = 0;
 cd->bracount = 0;
+cd->max_lookbehind = 0;
 cd->names_found = 0;
 cd->name_table = (pcre_uchar *)re + re->name_table_offset;
 codestart = cd->name_table + re->name_entry_size * re->name_count;
@ -7910,6 +7959,7 @@ code = (pcre_uchar *)codestart;
  &firstchar, &reqchar, NULL, cd, NULL);
 re->top_bracket = cd->bracount;
 re->top_backref = cd->top_backref;
+re->max_lookbehind = cd->max_lookbehind;
 re->flags = cd->external_flags | PCRE_MODE;

 if (cd->had_accept) reqchar = REQ_NONE;   /* Must disable after (*ACCEPT) */
@ -7997,6 +8047,7 @@ if (cd->check_lookbehind)
                    (fixed_length == -4)? ERR70 : ERR25;
        break;
        }
+      if (fixed_length > cd->max_lookbehind) cd->max_lookbehind = fixed_length;
      PUT(cc, 1, fixed_length);
      }
    cc += 1 + LINK_SIZE;
--- a/glib/pcre/pcre_dfa_exec.c
+++ b/glib/pcre/pcre_dfa_exec.c
@ -38,7 +38,6 @@ POSSIBILITY OF SUCH DAMAGE.
 -----------------------------------------------------------------------------
 */

-
 /* This module contains the external function pcre_dfa_exec(), which is an
 alternative matching function that uses a sort of DFA algorithm (not a true
 FSM). This is NOT Perl-compatible, but it has advantages in certain
@ -282,7 +281,7 @@ typedef struct stateblock {
  int data;                       /* Some use extra data */
 } stateblock;

-#define INTS_PER_STATEBLOCK  (sizeof(stateblock)/sizeof(int))
+#define INTS_PER_STATEBLOCK  (int)(sizeof(stateblock)/sizeof(int))


 #ifdef PCRE_DEBUG
@ -382,7 +381,8 @@ for the current character, one for the following character). */
    next_new_state->count  = (y); \
    next_new_state->data   = (z); \
    next_new_state++; \
-    DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \
+    DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d) line %d\n", rlevel*2-2, SP, \
+      (x), (y), (z), __LINE__)); \
    } \
  else return PCRE_ERROR_DFA_WSSIZE

@ -424,6 +424,8 @@ BOOL utf = (md->poptions & PCRE_UTF8) != 0;
 BOOL utf = FALSE;
 #endif

+BOOL reset_could_continue = FALSE;
+
 rlevel++;
 offsetcount &= (-2);

@ -571,7 +573,9 @@ for (;;)
  int clen, dlen;
  unsigned int c, d;
  int forced_fail = 0;
-  BOOL could_continue = FALSE;
+  BOOL partial_newline = FALSE;
+  BOOL could_continue = reset_could_continue;
+  reset_could_continue = FALSE;

  /* Make the new state list into the active state list and empty the
  new state list. */
@ -607,7 +611,7 @@ for (;;)

  if (ptr < end_subject)
    {
-    clen = 1;        /* Number of bytes in the character */
+    clen = 1;        /* Number of data items in the character */
 #ifdef SUPPORT_UTF
    if (utf) { GETCHARLEN(c, ptr, clen); } else
 #endif  /* SUPPORT_UTF */
@ -641,7 +645,8 @@ for (;;)

    /* A negative offset is a special case meaning "hold off going to this
    (negated) state until the number of characters in the data field have
-    been skipped". */
+    been skipped". If the could_continue flag was passed over from a previous
+    state, arrange for it to be passed on. */

    if (state_offset < 0)
      {
@ -650,6 +655,7 @@ for (;;)
        DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));
        ADD_NEW_DATA(state_offset, current_state->count,
          current_state->data - 1);
+        if (could_continue) reset_could_continue = TRUE;
        continue;
        }
      else
@ -689,10 +695,10 @@ for (;;)
    permitted.

    We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
-    argument that is not a data character - but is always one byte long. We
-    have to take special action to deal with  \P, \p, \H, \h, \V, \v and \X in
-    this case. To keep the other cases fast, convert these ones to new opcodes.
-    */
+    argument that is not a data character - but is always one byte long because
+    the values are small. We have to take special action to deal with  \P, \p,
+    \H, \h, \V, \v and \X in this case. To keep the other cases fast, convert
+    these ones to new opcodes. */

    if (coptable[codevalue] > 0)
      {
@ -783,7 +789,7 @@ for (;;)
            offsets[0] = (int)(current_subject - start_subject);
            offsets[1] = (int)(ptr - start_subject);
            DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,
-              offsets[1] - offsets[0], current_subject));
+              offsets[1] - offsets[0], (char *)current_subject));
            }
          if ((md->moptions & PCRE_DFA_SHORTEST) != 0)
            {
@ -888,7 +894,20 @@ for (;;)
      /*-----------------------------------------------------------------*/
      case OP_ANY:
      if (clen > 0 && !IS_NEWLINE(ptr))
-        { ADD_NEW(state_offset + 1, 0); }
+        {
+        if (ptr + 1 >= md->end_subject &&
+            (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
+            NLBLOCK->nltype == NLTYPE_FIXED &&
+            NLBLOCK->nllen == 2 &&
+            c == NLBLOCK->nl[0])
+          {
+          could_continue = partial_newline = TRUE;
+          }
+        else
+          {
+          ADD_NEW(state_offset + 1, 0);
+          }
+        }
      break;

      /*-----------------------------------------------------------------*/
@ -916,6 +935,19 @@ for (;;)
               (ptr == end_subject - md->nllen)
            ))
          { ADD_ACTIVE(state_offset + 1, 0); }
+        else if (ptr + 1 >= md->end_subject &&
+                 (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
+                 NLBLOCK->nltype == NLTYPE_FIXED &&
+                 NLBLOCK->nllen == 2 &&
+                 c == NLBLOCK->nl[0])
+          {
+          if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
+            {
+            reset_could_continue = TRUE;
+            ADD_NEW_DATA(-(state_offset + 1), 0, 1);
+            }
+          else could_continue = partial_newline = TRUE;
+          }
        }
      break;

@ -928,6 +960,19 @@ for (;;)
        else if (clen == 0 ||
            ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr)))
          { ADD_ACTIVE(state_offset + 1, 0); }
+        else if (ptr + 1 >= md->end_subject &&
+                 (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
+                 NLBLOCK->nltype == NLTYPE_FIXED &&
+                 NLBLOCK->nllen == 2 &&
+                 c == NLBLOCK->nl[0])
+          {
+          if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
+            {
+            reset_could_continue = TRUE;
+            ADD_NEW_DATA(-(state_offset + 1), 0, 1);
+            }
+          else could_continue = partial_newline = TRUE;
+          }
        }
      else if (IS_NEWLINE(ptr))
        { ADD_ACTIVE(state_offset + 1, 0); }
@ -1090,7 +1135,15 @@ for (;;)
      if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
      if (clen > 0)
        {
-        if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
+        if (d == OP_ANY && ptr + 1 >= md->end_subject &&
+            (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
+            NLBLOCK->nltype == NLTYPE_FIXED &&
+            NLBLOCK->nllen == 2 &&
+            c == NLBLOCK->nl[0])
+          {
+          could_continue = partial_newline = TRUE;
+          }
+        else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
            (c < 256 &&
              (d != OP_ANY || !IS_NEWLINE(ptr)) &&
              ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
@ -1113,7 +1166,15 @@ for (;;)
      ADD_ACTIVE(state_offset + 2, 0);
      if (clen > 0)
        {
-        if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
+        if (d == OP_ANY && ptr + 1 >= md->end_subject &&
+            (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
+            NLBLOCK->nltype == NLTYPE_FIXED &&
+            NLBLOCK->nllen == 2 &&
+            c == NLBLOCK->nl[0])
+          {
+          could_continue = partial_newline = TRUE;
+          }
+        else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
            (c < 256 &&
              (d != OP_ANY || !IS_NEWLINE(ptr)) &&
              ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
@ -1135,7 +1196,15 @@ for (;;)
      ADD_ACTIVE(state_offset + 2, 0);
      if (clen > 0)
        {
-        if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
+        if (d == OP_ANY && ptr + 1 >= md->end_subject &&
+            (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
+            NLBLOCK->nltype == NLTYPE_FIXED &&
+            NLBLOCK->nllen == 2 &&
+            c == NLBLOCK->nl[0])
+          {
+          could_continue = partial_newline = TRUE;
+          }
+        else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
            (c < 256 &&
              (d != OP_ANY || !IS_NEWLINE(ptr)) &&
              ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
@ -1155,7 +1224,15 @@ for (;;)
      count = current_state->count;  /* Number already matched */
      if (clen > 0)
        {
-        if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
+        if (d == OP_ANY && ptr + 1 >= md->end_subject &&
+            (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
+            NLBLOCK->nltype == NLTYPE_FIXED &&
+            NLBLOCK->nllen == 2 &&
+            c == NLBLOCK->nl[0])
+          {
+          could_continue = partial_newline = TRUE;
+          }
+        else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
            (c < 256 &&
              (d != OP_ANY || !IS_NEWLINE(ptr)) &&
              ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
@ -1176,7 +1253,15 @@ for (;;)
      count = current_state->count;  /* Number already matched */
      if (clen > 0)
        {
-        if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
+        if (d == OP_ANY && ptr + 1 >= md->end_subject &&
+            (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
+            NLBLOCK->nltype == NLTYPE_FIXED &&
+            NLBLOCK->nllen == 2 &&
+            c == NLBLOCK->nl[0])
+          {
+          could_continue = partial_newline = TRUE;
+          }
+        else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
            (c < 256 &&
              (d != OP_ANY || !IS_NEWLINE(ptr)) &&
              ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
@ -1824,6 +1909,8 @@ for (;;)
          ncount++;
          nptr += ndlen;
          }
+        if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
+            reset_could_continue = TRUE;
        if (++count >= GET2(code, 1))
          { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
        else
@ -2037,6 +2124,8 @@ for (;;)
          ncount++;
          nptr += nclen;
          }
+        if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
+            reset_could_continue = TRUE;
        ADD_NEW_DATA(-(state_offset + 1), 0, ncount);
        }
      break;
@ -2062,7 +2151,13 @@ for (;;)
        break;

        case 0x000d:
-        if (ptr + 1 < end_subject && ptr[1] == 0x0a)
+        if (ptr + 1 >= end_subject)
+          {
+          ADD_NEW(state_offset + 1, 0);
+          if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
+            reset_could_continue = TRUE;
+          }
+        else if (ptr[1] == 0x0a)
          {
          ADD_NEW_DATA(-(state_offset + 1), 0, 1);
          }
@ -2171,22 +2266,32 @@ for (;;)
      break;

      /*-----------------------------------------------------------------*/
-      /* Match a negated single character casefully. This is only used for
-      one-byte characters, that is, we know that d < 256. The character we are
-      checking (c) can be multibyte. */
+      /* Match a negated single character casefully. */

      case OP_NOT:
      if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }
      break;

      /*-----------------------------------------------------------------*/
-      /* Match a negated single character caselessly. This is only used for
-      one-byte characters, that is, we know that d < 256. The character we are
-      checking (c) can be multibyte. */
+      /* Match a negated single character caselessly. */

      case OP_NOTI:
-      if (clen > 0 && c != d && c != fcc[d])
+      if (clen > 0)
+        {
+        unsigned int otherd;
+#ifdef SUPPORT_UTF
+        if (utf && d >= 128)
+          {
+#ifdef SUPPORT_UCP
+          otherd = UCD_OTHERCASE(d);
+#endif  /* SUPPORT_UCP */
+          }
+        else
+#endif  /* SUPPORT_UTF */
+        otherd = TABLE_GET(d, fcc, d);
+        if (c != d && c != otherd)
          { ADD_NEW(state_offset + dlen + 1, 0); }
+        }
      break;

      /*-----------------------------------------------------------------*/
@ -2692,9 +2797,12 @@ for (;;)
            {
            int charcount = local_offsets[rc+1] - local_offsets[rc];
 #ifdef SUPPORT_UTF
+            if (utf)
+              {
              const pcre_uchar *p = start_subject + local_offsets[rc];
              const pcre_uchar *pp = start_subject + local_offsets[rc+1];
              while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
+              }
 #endif
            if (charcount > 0)
              {
@ -2793,7 +2901,7 @@ for (;;)
            const pcre_uchar *pp = local_ptr;
            charcount = (int)(pp - p);
 #ifdef SUPPORT_UTF
-            while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
+            if (utf) while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
 #endif
            ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
            }
@ -2875,9 +2983,12 @@ for (;;)
          else
            {
 #ifdef SUPPORT_UTF
+            if (utf)
+              {
              const pcre_uchar *p = start_subject + local_offsets[0];
              const pcre_uchar *pp = start_subject + local_offsets[1];
              while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
+              }
 #endif
            ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
            if (repeat_state_offset >= 0)
@ -2946,7 +3057,7 @@ for (;;)
  if (new_count <= 0)
    {
    if (rlevel == 1 &&                               /* Top level, and */
-        could_continue &&                            /* Some could go on */
+        could_continue &&                            /* Some could go on, and */
        forced_fail != workspace[1] &&               /* Not all forced fail & */
        (                                            /* either... */
        (md->moptions & PCRE_PARTIAL_HARD) != 0      /* Hard partial */
@ -2954,8 +3065,13 @@ for (;;)
        ((md->moptions & PCRE_PARTIAL_SOFT) != 0 &&  /* Soft partial and */
         match_count < 0)                            /* no matches */
        ) &&                                         /* And... */
-        ptr >= end_subject &&                  /* Reached end of subject */
+        (
+        partial_newline ||                           /* Either partial NL */
+          (                                          /* or ... */
+          ptr >= end_subject &&                /* End of subject and */
          ptr > md->start_used_ptr)            /* Inspected non-empty string */
+          )
+        )
      {
      if (offsetcount >= 2)
        {
@ -3052,10 +3168,27 @@ if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
 if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;
 if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;

-/* We need to find the pointer to any study data before we test for byte
-flipping, so we scan the extra_data block first. This may set two fields in the
-match block, so we must initialize them beforehand. However, the other fields
-in the match block must not be set until after the byte flipping. */
+/* Check that the first field in the block is the magic number. If it is not,
+return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
+REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
+means that the pattern is likely compiled with different endianness. */
+
+if (re->magic_number != MAGIC_NUMBER)
+  return re->magic_number == REVERSED_MAGIC_NUMBER?
+    PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
+if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
+
+/* If restarting after a partial match, do some sanity checks on the contents
+of the workspace. */
+
+if ((options & PCRE_DFA_RESTART) != 0)
+  {
+  if ((workspace[0] & (-2)) != 0 || workspace[1] < 1 ||
+    workspace[1] > (wscount - 2)/INTS_PER_STATEBLOCK)
+      return PCRE_ERROR_DFA_BADRESTART;
+  }
+
+/* Set up study, callout, and table data */

 md->tables = re->tables;
 md->callout_data = NULL;
@ -3074,16 +3207,6 @@ if (extra_data != NULL)
    md->tables = extra_data->tables;
  }

-/* Check that the first field in the block is the magic number. If it is not,
-return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
-REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
-means that the pattern is likely compiled with different endianness. */
-
-if (re->magic_number != MAGIC_NUMBER)
-  return re->magic_number == REVERSED_MAGIC_NUMBER?
-    PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
-if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
-
 /* Set some local values */

 current_subject = (const pcre_uchar *)subject + start_offset;
--- a/glib/pcre/pcre_exec.c
+++ b/glib/pcre/pcre_exec.c
@ -37,7 +37,6 @@ POSSIBILITY OF SUCH DAMAGE.
 -----------------------------------------------------------------------------
 */

-
 /* This module contains pcre_exec(), the externally visible function that does
 pattern matching using an NFA algorithm, trying to mimic Perl as closely as
 possible. There are also some static supporting functions. */
@ -140,7 +139,9 @@ Arguments:
  md          points to match data block
  caseless    TRUE if caseless

-Returns:      < 0 if not matched, otherwise the number of subject bytes matched
+Returns:      >= 0 the number of subject bytes matched
+              -1 no match
+              -2 partial match; always given if at end of subject
 */

 static int
@ -163,7 +164,8 @@ pchars(p, length, FALSE, md);
 printf("\n");
 #endif

-/* Always fail if reference not set (and not JavaScript compatible). */
+/* Always fail if reference not set (and not JavaScript compatible - in that
+case the length is passed as zero). */

 if (length < 0) return -1;

@ -189,7 +191,7 @@ if (caseless)
    while (p < endptr)
      {
      int c, d;
-      if (eptr >= md->end_subject) return -1;
+      if (eptr >= md->end_subject) return -2;   /* Partial match */
      GETCHARINC(c, eptr);
      GETCHARINC(d, p);
      if (c != d && c != UCD_OTHERCASE(d)) return -1;
@ -202,9 +204,9 @@ if (caseless)
  /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
  is no UCP support. */
    {
-    if (eptr + length > md->end_subject) return -1;
    while (length-- > 0)
      {
+      if (eptr >= md->end_subject) return -2;   /* Partial match */
      if (TABLE_GET(*p, md->lcc, *p) != TABLE_GET(*eptr, md->lcc, *eptr)) return -1;
      p++;
      eptr++;
@ -217,8 +219,11 @@ are in UTF-8 mode. */

 else
  {
-  if (eptr + length > md->end_subject) return -1;
-  while (length-- > 0) if (*p++ != *eptr++) return -1;
+  while (length-- > 0)
+    {
+    if (eptr >= md->end_subject) return -2;   /* Partial match */
+    if (*p++ != *eptr++) return -1;
+    }
  }

 return (int)(eptr - eptr_start);
@ -311,8 +316,14 @@ argument of match(), which never changes. */

 #define RMATCH(ra,rb,rc,rd,re,rw)\
  {\
-  heapframe *newframe = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));\
+  heapframe *newframe = frame->Xnextframe;\
+  if (newframe == NULL)\
+    {\
+    newframe = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));\
    if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
+    newframe->Xnextframe = NULL;\
+    frame->Xnextframe = newframe;\
+    }\
  frame->Xwhere = rw;\
  newframe->Xeptr = ra;\
  newframe->Xecode = rb;\
@ -332,7 +343,6 @@ argument of match(), which never changes. */
  {\
  heapframe *oldframe = frame;\
  frame = oldframe->Xprevframe;\
-  if (oldframe != &frame_zero) (PUBL(stack_free))(oldframe);\
  if (frame != NULL)\
    {\
    rrc = ra;\
@ -346,6 +356,7 @@ argument of match(), which never changes. */

 typedef struct heapframe {
  struct heapframe *Xprevframe;
+  struct heapframe *Xnextframe;

  /* Function arguments that may change */

@ -492,9 +503,7 @@ the top-level on the stack rather than malloc-ing them all gives a performance
 boost in many cases where there is not much "recursion". */

 #ifdef NO_RECURSE
-heapframe frame_zero;
-heapframe *frame = &frame_zero;
-frame->Xprevframe = NULL;            /* Marks the top level */
+heapframe *frame = (heapframe *)md->match_frames_base;

 /* Copy in the original argument variables */

@ -897,7 +906,6 @@ for (;;)
      }
    else  /* OP_KETRMAX */
      {
-      md->match_function_type = MATCH_CBEGROUP;
      RMATCH(eptr, prev, offset_top, md, eptrb, RM66);
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
      ecode += 1 + LINK_SIZE;
@ -1026,7 +1034,8 @@ for (;;)

    for (;;)
      {
-      if (op >= OP_SBRA || op == OP_ONCE) md->match_function_type = MATCH_CBEGROUP;
+      if (op >= OP_SBRA || op == OP_ONCE)
+        md->match_function_type = MATCH_CBEGROUP;

      /* If this is not a possibly empty group, and there are no (*THEN)s in
      the pattern, and this is the final alternative, optimize as described
@ -1565,13 +1574,18 @@ for (;;)
        mstart = md->start_match_ptr;   /* In case \K reset it */
        break;
        }
+      md->mark = save_mark;

-      /* PCRE does not allow THEN to escape beyond an assertion; it is treated
-      as NOMATCH. */
+      /* A COMMIT failure must fail the entire assertion, without trying any
+      subsequent branches. */
+
+      if (rrc == MATCH_COMMIT) RRETURN(MATCH_NOMATCH);
+
+      /* PCRE does not allow THEN to escape beyond an assertion; it
+      is treated as NOMATCH. */

      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
      ecode += GET(ecode, 1);
-      md->mark = save_mark;
      }
    while (*ecode == OP_ALT);

@ -1779,10 +1793,11 @@ for (;;)
          goto RECURSION_MATCHED;        /* Exit loop; end processing */
          }

-        /* PCRE does not allow THEN to escape beyond a recursion; it is treated
-        as NOMATCH. */
+        /* PCRE does not allow THEN or COMMIT to escape beyond a recursion; it
+        is treated as NOMATCH. */

-        else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
+        else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN &&
+                 rrc != MATCH_COMMIT)
          {
          DPRINTF(("Recursion gave error %d\n", rrc));
          if (new_recursive.offset_save != stacksave)
@ -1993,7 +2008,6 @@ for (;;)
        }
      if (*prev >= OP_SBRA)    /* Could match an empty string */
        {
-        md->match_function_type = MATCH_CBEGROUP;
        RMATCH(eptr, prev, offset_top, md, eptrb, RM50);
        RRETURN(rrc);
        }
@ -2002,7 +2016,6 @@ for (;;)
      }
    else  /* OP_KETRMAX */
      {
-      if (*prev >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
      RMATCH(eptr, prev, offset_top, md, eptrb, RM13);
      if (rrc == MATCH_ONCE && md->once_target == prev) rrc = MATCH_NOMATCH;
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
@ -2059,7 +2072,21 @@ for (;;)

    case OP_DOLLM:
    if (eptr < md->end_subject)
-      { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
+      {
+      if (!IS_NEWLINE(eptr))
+        {
+        if (md->partial != 0 &&
+            eptr + 1 >= md->end_subject &&
+            NLBLOCK->nltype == NLTYPE_FIXED &&
+            NLBLOCK->nllen == 2 &&
+            *eptr == NLBLOCK->nl[0])
+          {
+          md->hitend = TRUE;
+          if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
+          }
+        RRETURN(MATCH_NOMATCH);
+        }
+      }
    else
      {
      if (md->noteol) RRETURN(MATCH_NOMATCH);
@ -2091,7 +2118,18 @@ for (;;)
    ASSERT_NL_OR_EOS:
    if (eptr < md->end_subject &&
        (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
+      {
+      if (md->partial != 0 &&
+          eptr + 1 >= md->end_subject &&
+          NLBLOCK->nltype == NLTYPE_FIXED &&
+          NLBLOCK->nllen == 2 &&
+          *eptr == NLBLOCK->nl[0])
+        {
+        md->hitend = TRUE;
+        if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
+        }
      RRETURN(MATCH_NOMATCH);
+      }

    /* Either at end of string or \n before end. */

@ -2219,12 +2257,25 @@ for (;;)
      }
    break;

-    /* Match a single character type; inline for speed */
+    /* Match any single character type except newline; have to take care with
+    CRLF newlines and partial matching. */

    case OP_ANY:
    if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
+    if (md->partial != 0 &&
+        eptr + 1 >= md->end_subject &&
+        NLBLOCK->nltype == NLTYPE_FIXED &&
+        NLBLOCK->nllen == 2 &&
+        *eptr == NLBLOCK->nl[0])
+      {
+      md->hitend = TRUE;
+      if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
+      }
+
    /* Fall through */

+    /* Match any single character whatsoever. */
+
    case OP_ALLANY:
    if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
      {                            /* not be updated before SCHECK_PARTIAL. */
@ -2365,7 +2416,11 @@ for (;;)
      default: RRETURN(MATCH_NOMATCH);

      case 0x000d:
-      if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
+      if (eptr >= md->end_subject)
+        {
+        SCHECK_PARTIAL();
+        }
+      else if (*eptr == 0x0a) eptr++;
      break;

      case 0x000a:
@ -2595,6 +2650,7 @@ for (;;)
      if (UCD_CATEGORY(c) != ucp_M) break;
      eptr += len;
      }
+    CHECK_PARTIAL();
    ecode++;
    break;
 #endif
@ -2660,6 +2716,7 @@ for (;;)
      default:               /* No repeat follows */
      if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)
        {
+        if (length == -2) eptr = md->end_subject;   /* Partial match */
        CHECK_PARTIAL();
        RRETURN(MATCH_NOMATCH);
        }
@ -2685,6 +2742,7 @@ for (;;)
      int slength;
      if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
        {
+        if (slength == -2) eptr = md->end_subject;   /* Partial match */
        CHECK_PARTIAL();
        RRETURN(MATCH_NOMATCH);
        }
@ -2708,6 +2766,7 @@ for (;;)
        if (fi >= max) RRETURN(MATCH_NOMATCH);
        if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
          {
+          if (slength == -2) eptr = md->end_subject;   /* Partial match */
          CHECK_PARTIAL();
          RRETURN(MATCH_NOMATCH);
          }
@ -2726,11 +2785,20 @@ for (;;)
        int slength;
        if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
          {
-          CHECK_PARTIAL();
+          /* Can't use CHECK_PARTIAL because we don't want to update eptr in
+          the soft partial matching case. */
+
+          if (slength == -2 && md->partial != 0 &&
+              md->end_subject > md->start_used_ptr)
+            {
+            md->hitend = TRUE;
+            if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
+            }
          break;
          }
        eptr += slength;
        }
+
      while (eptr >= pp)
        {
        RMATCH(eptr, ecode, offset_top, md, eptrb, RM15);
@ -3360,7 +3428,7 @@ for (;;)
    maximizing, find the maximum number of characters and work backwards. */

    DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
-      max, eptr));
+      max, (char *)eptr));

    if (op >= OP_STARI)  /* Caseless */
      {
@ -3504,33 +3572,41 @@ for (;;)
      SCHECK_PARTIAL();
      RRETURN(MATCH_NOMATCH);
      }
-    ecode++;
-    GETCHARINCTEST(c, eptr);
-    if (op == OP_NOTI)         /* The caseless case */
+#ifdef SUPPORT_UTF
+    if (utf)
      {
      register unsigned int ch, och;
-      ch = *ecode++;
-#ifdef COMPILE_PCRE8
-      /* ch must be < 128 if UTF is enabled. */
-      och = md->fcc[ch];
-#else
-#ifdef SUPPORT_UTF
+
+      ecode++;
+      GETCHARINC(ch, ecode);
+      GETCHARINC(c, eptr);
+
+      if (op == OP_NOT)
+        {
+        if (ch == c) RRETURN(MATCH_NOMATCH);
+        }
+      else
+        {
 #ifdef SUPPORT_UCP
-      if (utf && ch > 127)
+        if (ch > 127)
          och = UCD_OTHERCASE(ch);
 #else
-      if (utf && ch > 127)
+        if (ch > 127)
          och = ch;
 #endif /* SUPPORT_UCP */
        else
-#endif /* SUPPORT_UTF */
          och = TABLE_GET(ch, md->fcc, ch);
-#endif /* COMPILE_PCRE8 */
        if (ch == c || och == c) RRETURN(MATCH_NOMATCH);
        }
-    else    /* Caseful */
+      }
+    else
+#endif
      {
-      if (*ecode++ == c) RRETURN(MATCH_NOMATCH);
+      register unsigned int ch = ecode[1];
+      c = *eptr++;
+      if (ch == c || (op == OP_NOTI && TABLE_GET(ch, md->fcc, ch) == c))
+        RRETURN(MATCH_NOMATCH);
+      ecode += 2;
      }
    break;

@ -3610,7 +3686,7 @@ for (;;)
    /* Common code for all repeated single-byte matches. */

    REPEATNOTCHAR:
-    fc = *ecode++;
+    GETCHARINCTEST(fc, ecode);

    /* The code is duplicated for the caseless and caseful cases, for speed,
    since matching characters is likely to be quite common. First, ensure the
@ -3621,14 +3697,10 @@ for (;;)
    characters and work backwards. */

    DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
-      max, eptr));
+      max, (char *)eptr));

    if (op >= OP_NOTSTARI)     /* Caseless */
      {
-#ifdef COMPILE_PCRE8
-      /* fc must be < 128 if UTF is enabled. */
-      foc = md->fcc[fc];
-#else
 #ifdef SUPPORT_UTF
 #ifdef SUPPORT_UCP
      if (utf && fc > 127)
@ -3640,7 +3712,6 @@ for (;;)
      else
 #endif /* SUPPORT_UTF */
        foc = TABLE_GET(fc, md->fcc, fc);
-#endif /* COMPILE_PCRE8 */

 #ifdef SUPPORT_UTF
      if (utf)
@ -4164,6 +4235,7 @@ for (;;)
            if (UCD_CATEGORY(c) != ucp_M) break;
            eptr += len;
            }
+          CHECK_PARTIAL();
          }
        }

@ -4184,6 +4256,15 @@ for (;;)
            RRETURN(MATCH_NOMATCH);
            }
          if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
+          if (md->partial != 0 &&
+              eptr + 1 >= md->end_subject &&
+              NLBLOCK->nltype == NLTYPE_FIXED &&
+              NLBLOCK->nllen == 2 &&
+              *eptr == NLBLOCK->nl[0])
+            {
+            md->hitend = TRUE;
+            if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
+            }
          eptr++;
          ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
          }
@ -4468,6 +4549,15 @@ for (;;)
            RRETURN(MATCH_NOMATCH);
            }
          if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
+          if (md->partial != 0 &&
+              eptr + 1 >= md->end_subject &&
+              NLBLOCK->nltype == NLTYPE_FIXED &&
+              NLBLOCK->nllen == 2 &&
+              *eptr == NLBLOCK->nl[0])
+            {
+            md->hitend = TRUE;
+            if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
+            }
          eptr++;
          }
        break;
@ -4948,6 +5038,7 @@ for (;;)
            if (UCD_CATEGORY(c) != ucp_M) break;
            eptr += len;
            }
+          CHECK_PARTIAL();
          }
        }
      else
@ -4972,6 +5063,17 @@ for (;;)
          switch(ctype)
            {
            case OP_ANY:               /* This is the non-NL case */
+            if (md->partial != 0 &&    /* Take care with CRLF partial */
+                eptr >= md->end_subject &&
+                NLBLOCK->nltype == NLTYPE_FIXED &&
+                NLBLOCK->nllen == 2 &&
+                c == NLBLOCK->nl[0])
+              {
+              md->hitend = TRUE;
+              if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
+              }
+            break;
+
            case OP_ALLANY:
            case OP_ANYBYTE:
            break;
@ -5135,6 +5237,17 @@ for (;;)
          switch(ctype)
            {
            case OP_ANY:               /* This is the non-NL case */
+            if (md->partial != 0 &&    /* Take care with CRLF partial */
+                eptr >= md->end_subject &&
+                NLBLOCK->nltype == NLTYPE_FIXED &&
+                NLBLOCK->nllen == 2 &&
+                c == NLBLOCK->nl[0])
+              {
+              md->hitend = TRUE;
+              if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
+              }
+            break;
+
            case OP_ALLANY:
            case OP_ANYBYTE:
            break;
@ -5491,6 +5604,7 @@ for (;;)
            if (UCD_CATEGORY(c) != ucp_M) break;
            eptr += len;
            }
+          CHECK_PARTIAL();
          }

        /* eptr is now past the end of the maximum run */
@ -5534,6 +5648,15 @@ for (;;)
                break;
                }
              if (IS_NEWLINE(eptr)) break;
+              if (md->partial != 0 &&    /* Take care with CRLF partial */
+                  eptr + 1 >= md->end_subject &&
+                  NLBLOCK->nltype == NLTYPE_FIXED &&
+                  NLBLOCK->nllen == 2 &&
+                  *eptr == NLBLOCK->nl[0])
+                {
+                md->hitend = TRUE;
+                if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
+                }
              eptr++;
              ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
              }
@ -5551,6 +5674,15 @@ for (;;)
                break;
                }
              if (IS_NEWLINE(eptr)) break;
+              if (md->partial != 0 &&    /* Take care with CRLF partial */
+                  eptr + 1 >= md->end_subject &&
+                  NLBLOCK->nltype == NLTYPE_FIXED &&
+                  NLBLOCK->nllen == 2 &&
+                  *eptr == NLBLOCK->nl[0])
+                {
+                md->hitend = TRUE;
+                if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
+                }
              eptr++;
              ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
              }
@ -5815,6 +5947,15 @@ for (;;)
              break;
              }
            if (IS_NEWLINE(eptr)) break;
+            if (md->partial != 0 &&    /* Take care with CRLF partial */
+                eptr + 1 >= md->end_subject &&
+                NLBLOCK->nltype == NLTYPE_FIXED &&
+                NLBLOCK->nllen == 2 &&
+                *eptr == NLBLOCK->nl[0])
+              {
+              md->hitend = TRUE;
+              if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
+              }
            eptr++;
            }
          break;
@ -6145,6 +6286,31 @@ Undefine all the macros that were defined above to handle this. */
 ***************************************************************************/


+#ifdef NO_RECURSE
+/*************************************************
+*          Release allocated heap frames         *
+*************************************************/
+
+/* This function releases all the allocated frames. The base frame is on the
+machine stack, and so must not be freed.
+
+Argument: the address of the base frame
+Returns:  nothing
+*/
+
+static void
+release_match_heapframes (heapframe *frame_base)
+{
+heapframe *nextframe = frame_base->Xnextframe;   /* Start after the base frame, which is never freed here */
+while (nextframe != NULL)
+  {
+  heapframe *oldframe = nextframe;
+  nextframe = nextframe->Xnextframe;   /* Read the link before the frame is freed */
+  (PUBL(stack_free))(oldframe);
+  }
+}
+#endif
+

 /*************************************************
 *         Execute a Regular Expression           *
@ -6207,13 +6373,22 @@ PCRE_PUCHAR req_char_ptr = start_match - 1;
 const pcre_study_data *study;
 const REAL_PCRE *re = (const REAL_PCRE *)argument_re;

+#ifdef NO_RECURSE
+heapframe frame_zero;
+frame_zero.Xprevframe = NULL;            /* Marks the top level */
+frame_zero.Xnextframe = NULL;            /* None are allocated yet */
+md->match_frames_base = &frame_zero;
+#endif
+
 /* Check for the special magic call that measures the size of the stack used
-per recursive call of match(). */
+per recursive call of match(). Without the funny casting for sizeof, a Windows
+compiler gave this error: "unary minus operator applied to unsigned type,
+result still unsigned". Hopefully the cast fixes that. */

 if (re == NULL && extra_data == NULL && subject == NULL && length == -999 &&
    start_offset == -999)
 #ifdef NO_RECURSE
-  return -sizeof(heapframe);
+  return -((int)sizeof(heapframe));
 #else
  return match(NULL, NULL, NULL, 0, NULL, NULL, 0);
 #endif
@ -6280,20 +6455,25 @@ if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
 /* If the pattern was successfully studied with JIT support, run the JIT
 executable instead of the rest of this function. Most options must be set at
 compile time for the JIT code to be usable. Fallback to the normal code path if
-an unsupported flag is set. In particular, JIT does not support partial
-matching. */
+an unsupported flag is set. */

 #ifdef SUPPORT_JIT
 if (extra_data != NULL
-    && (extra_data->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
+    && (extra_data->flags & (PCRE_EXTRA_EXECUTABLE_JIT |
+                             PCRE_EXTRA_TABLES)) == PCRE_EXTRA_EXECUTABLE_JIT
    && extra_data->executable_jit != NULL
-    && (extra_data->flags & PCRE_EXTRA_TABLES) == 0
    && (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL |
-                    PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART)) == 0)
-  return PRIV(jit_exec)(re, extra_data->executable_jit,
-    (const pcre_uchar *)subject, length, start_offset, options,
-    ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0)
-    ? MATCH_LIMIT : extra_data->match_limit, offsets, offsetcount);
+                    PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART |
+                    PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD)) == 0)
+  {
+  rc = PRIV(jit_exec)(re, extra_data, (const pcre_uchar *)subject, length,
+       start_offset, options, offsets, offsetcount);
+
+  /* PCRE_ERROR_NULL means that the selected normal or partial matching
+  mode is not compiled. In this case we simply fall back to the interpreter. */
+
+  if (rc != PCRE_ERROR_NULL) return rc;
+  }
 #endif

 /* Carry on with non-JIT matching. This information is for finding all the
@ -6887,7 +7067,7 @@ if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
    {
    register int *iptr, *iend;
    int resetcount = 2 + re->top_bracket * 2;
-    if (resetcount > offsetcount) resetcount = ocount;
+    if (resetcount > offsetcount) resetcount = offsetcount;
    iptr = offsets + md->end_offset_top;
    iend = offsets + resetcount;
    while (iptr < iend) *iptr++ = -1;
@ -6908,6 +7088,9 @@ if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
  if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
    *(extra_data->mark) = (pcre_uchar *)md->mark;
  DPRINTF((">>>> returning %d\n", rc));
+#ifdef NO_RECURSE
+  release_match_heapframes(&frame_zero);
+#endif
  return rc;
  }

@ -6925,6 +7108,9 @@ if (using_temporary_offsets)
 if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
  {
  DPRINTF((">>>> error: returning %d\n", rc));
+#ifdef NO_RECURSE
+  release_match_heapframes(&frame_zero);
+#endif
  return rc;
  }

@ -6954,6 +7140,9 @@ else

 if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
  *(extra_data->mark) = (pcre_uchar *)md->nomatch_mark;
+#ifdef NO_RECURSE
+  release_match_heapframes(&frame_zero);
+#endif
 return rc;
 }

--- a/glib/pcre/pcre_fullinfo.c
+++ b/glib/pcre/pcre_fullinfo.c
@ -193,6 +193,10 @@ switch (what)
  *((int *)where) = (re->flags & PCRE_HASCRORLF) != 0;
  break;

+  case PCRE_INFO_MAXLOOKBEHIND:
+  *((int *)where) = re->max_lookbehind;
+  break;
+
  default: return PCRE_ERROR_BADOPTION;
  }

--- a/glib/pcre/pcre_globals.c
+++ b/glib/pcre/pcre_globals.c
@ -58,7 +58,11 @@ global variables are not used. */

 #include "pcre_internal.h"

+#ifdef GLIB_COMPILATION
 #include "gmem.h"
+#else
+#include <glib.h>
+#endif /* GLIB_COMPILATION */

 #if defined _MSC_VER || defined  __SYMBIAN32__
 static void* LocalPcreMalloc(size_t aSize)
--- a/glib/pcre/pcre_internal.h
+++ b/glib/pcre/pcre_internal.h
@ -523,6 +523,11 @@ capturing parenthesis numbers in back references. */

 #define PUT2INC(a,n,d)  PUT2(a,n,d), a += IMM2_SIZE

+/* The maximum length of a MARK name is currently one data unit; it may be
+changed in future to be a fixed number of bytes or to depend on LINK_SIZE. */
+
+#define MAX_MARK ((1 << (sizeof(pcre_uchar)*8)) - 1)
+
 /* When UTF encoding is being used, a character is no longer just a single
 character. The macros for character handling generate simple sequences when
 used in character-mode, and more complicated ones for UTF characters.
@ -887,7 +892,8 @@ time, run time, or study time, respectively. */
   PCRE_NO_START_OPTIMIZE)

 #define PUBLIC_STUDY_OPTIONS \
-   PCRE_STUDY_JIT_COMPILE
+   (PCRE_STUDY_JIT_COMPILE|PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE| \
+    PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)

 /* Magic number to provide a small check against being handed junk. */

@ -1939,7 +1945,11 @@ enum { ERR0,  ERR1,  ERR2,  ERR3,  ERR4,  ERR5,  ERR6,  ERR7,  ERR8,  ERR9,
       ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
       ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
       ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69,
-       ERR70, ERR71, ERR72, ERR73, ERR74, ERRCOUNT };
+       ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERRCOUNT };
+
+/* JIT compiling modes. The function list is indexed by them. */
+enum { JIT_COMPILE, JIT_PARTIAL_SOFT_COMPILE, JIT_PARTIAL_HARD_COMPILE,
+       JIT_NUMBER_OF_COMPILE_MODES };

 /* The real format of the start of the pcre block; the index of names and the
 code vector run on as long as necessary after the end. We store an explicit
@ -1969,16 +1979,15 @@ typedef struct REAL_PCRE {
  pcre_uint32 size;               /* Total that was malloced */
  pcre_uint32 options;            /* Public options */
  pcre_uint16 flags;              /* Private flags */
-  pcre_uint16 dummy1;             /* For future use */
-  pcre_uint16 top_bracket;
-  pcre_uint16 top_backref;
+  pcre_uint16 max_lookbehind;     /* Longest lookbehind (characters) */
+  pcre_uint16 top_bracket;        /* Highest numbered group */
+  pcre_uint16 top_backref;        /* Highest numbered back reference */
  pcre_uint16 first_char;         /* Starting character */
  pcre_uint16 req_char;           /* This character must be seen */
  pcre_uint16 name_table_offset;  /* Offset to name table that follows */
  pcre_uint16 name_entry_size;    /* Size of any name items */
  pcre_uint16 name_count;         /* Number of name items */
  pcre_uint16 ref_count;          /* Reference count */
-
  const pcre_uint8 *tables;       /* Pointer to tables or NULL for std */
  const pcre_uint8 *nullpad;      /* NULL padding */
 } REAL_PCRE;
@ -2024,6 +2033,7 @@ typedef struct compile_data {
  int  workspace_size;              /* Size of workspace */
  int  bracount;                    /* Count of capturing parens as we compile */
  int  final_bracount;              /* Saved value after first pass */
+  int  max_lookbehind;              /* Maximum lookbehind (characters) */
  int  top_backref;                 /* Maximum back reference */
  unsigned int backref_map;         /* Bitmap of low back refs */
  int  assert_depth;                /* Depth of nested assertions */
@ -2125,6 +2135,9 @@ typedef struct match_data {
  const  pcre_uchar *mark;        /* Mark pointer to pass back on success */
  const  pcre_uchar *nomatch_mark;/* Mark pointer to pass back on failure */
  const  pcre_uchar *once_target; /* Where to back up to for atomic groups */
+#ifdef NO_RECURSE
+  void  *match_frames_base;       /* For remembering malloc'd frames */
+#endif
 } match_data;

 /* A similar structure is used for the same purpose by the DFA matching
@ -2179,7 +2192,7 @@ total length. */
 #define ctypes_offset (cbits_offset + cbit_length)
 #define tables_length (ctypes_offset + 256)

-/* Internal function prefix */
+/* Internal function and data prefixes. */

 #ifdef COMPILE_PCRE8
 #ifndef PUBL
@ -2288,9 +2301,10 @@ extern BOOL              PRIV(was_newline)(PCRE_PUCHAR, int, PCRE_PUCHAR,
 extern BOOL              PRIV(xclass)(int, const pcre_uchar *, BOOL);

 #ifdef SUPPORT_JIT
-extern void              PRIV(jit_compile)(const REAL_PCRE *, PUBL(extra) *);
-extern int               PRIV(jit_exec)(const REAL_PCRE *, void *,
-                           const pcre_uchar *, int, int, int, int, int *, int);
+extern void              PRIV(jit_compile)(const REAL_PCRE *,
+                           PUBL(extra) *, int);
+extern int               PRIV(jit_exec)(const REAL_PCRE *, const PUBL(extra) *,
+                           const pcre_uchar *, int, int, int, int *, int);
 extern void              PRIV(jit_free)(void *);
 extern int               PRIV(jit_get_size)(void *);
 extern const char*       PRIV(jit_get_target)(void);
@ -2298,15 +2312,6 @@ extern const char*       PRIV(jit_get_target)(void);

 /* Unicode character database (UCD) */

-typedef struct {
-  pcre_uint8 script;
-  pcre_uint8 chartype;
-  pcre_int32 other_case;
-} ucd_record;
-
-extern const ucd_record  PRIV(ucd_records)[];
-extern const pcre_uint8  PRIV(ucd_stage1)[];
-extern const pcre_uint16 PRIV(ucd_stage2)[];
 extern const int         PRIV(ucp_gentype)[];
 #ifdef SUPPORT_JIT
 extern const int         PRIV(ucp_typerange)[];
--- a/glib/pcre/pcre_jit_compile.c
+++ b/glib/pcre/pcre_jit_compile.c
--- a/glib/pcre/pcre_study.c
+++ b/glib/pcre/pcre_study.c
@ -1123,7 +1123,7 @@ do
        case OP_HSPACE:
        SET_BIT(0x09);
        SET_BIT(0x20);
-#ifdef COMPILE_PCRE8
+#ifdef SUPPORT_UTF
        if (utf)
          {
 #ifdef COMPILE_PCRE8
@ -1148,7 +1148,7 @@ do
        SET_BIT(0x0B);
        SET_BIT(0x0C);
        SET_BIT(0x0D);
-#ifdef COMPILE_PCRE8
+#ifdef SUPPORT_UTF
        if (utf)
          {
 #ifdef COMPILE_PCRE8
@ -1418,7 +1418,8 @@ we don't have to change that code. */

 if (bits_set || min > 0
 #ifdef SUPPORT_JIT
-    || (options & PCRE_STUDY_JIT_COMPILE) != 0
+    || (options & (PCRE_STUDY_JIT_COMPILE | PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE
+                 | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)) != 0
 #endif
  )
  {
@ -1478,7 +1479,13 @@ if (bits_set || min > 0

 #ifdef SUPPORT_JIT
  extra->executable_jit = NULL;
-  if ((options & PCRE_STUDY_JIT_COMPILE) != 0) PRIV(jit_compile)(re, extra);
+  if ((options & PCRE_STUDY_JIT_COMPILE) != 0)
+    PRIV(jit_compile)(re, extra, JIT_COMPILE);
+  if ((options & PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE) != 0)
+    PRIV(jit_compile)(re, extra, JIT_PARTIAL_SOFT_COMPILE);
+  if ((options & PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE) != 0)
+    PRIV(jit_compile)(re, extra, JIT_PARTIAL_HARD_COMPILE);
+
  if (study->flags == 0 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) == 0)
    {
 #ifdef COMPILE_PCRE8
--- a/glib/pcre/pcre_tables.c
+++ b/glib/pcre/pcre_tables.c
@ -587,7 +587,7 @@ const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
 unsigned int
 _pcre_ucp_othercase(const unsigned int c)
 {
-  unsigned int oc;
+  unsigned int oc = NOTACHAR;

  if ((oc = g_unichar_toupper(c)) != c)
    return oc;
--- a/glib/pcre/ucp.h
+++ b/glib/pcre/ucp.h
@ -10,7 +10,11 @@ the UCD access macros. New values that are added for new releases of Unicode
 should always be at the end of each enum, for backwards compatibility. */

 /* These are the general character categories. */
+#ifdef GLIB_COMPILATION
 #include "gunicode.h"
+#else
+#include <glib.h>
+#endif

 enum {
  ucp_C,     /* Other */
@ -60,6 +64,9 @@ enum {
 /* These are the script identifications. */

 enum {
+  ucp_Common = G_UNICODE_SCRIPT_COMMON,
+  ucp_Inherited = G_UNICODE_SCRIPT_INHERITED,
+
  ucp_Arabic = G_UNICODE_SCRIPT_ARABIC,
  ucp_Armenian = G_UNICODE_SCRIPT_ARMENIAN,
  ucp_Bengali = G_UNICODE_SCRIPT_BENGALI,
@ -69,7 +76,6 @@ enum {
  ucp_Buhid = G_UNICODE_SCRIPT_BUHID,
  ucp_Canadian_Aboriginal = G_UNICODE_SCRIPT_CANADIAN_ABORIGINAL,
  ucp_Cherokee = G_UNICODE_SCRIPT_CHEROKEE,
-  ucp_Common = G_UNICODE_SCRIPT_COMMON,
  ucp_Coptic = G_UNICODE_SCRIPT_COPTIC,
  ucp_Cypriot = G_UNICODE_SCRIPT_CYPRIOT,
  ucp_Cyrillic = G_UNICODE_SCRIPT_CYRILLIC,
@ -87,7 +93,6 @@ enum {
  ucp_Hanunoo = G_UNICODE_SCRIPT_HANUNOO,
  ucp_Hebrew = G_UNICODE_SCRIPT_HEBREW,
  ucp_Hiragana = G_UNICODE_SCRIPT_HIRAGANA,
-  ucp_Inherited = G_UNICODE_SCRIPT_INHERITED,
  ucp_Kannada = G_UNICODE_SCRIPT_KANNADA,
  ucp_Katakana = G_UNICODE_SCRIPT_KATAKANA,
  ucp_Kharoshthi = G_UNICODE_SCRIPT_KHAROSHTHI,
--- a/glib/update-pcre/ucp.patch
+++ b/glib/update-pcre/ucp.patch
@ -1,11 +1,10 @@
-From 384879be07418fc6224b6603a2e8ca6f11e178fc Mon Sep 17 00:00:00 2001
+From 23d48c5fc7aa889dc7798f9c64acd43d9cb34683 Mon Sep 17 00:00:00 2001
 From: Christian Persch <chpe@gnome.org>
 Date: Sun, 12 Feb 2012 21:20:33 +0100
 Subject: [PATCH] regex: Use glib for unicode data

 Use g_unichar_type() and g_unichar_get_script() instead of pcre tables.
 ---
- glib/pcre/Makefile.am     |    1 -
 glib/pcre/pcre_compile.c  |   26 +++---
 glib/pcre/pcre_dfa_exec.c |   96 ++++++++--------
 glib/pcre/pcre_exec.c     |   26 +++---
@ -13,25 +12,13 @@ Use g_unichar_type() and g_unichar_get_script() instead of pcre tables.
 glib/pcre/pcre_tables.c   |   16 +++
 glib/pcre/pcre_xclass.c   |   24 ++--
 glib/pcre/ucp.h           |  265 +++++++++++++++++++++++----------------------
- 8 files changed, 239 insertions(+), 226 deletions(-)
+ 7 files changed, 239 insertions(+), 225 deletions(-)

-diff --git a/glib/pcre/Makefile.am b/glib/pcre/Makefile.am
-index 21da5c5..1981953 100644
--- a/glib/pcre/Makefile.am
-+++ b/glib/pcre/Makefile.am
-@@ -51,7 +51,6 @@ libpcre_la_SOURCES = \
- 	pcre_string_utils.c \
- 	pcre_study.c \
- 	pcre_tables.c \
-	pcre_ucd.c \
- 	pcre_valid_utf8.c \
- 	pcre_version.c \
- 	pcre_xclass.c \
 diff --git a/glib/pcre/pcre_compile.c b/glib/pcre/pcre_compile.c
-index eb985df..b44055a 100644
+index 21bef80..a6c84e1 100644
 --- a/glib/pcre/pcre_compile.c
 +++ b/glib/pcre/pcre_compile.c
-@@ -2890,43 +2890,43 @@ Returns:       TRUE if auto-possessifying is OK
+@@ -2920,43 +2920,43 @@ Returns:       TRUE if auto-possessifying is OK
 static BOOL
 check_char_prop(int c, int ptype, int pdata, BOOL negated)
 {
@ -89,10 +76,10 @@ index eb985df..b44055a 100644
   }
 return FALSE;
 diff --git a/glib/pcre/pcre_dfa_exec.c b/glib/pcre/pcre_dfa_exec.c
-index 21d7be6..41ff65b 100644
+index 9565d46..3f913ce 100644
 --- a/glib/pcre/pcre_dfa_exec.c
 +++ b/glib/pcre/pcre_dfa_exec.c
-@@ -1015,7 +1015,7 @@ for (;;)
+@@ -1060,7 +1060,7 @@ for (;;)
       if (clen > 0)
         {
         BOOL OK;
@ -101,7 +88,7 @@ index 21d7be6..41ff65b 100644
         switch(code[1])
           {
           case PT_ANY:
-@@ -1023,43 +1023,43 @@ for (;;)
+@@ -1068,43 +1068,43 @@ for (;;)
           break;
 
           case PT_LAMP:
@ -156,7 +143,7 @@ index 21d7be6..41ff65b 100644
                c == CHAR_UNDERSCORE;
           break;
 
-@@ -1209,7 +1209,7 @@ for (;;)
+@@ -1294,7 +1294,7 @@ for (;;)
       if (clen > 0)
         {
         BOOL OK;
@ -165,7 +152,7 @@ index 21d7be6..41ff65b 100644
         switch(code[2])
           {
           case PT_ANY:
-@@ -1217,43 +1217,43 @@ for (;;)
+@@ -1302,43 +1302,43 @@ for (;;)
           break;
 
           case PT_LAMP:
@ -220,7 +207,7 @@ index 21d7be6..41ff65b 100644
                c == CHAR_UNDERSCORE;
           break;
 
-@@ -1456,7 +1456,7 @@ for (;;)
+@@ -1541,7 +1541,7 @@ for (;;)
       if (clen > 0)
         {
         BOOL OK;
@ -229,7 +216,7 @@ index 21d7be6..41ff65b 100644
         switch(code[2])
           {
           case PT_ANY:
-@@ -1464,43 +1464,43 @@ for (;;)
+@@ -1549,43 +1549,43 @@ for (;;)
           break;
 
           case PT_LAMP:
@ -284,7 +271,7 @@ index 21d7be6..41ff65b 100644
                c == CHAR_UNDERSCORE;
           break;
 
-@@ -1728,7 +1728,7 @@ for (;;)
+@@ -1813,7 +1813,7 @@ for (;;)
       if (clen > 0)
         {
         BOOL OK;
@ -293,7 +280,7 @@ index 21d7be6..41ff65b 100644
         switch(code[1 + IMM2_SIZE + 1])
           {
           case PT_ANY:
-@@ -1736,43 +1736,43 @@ for (;;)
+@@ -1821,43 +1821,43 @@ for (;;)
           break;
 
           case PT_LAMP:
@ -349,10 +336,10 @@ index 21d7be6..41ff65b 100644
           break;
 
 diff --git a/glib/pcre/pcre_exec.c b/glib/pcre/pcre_exec.c
-index b715353..8eb3162 100644
+index 830b8b5..c89a3f9 100644
 --- a/glib/pcre/pcre_exec.c
 +++ b/glib/pcre/pcre_exec.c
-@@ -2507,7 +2507,7 @@ for (;;)
+@@ -2565,7 +2565,7 @@ for (;;)
       }
     GETCHARINCTEST(c, eptr);
       {
@ -361,7 +348,7 @@ index b715353..8eb3162 100644
 
       switch(ecode[1])
         {
-@@ -2516,44 +2516,44 @@ for (;;)
+@@ -2574,44 +2574,44 @@ for (;;)
         break;
 
         case PT_LAMP:
@ -416,7 +403,7 @@ index b715353..8eb3162 100644
              c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
              c == CHAR_FF || c == CHAR_CR)
                == (op == OP_NOTPROP))
-@@ -2561,8 +2561,8 @@ for (;;)
+@@ -2619,8 +2619,8 @@ for (;;)
         break;
 
         case PT_WORD:
@ -428,10 +415,10 @@ index b715353..8eb3162 100644
           RRETURN(MATCH_NOMATCH);
         break;
 diff --git a/glib/pcre/pcre_internal.h b/glib/pcre/pcre_internal.h
-index e5a4b6a..41c7ee3 100644
+index 181c312..234af1b 100644
 --- a/glib/pcre/pcre_internal.h
 +++ b/glib/pcre/pcre_internal.h
-@@ -2315,15 +2315,12 @@ extern const int         PRIV(ucp_typerange)[];
+@@ -2329,15 +2329,12 @@ extern const int         PRIV(ucp_typerange)[];
 #ifdef SUPPORT_UCP
 /* UCD access macros */
 
@ -452,10 +439,10 @@ index e5a4b6a..41c7ee3 100644
 #endif /* SUPPORT_UCP */
 
 diff --git a/glib/pcre/pcre_tables.c b/glib/pcre/pcre_tables.c
-index c8134ec..47becc7 100644
+index 7ac2d89..e401974 100644
 --- a/glib/pcre/pcre_tables.c
 +++ b/glib/pcre/pcre_tables.c
-@@ -563,6 +563,22 @@ const ucp_type_table PRIV(utt)[] = {
+@@ -584,6 +584,22 @@ const ucp_type_table PRIV(utt)[] = {
 
 const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);