Forgotten files

2025-08-20 23:58:54 +02:00 · 2011-01-22 00:01:54 -05:00
parent 3f059a6a12
commit fb2809ec99
10 changed files with 2273 additions and 1001 deletions
--- a/glib/pcre/pcre.h
+++ b/glib/pcre/pcre.h
@@ -5,7 +5,7 @@
 /* This is the public header file for the PCRE library, to be #included by
 applications that call the PCRE functions.
-           Copyright (c) 1997-2009 University of Cambridge
+           Copyright (c) 1997-2010 University of Cambridge
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
 /* The current PCRE version information. */
 #define PCRE_MAJOR          8
-#define PCRE_MINOR          02
+#define PCRE_MINOR          12
 #define PCRE_PRERELEASE     
-#define PCRE_DATE           2010-03-19
+#define PCRE_DATE           2011-01-15
 /* When an application links to a PCRE DLL in Windows, the symbols that are
 imported have to be identified as such. When building PCRE, the appropriate
@@ -96,41 +96,44 @@ extern "C" {
 #endif
 /* Options. Some are compile-time only, some are run-time only, and some are
-both, so we keep them all distinct. */
+both, so we keep them all distinct. However, almost all the bits in the options
+word are now used. In the long run, we may have to re-use some of the
+compile-time only bits for runtime options, or vice versa. */
-#define PCRE_CASELESS           0x00000001
+#define PCRE_CASELESS           0x00000001  /* Compile */
-#define PCRE_MULTILINE          0x00000002
+#define PCRE_MULTILINE          0x00000002  /* Compile */
-#define PCRE_DOTALL             0x00000004
+#define PCRE_DOTALL             0x00000004  /* Compile */
-#define PCRE_EXTENDED           0x00000008
+#define PCRE_EXTENDED           0x00000008  /* Compile */
-#define PCRE_ANCHORED           0x00000010
+#define PCRE_ANCHORED           0x00000010  /* Compile, exec, DFA exec */
-#define PCRE_DOLLAR_ENDONLY     0x00000020
+#define PCRE_DOLLAR_ENDONLY     0x00000020  /* Compile */
-#define PCRE_EXTRA              0x00000040
+#define PCRE_EXTRA              0x00000040  /* Compile */
-#define PCRE_NOTBOL             0x00000080
+#define PCRE_NOTBOL             0x00000080  /* Exec, DFA exec */
-#define PCRE_NOTEOL             0x00000100
+#define PCRE_NOTEOL             0x00000100  /* Exec, DFA exec */
-#define PCRE_UNGREEDY           0x00000200
+#define PCRE_UNGREEDY           0x00000200  /* Compile */
-#define PCRE_NOTEMPTY           0x00000400
+#define PCRE_NOTEMPTY           0x00000400  /* Exec, DFA exec */
-#define PCRE_UTF8               0x00000800
+#define PCRE_UTF8               0x00000800  /* Compile */
-#define PCRE_NO_AUTO_CAPTURE    0x00001000
+#define PCRE_NO_AUTO_CAPTURE    0x00001000  /* Compile */
-#define PCRE_NO_UTF8_CHECK      0x00002000
+#define PCRE_NO_UTF8_CHECK      0x00002000  /* Compile, exec, DFA exec */
-#define PCRE_AUTO_CALLOUT       0x00004000
+#define PCRE_AUTO_CALLOUT       0x00004000  /* Compile */
-#define PCRE_PARTIAL_SOFT       0x00008000
+#define PCRE_PARTIAL_SOFT       0x00008000  /* Exec, DFA exec */
 #define PCRE_PARTIAL            0x00008000  /* Backwards compatible synonym */
-#define PCRE_DFA_SHORTEST       0x00010000
+#define PCRE_DFA_SHORTEST       0x00010000  /* DFA exec */
-#define PCRE_DFA_RESTART        0x00020000
+#define PCRE_DFA_RESTART        0x00020000  /* DFA exec */
-#define PCRE_FIRSTLINE          0x00040000
+#define PCRE_FIRSTLINE          0x00040000  /* Compile */
-#define PCRE_DUPNAMES           0x00080000
+#define PCRE_DUPNAMES           0x00080000  /* Compile */
-#define PCRE_NEWLINE_CR         0x00100000
+#define PCRE_NEWLINE_CR         0x00100000  /* Compile, exec, DFA exec */
-#define PCRE_NEWLINE_LF         0x00200000
+#define PCRE_NEWLINE_LF         0x00200000  /* Compile, exec, DFA exec */
-#define PCRE_NEWLINE_CRLF       0x00300000
+#define PCRE_NEWLINE_CRLF       0x00300000  /* Compile, exec, DFA exec */
-#define PCRE_NEWLINE_ANY        0x00400000
+#define PCRE_NEWLINE_ANY        0x00400000  /* Compile, exec, DFA exec */
-#define PCRE_NEWLINE_ANYCRLF    0x00500000
+#define PCRE_NEWLINE_ANYCRLF    0x00500000  /* Compile, exec, DFA exec */
-#define PCRE_BSR_ANYCRLF        0x00800000
+#define PCRE_BSR_ANYCRLF        0x00800000  /* Compile, exec, DFA exec */
-#define PCRE_BSR_UNICODE        0x01000000
+#define PCRE_BSR_UNICODE        0x01000000  /* Compile, exec, DFA exec */
-#define PCRE_JAVASCRIPT_COMPAT  0x02000000
+#define PCRE_JAVASCRIPT_COMPAT  0x02000000  /* Compile */
-#define PCRE_NO_START_OPTIMIZE  0x04000000
+#define PCRE_NO_START_OPTIMIZE  0x04000000  /* Compile, exec, DFA exec */
-#define PCRE_NO_START_OPTIMISE  0x04000000
+#define PCRE_NO_START_OPTIMISE  0x04000000  /* Synonym */
-#define PCRE_PARTIAL_HARD       0x08000000
+#define PCRE_PARTIAL_HARD       0x08000000  /* Exec, DFA exec */
-#define PCRE_NOTEMPTY_ATSTART   0x10000000
+#define PCRE_NOTEMPTY_ATSTART   0x10000000  /* Exec, DFA exec */
+#define PCRE_UCP                0x20000000  /* Compile */
 /* Exec-time and get/set-time error codes */
@@ -158,6 +161,8 @@ both, so we keep them all distinct. */
 #define PCRE_ERROR_RECURSIONLIMIT (-21)
 #define PCRE_ERROR_NULLWSLIMIT    (-22)  /* No longer actually used */
 #define PCRE_ERROR_BADNEWLINE     (-23)
+#define PCRE_ERROR_BADOFFSET      (-24)
+#define PCRE_ERROR_SHORTUTF8      (-25)
 /* Request types for pcre_fullinfo() */
@@ -200,6 +205,7 @@ these bits, just add new ones on the end, in order to remain compatible. */
 #define PCRE_EXTRA_CALLOUT_DATA           0x0004
 #define PCRE_EXTRA_TABLES                 0x0008
 #define PCRE_EXTRA_MATCH_LIMIT_RECURSION  0x0010
+#define PCRE_EXTRA_MARK                   0x0020
 /* Types */
@@ -225,6 +231,7 @@ typedef struct pcre_extra {
  void *callout_data;             /* Data passed back in callouts */
  const unsigned char *tables;    /* Pointer to character tables */
  unsigned long int match_limit_recursion; /* Max recursive calls to match() */
+  unsigned char **mark;           /* For passing back a mark pointer */
 } pcre_extra;
 /* The structure for passing out data via the pcre_callout_function. We use a
--- a/glib/pcre/pcre_chartables.c
+++ b/glib/pcre/pcre_chartables.c
@@ -14,7 +14,7 @@ example ISO-8859-1. When dftables is run, it creates these tables in the
 current locale. If PCRE is configured with --enable-rebuild-chartables, this
 happens automatically.
-The following #includes are present because without the gcc 4.x may remove the
+The following #includes are present because without them gcc 4.x may remove the
 array definition from the final binary if PCRE is built into a static library
 and dead code stripping is activated. This leads to link errors. Pulling in the
 header ensures that the array gets flagged as "someone outside this compilation
--- a/glib/pcre/pcre_compile.c
+++ b/glib/pcre/pcre_compile.c
--- a/glib/pcre/pcre_dfa_exec.c
+++ b/glib/pcre/pcre_dfa_exec.c
@@ -106,7 +106,7 @@ never stored, so we push them well clear of the normal opcodes. */
 /* This table identifies those opcodes that are followed immediately by a
-character that is to be tested in some way. This makes is possible to
+character that is to be tested in some way. This makes it possible to
 centralize the loading of these characters. In the case of Type * etc, the
 "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
 small value. Non-zero values in the table are the offsets from the opcode where
@@ -161,8 +161,9 @@ static const uschar coptable[] = {
  0, 0,                          /* RREF, NRREF                            */
  0,                             /* DEF                                    */
  0, 0,                          /* BRAZERO, BRAMINZERO                    */
-  0, 0, 0, 0,                    /* PRUNE, SKIP, THEN, COMMIT              */
+  0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG,                */
-  0, 0, 0, 0                     /* FAIL, ACCEPT, CLOSE, SKIPZERO          */
+  0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG,        */
+  0, 0, 0, 0, 0                  /* COMMIT, FAIL, ACCEPT, CLOSE, SKIPZERO  */
 };
 /* This table identifies those opcodes that inspect a character. It is used to
@@ -218,8 +219,9 @@ static const uschar poptable[] = {
  0, 0,                          /* RREF, NRREF                            */
  0,                             /* DEF                                    */
  0, 0,                          /* BRAZERO, BRAMINZERO                    */
-  0, 0, 0, 0,                    /* PRUNE, SKIP, THEN, COMMIT              */
+  0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG,                */
-  0, 0, 0, 0                     /* FAIL, ACCEPT, CLOSE, SKIPZERO          */
+  0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG,        */
+  0, 0, 0, 0, 0                  /* COMMIT, FAIL, ACCEPT, CLOSE, SKIPZERO  */
 };
 /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
@@ -473,7 +475,7 @@ if (*first_op == OP_REVERSE)
    {
    gone_back = (current_subject - max_back < start_subject)?
-      current_subject - start_subject : max_back;
+      (int)(current_subject - start_subject) : max_back;
    current_subject -= gone_back;
    }
@@ -490,7 +492,7 @@ if (*first_op == OP_REVERSE)
    int back = GET(end_code, 2+LINK_SIZE);
    if (back <= gone_back)
      {
-      int bstate = end_code - start_code + 2 + 2*LINK_SIZE;
+      int bstate = (int)(end_code - start_code + 2 + 2*LINK_SIZE);
      ADD_NEW_DATA(-bstate, 0, gone_back - back);
      }
    end_code += GET(end_code, 1);
@@ -526,7 +528,7 @@ else
      ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);
    do
      {
-      ADD_NEW(end_code - start_code + length, 0);
+      ADD_NEW((int)(end_code - start_code + length), 0);
      end_code += GET(end_code, 1);
      length = 1 + LINK_SIZE;
      }
@@ -753,8 +755,8 @@ for (;;)
          if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));
          if (offsetcount >= 2)
            {
-            offsets[0] = current_subject - start_subject;
+            offsets[0] = (int)(current_subject - start_subject);
-            offsets[1] = ptr - start_subject;
+            offsets[1] = (int)(ptr - start_subject);
            DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,
              offsets[1] - offsets[0], current_subject));
            }
@@ -776,7 +778,7 @@ for (;;)
      /*-----------------------------------------------------------------*/
      case OP_ALT:
      do { code += GET(code, 1); } while (*code == OP_ALT);
-      ADD_ACTIVE(code - start_code, 0);
+      ADD_ACTIVE((int)(code - start_code), 0);
      break;
      /*-----------------------------------------------------------------*/
@@ -784,7 +786,7 @@ for (;;)
      case OP_SBRA:
      do
        {
-        ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
+        ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
        code += GET(code, 1);
        }
      while (*code == OP_ALT);
@@ -793,11 +795,11 @@ for (;;)
      /*-----------------------------------------------------------------*/
      case OP_CBRA:
      case OP_SCBRA:
-      ADD_ACTIVE(code - start_code + 3 + LINK_SIZE,  0);
+      ADD_ACTIVE((int)(code - start_code + 3 + LINK_SIZE),  0);
      code += GET(code, 1);
      while (*code == OP_ALT)
        {
-        ADD_ACTIVE(code - start_code + 1 + LINK_SIZE,  0);
+        ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE),  0);
        code += GET(code, 1);
        }
      break;
@@ -808,14 +810,14 @@ for (;;)
      ADD_ACTIVE(state_offset + 1, 0);
      code += 1 + GET(code, 2);
      while (*code == OP_ALT) code += GET(code, 1);
-      ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
+      ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
      break;
      /*-----------------------------------------------------------------*/
      case OP_SKIPZERO:
      code += 1 + GET(code, 2);
      while (*code == OP_ALT) code += GET(code, 1);
-      ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
+      ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
      break;
      /*-----------------------------------------------------------------*/
@@ -829,7 +831,12 @@ for (;;)
      /*-----------------------------------------------------------------*/
      case OP_EOD:
-      if (ptr >= end_subject) { ADD_ACTIVE(state_offset + 1, 0); }
+      if (ptr >= end_subject)
        {
        if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
          could_continue = TRUE;
        else { ADD_ACTIVE(state_offset + 1, 0); }
        }
      break;
      /*-----------------------------------------------------------------*/
@@ -869,7 +876,9 @@ for (;;)
      /*-----------------------------------------------------------------*/
      case OP_EODN:
-      if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - md->nllen))
+      if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
        could_continue = TRUE;
      else if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - md->nllen))
        { ADD_ACTIVE(state_offset + 1, 0); }
      break;
@@ -877,7 +886,9 @@ for (;;)
      case OP_DOLL:
      if ((md->moptions & PCRE_NOTEOL) == 0)
        {
-        if (clen == 0 ||
+        if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
          could_continue = TRUE;
        else if (clen == 0 ||
            ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr) &&
               ((ims & PCRE_MULTILINE) != 0 || ptr == end_subject - md->nllen)
            ))
@@ -920,13 +931,37 @@ for (;;)
          if (utf8) BACKCHAR(temp);
 #endif
          GETCHARTEST(d, temp);
 #ifdef SUPPORT_UCP
          if ((md->poptions & PCRE_UCP) != 0)
            {
            if (d == '_') left_word = TRUE; else
              {
              int cat = UCD_CATEGORY(d);
              left_word = (cat == ucp_L || cat == ucp_N);
              }
            }
          else
 #endif
          left_word = d < 256 && (ctypes[d] & ctype_word) != 0;
          }
-        else left_word = 0;
+        else left_word = FALSE;
        if (clen > 0)
          {
 #ifdef SUPPORT_UCP
          if ((md->poptions & PCRE_UCP) != 0)
            {
            if (c == '_') right_word = TRUE; else
              {
              int cat = UCD_CATEGORY(c);
              right_word = (cat == ucp_L || cat == ucp_N);
              }
            }
          else
 #endif
          right_word = c < 256 && (ctypes[c] & ctype_word) != 0;
-        else right_word = 0;
+          }
        else right_word = FALSE;
        if ((left_word == right_word) == (codevalue == OP_NOT_WORD_BOUNDARY))
          { ADD_ACTIVE(state_offset + 1, 0); }
@@ -953,7 +988,8 @@ for (;;)
          break;
          case PT_LAMP:
-          OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
+          OK = chartype == ucp_Lu || chartype == ucp_Ll ||
               chartype == ucp_Lt;
          break;
          case PT_GC:
@@ -968,6 +1004,30 @@ for (;;)
          OK = UCD_SCRIPT(c) == code[2];
          break;
          /* These are specials for combination cases. */
          case PT_ALNUM:
          OK = _pcre_ucp_gentype[chartype] == ucp_L ||
               _pcre_ucp_gentype[chartype] == ucp_N;
          break;
          case PT_SPACE:    /* Perl space */
          OK = _pcre_ucp_gentype[chartype] == ucp_Z ||
               c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
          break;
          case PT_PXSPACE:  /* POSIX space */
          OK = _pcre_ucp_gentype[chartype] == ucp_Z ||
               c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
               c == CHAR_FF || c == CHAR_CR;
          break;
          case PT_WORD:
          OK = _pcre_ucp_gentype[chartype] == ucp_L ||
               _pcre_ucp_gentype[chartype] == ucp_N ||
               c == CHAR_UNDERSCORE;
          break;
          /* Should never occur, but keep compilers from grumbling. */
          default:
@@ -1122,7 +1182,8 @@ for (;;)
          break;
          case PT_LAMP:
-          OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
+          OK = chartype == ucp_Lu || chartype == ucp_Ll ||
            chartype == ucp_Lt;
          break;
          case PT_GC:
@@ -1137,6 +1198,30 @@ for (;;)
          OK = UCD_SCRIPT(c) == code[3];
          break;
          /* These are specials for combination cases. */
          case PT_ALNUM:
          OK = _pcre_ucp_gentype[chartype] == ucp_L ||
               _pcre_ucp_gentype[chartype] == ucp_N;
          break;
          case PT_SPACE:    /* Perl space */
          OK = _pcre_ucp_gentype[chartype] == ucp_Z ||
               c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
          break;
          case PT_PXSPACE:  /* POSIX space */
          OK = _pcre_ucp_gentype[chartype] == ucp_Z ||
               c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
               c == CHAR_FF || c == CHAR_CR;
          break;
          case PT_WORD:
          OK = _pcre_ucp_gentype[chartype] == ucp_L ||
               _pcre_ucp_gentype[chartype] == ucp_N ||
               c == CHAR_UNDERSCORE;
          break;
          /* Should never occur, but keep compilers from grumbling. */
          default:
@@ -1344,7 +1429,8 @@ for (;;)
          break;
          case PT_LAMP:
-          OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
+          OK = chartype == ucp_Lu || chartype == ucp_Ll ||
            chartype == ucp_Lt;
          break;
          case PT_GC:
@@ -1359,6 +1445,30 @@ for (;;)
          OK = UCD_SCRIPT(c) == code[3];
          break;
          /* These are specials for combination cases. */
          case PT_ALNUM:
          OK = _pcre_ucp_gentype[chartype] == ucp_L ||
               _pcre_ucp_gentype[chartype] == ucp_N;
          break;
          case PT_SPACE:    /* Perl space */
          OK = _pcre_ucp_gentype[chartype] == ucp_Z ||
               c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
          break;
          case PT_PXSPACE:  /* POSIX space */
          OK = _pcre_ucp_gentype[chartype] == ucp_Z ||
               c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
               c == CHAR_FF || c == CHAR_CR;
          break;
          case PT_WORD:
          OK = _pcre_ucp_gentype[chartype] == ucp_L ||
               _pcre_ucp_gentype[chartype] == ucp_N ||
               c == CHAR_UNDERSCORE;
          break;
          /* Should never occur, but keep compilers from grumbling. */
          default:
@@ -1591,7 +1701,8 @@ for (;;)
          break;
          case PT_LAMP:
-          OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
+          OK = chartype == ucp_Lu || chartype == ucp_Ll ||
            chartype == ucp_Lt;
          break;
          case PT_GC:
@@ -1606,6 +1717,30 @@ for (;;)
          OK = UCD_SCRIPT(c) == code[5];
          break;
          /* These are specials for combination cases. */
          case PT_ALNUM:
          OK = _pcre_ucp_gentype[chartype] == ucp_L ||
               _pcre_ucp_gentype[chartype] == ucp_N;
          break;
          case PT_SPACE:    /* Perl space */
          OK = _pcre_ucp_gentype[chartype] == ucp_Z ||
               c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
          break;
          case PT_PXSPACE:  /* POSIX space */
          OK = _pcre_ucp_gentype[chartype] == ucp_Z ||
               c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
               c == CHAR_FF || c == CHAR_CR;
          break;
          case PT_WORD:
          OK = _pcre_ucp_gentype[chartype] == ucp_L ||
               _pcre_ucp_gentype[chartype] == ucp_N ||
               c == CHAR_UNDERSCORE;
          break;
          /* Should never occur, but keep compilers from grumbling. */
          default:
@@ -2233,7 +2368,7 @@ for (;;)
        points to the byte after the end of the class. If there is a
        quantifier, this is where it will be. */
-        next_state_offset = ecode - start_code;
+        next_state_offset = (int)(ecode - start_code);
        switch (*ecode)
          {
@@ -2304,7 +2439,7 @@ for (;;)
          md,                                   /* static match data */
          code,                                 /* this subexpression's code */
          ptr,                                  /* where we currently are */
-          ptr - start_subject,                  /* start offset */
+          (int)(ptr - start_subject),           /* start offset */
          local_offsets,                        /* offset vector */
          sizeof(local_offsets)/sizeof(int),    /* size of same */
          local_workspace,                      /* workspace vector */
@@ -2315,7 +2450,7 @@ for (;;)
        if (rc == PCRE_ERROR_DFA_UITEM) return rc;
        if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK))
-            { ADD_ACTIVE(endasscode + LINK_SIZE + 1 - start_code, 0); }
+            { ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); }
        }
      break;
@@ -2342,9 +2477,9 @@ for (;;)
            cb.callout_number   = code[LINK_SIZE+2];
            cb.offset_vector    = offsets;
            cb.subject          = (PCRE_SPTR)start_subject;
-            cb.subject_length   = end_subject - start_subject;
+            cb.subject_length   = (int)(end_subject - start_subject);
-            cb.start_match      = current_subject - start_subject;
+            cb.start_match      = (int)(current_subject - start_subject);
-            cb.current_position = ptr - start_subject;
+            cb.current_position = (int)(ptr - start_subject);
            cb.pattern_position = GET(code, LINK_SIZE + 3);
            cb.next_item_length = GET(code, 3 + 2*LINK_SIZE);
            cb.capture_top      = 1;
@@ -2395,7 +2530,7 @@ for (;;)
            md,                                   /* fixed match data */
            asscode,                              /* this subexpression's code */
            ptr,                                  /* where we currently are */
-            ptr - start_subject,                  /* start offset */
+            (int)(ptr - start_subject),           /* start offset */
            local_offsets,                        /* offset vector */
            sizeof(local_offsets)/sizeof(int),    /* size of same */
            local_workspace,                      /* workspace vector */
@@ -2407,7 +2542,7 @@ for (;;)
          if (rc == PCRE_ERROR_DFA_UITEM) return rc;
          if ((rc >= 0) ==
                (condcode == OP_ASSERT || condcode == OP_ASSERTBACK))
-            { ADD_ACTIVE(endasscode + LINK_SIZE + 1 - start_code, 0); }
+            { ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); }
          else
            { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
          }
@@ -2428,7 +2563,7 @@ for (;;)
          md,                                   /* fixed match data */
          start_code + GET(code, 1),            /* this subexpression's code */
          ptr,                                  /* where we currently are */
-          ptr - start_subject,                  /* start offset */
+          (int)(ptr - start_subject),           /* start offset */
          local_offsets,                        /* offset vector */
          sizeof(local_offsets)/sizeof(int),    /* size of same */
          local_workspace,                      /* workspace vector */
@@ -2480,7 +2615,7 @@ for (;;)
          md,                                   /* fixed match data */
          code,                                 /* this subexpression's code */
          ptr,                                  /* where we currently are */
-          ptr - start_subject,                  /* start offset */
+          (int)(ptr - start_subject),           /* start offset */
          local_offsets,                        /* offset vector */
          sizeof(local_offsets)/sizeof(int),    /* size of same */
          local_workspace,                      /* workspace vector */
@@ -2497,7 +2632,8 @@ for (;;)
          do { end_subpattern += GET(end_subpattern, 1); }
            while (*end_subpattern == OP_ALT);
-          next_state_offset = end_subpattern - start_code + LINK_SIZE + 1;
+          next_state_offset =
            (int)(end_subpattern - start_code + LINK_SIZE + 1);
          /* If the end of this subpattern is KETRMAX or KETRMIN, we must
          arrange for the repeat state also to be added to the relevant list.
@@ -2505,7 +2641,7 @@ for (;;)
          repeat_state_offset = (*end_subpattern == OP_KETRMAX ||
                                 *end_subpattern == OP_KETRMIN)?
-            end_subpattern - start_code - GET(end_subpattern, 1) : -1;
+            (int)(end_subpattern - start_code - GET(end_subpattern, 1)) : -1;
          /* If we have matched an empty string, add the next state at the
          current character pointer. This is important so that the duplicate
@@ -2569,9 +2705,9 @@ for (;;)
        cb.callout_number   = code[1];
        cb.offset_vector    = offsets;
        cb.subject          = (PCRE_SPTR)start_subject;
-        cb.subject_length   = end_subject - start_subject;
+        cb.subject_length   = (int)(end_subject - start_subject);
-        cb.start_match      = current_subject - start_subject;
+        cb.start_match      = (int)(current_subject - start_subject);
-        cb.current_position = ptr - start_subject;
+        cb.current_position = (int)(ptr - start_subject);
        cb.pattern_position = GET(code, 2);
        cb.next_item_length = GET(code, 2 + LINK_SIZE);
        cb.capture_top      = 1;
@@ -2617,13 +2753,13 @@ for (;;)
        ((md->moptions & PCRE_PARTIAL_SOFT) != 0 &&  /* Soft partial and */
         match_count < 0)                            /* no matches */
        ) &&                                         /* And... */
-        ptr >= end_subject &&                     /* Reached end of subject */
+        ptr >= end_subject &&                  /* Reached end of subject */
-        ptr > current_subject)                    /* Matched non-empty string */
+        ptr > md->start_used_ptr)              /* Inspected non-empty string */
      {
      if (offsetcount >= 2)
        {
-        offsets[0] = md->start_used_ptr - start_subject;
+        offsets[0] = (int)(md->start_used_ptr - start_subject);
-        offsets[1] = end_subject - start_subject;
+        offsets[1] = (int)(end_subject - start_subject);
        }
      match_count = PCRE_ERROR_PARTIAL;
      }
@@ -2708,6 +2844,7 @@ if (re == NULL || subject == NULL || workspace == NULL ||
   (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
 if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;
 if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
 /* We need to find the pointer to any study data before we test for byte
 flipping, so we scan the extra_data block first. This may set two fields in the
@@ -2826,16 +2963,14 @@ back the character offset. */
 #ifdef SUPPORT_UTF8
 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
  {
-  if (_pcre_valid_utf8((uschar *)subject, length) >= 0)
+  int tb;
-    return PCRE_ERROR_BADUTF8;
+  if ((tb = _pcre_valid_utf8((uschar *)subject, length)) >= 0)
    return (tb == length && (options & PCRE_PARTIAL_HARD) != 0)?
      PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
  if (start_offset > 0 && start_offset < length)
    {
-    int tb = ((uschar *)subject)[start_offset];
+    tb = ((USPTR)subject)[start_offset] & 0xc0;
-    if (tb > 127)
+    if (tb == 0x80) return PCRE_ERROR_BADUTF8_OFFSET;
      {
      tb &= 0xc0;
      if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;
      }
    }
  }
 #endif
@@ -2922,9 +3057,11 @@ for (;;)
    /* There are some optimizations that avoid running the match if a known
    starting point is not found. However, there is an option that disables
-    these, for testing and for ensuring that all callouts do actually occur. */
+    these, for testing and for ensuring that all callouts do actually occur.
    The option can be set in the regex by (*NO_START_OPT) or passed in
    match-time options. */
-    if ((options & PCRE_NO_START_OPTIMIZE) == 0)
+    if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
      {
      /* Advance to a known first byte. */
@@ -2982,8 +3119,16 @@ for (;;)
        while (current_subject < end_subject)
          {
          register unsigned int c = *current_subject;
-          if ((start_bits[c/8] & (1 << (c&7))) == 0) current_subject++;
+          if ((start_bits[c/8] & (1 << (c&7))) == 0)
-            else break;
+            {
            current_subject++;
 #ifdef SUPPORT_UTF8
            if (utf8)
              while(current_subject < end_subject &&
                    (*current_subject & 0xc0) == 0x80) current_subject++;
 #endif
            }
          else break;
          }
        }
      }
--- a/glib/pcre/pcre_exec.c
+++ b/glib/pcre/pcre_exec.c
--- a/glib/pcre/pcre_internal.h
+++ b/glib/pcre/pcre_internal.h
@@ -408,9 +408,10 @@ capturing parenthesis numbers in back references. */
 /* When UTF-8 encoding is being used, a character is no longer just a single
 byte. The macros for character handling generate simple sequences when used in
-byte-mode, and more complicated ones for UTF-8 characters. BACKCHAR should
+byte-mode, and more complicated ones for UTF-8 characters. GETCHARLENTEST is
-never be called in byte mode. To make sure it can never even appear when UTF-8
+not used when UTF-8 is not supported, so it is not defined, and BACKCHAR should
-support is omitted, we don't even define it. */
+never be called in byte mode. To make sure they can never even appear when
+UTF-8 support is omitted, we don't even define them. */
 #ifndef SUPPORT_UTF8
 #define GETCHAR(c, eptr) c = *eptr;
@@ -418,43 +419,83 @@ support is omitted, we don't even define it. */
 #define GETCHARINC(c, eptr) c = *eptr++;
 #define GETCHARINCTEST(c, eptr) c = *eptr++;
 #define GETCHARLEN(c, eptr, len) c = *eptr;
 /* #define GETCHARLENTEST(c, eptr, len) */
 /* #define BACKCHAR(eptr) */
 #else   /* SUPPORT_UTF8 */
 /* These macros were originally written in the form of loops that used data
 from the tables whose names start with _pcre_utf8_table. They were rewritten by
 a user so as not to use loops, because in some environments this gives a
 significant performance advantage, and it seems never to do any harm. */
 /* Base macro to pick up the remaining bytes of a UTF-8 character, not
 advancing the pointer. */
 #define GETUTF8(c, eptr) \
    { \
    if ((c & 0x20) == 0) \
      c = ((c & 0x1f) << 6) | (eptr[1] & 0x3f); \
    else if ((c & 0x10) == 0) \
      c = ((c & 0x0f) << 12) | ((eptr[1] & 0x3f) << 6) | (eptr[2] & 0x3f); \
    else if ((c & 0x08) == 0) \
      c = ((c & 0x07) << 18) | ((eptr[1] & 0x3f) << 12) | \
      ((eptr[2] & 0x3f) << 6) | (eptr[3] & 0x3f); \
    else if ((c & 0x04) == 0) \
      c = ((c & 0x03) << 24) | ((eptr[1] & 0x3f) << 18) | \
          ((eptr[2] & 0x3f) << 12) | ((eptr[3] & 0x3f) << 6) | \
          (eptr[4] & 0x3f); \
    else \
      c = ((c & 0x01) << 30) | ((eptr[1] & 0x3f) << 24) | \
          ((eptr[2] & 0x3f) << 18) | ((eptr[3] & 0x3f) << 12) | \
          ((eptr[4] & 0x3f) << 6) | (eptr[5] & 0x3f); \
    }
 /* Get the next UTF-8 character, not advancing the pointer. This is called when
 we know we are in UTF-8 mode. */
 #define GETCHAR(c, eptr) \
  c = *eptr; \
-  if (c >= 0xc0) \
+  if (c >= 0xc0) GETUTF8(c, eptr);
    { \
    int gcii; \
    int gcaa = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */ \
    int gcss = 6*gcaa; \
    c = (c & _pcre_utf8_table3[gcaa]) << gcss; \
    for (gcii = 1; gcii <= gcaa; gcii++) \
      { \
      gcss -= 6; \
      c |= (eptr[gcii] & 0x3f) << gcss; \
      } \
    }
 /* Get the next UTF-8 character, testing for UTF-8 mode, and not advancing the
 pointer. */
 #define GETCHARTEST(c, eptr) \
  c = *eptr; \
-  if (utf8 && c >= 0xc0) \
+  if (utf8 && c >= 0xc0) GETUTF8(c, eptr);
 /* Base macro to pick up the remaining bytes of a UTF-8 character, advancing
 the pointer. */
 #define GETUTF8INC(c, eptr) \
    { \
-    int gcii; \
+    if ((c & 0x20) == 0) \
-    int gcaa = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */ \
+      c = ((c & 0x1f) << 6) | (*eptr++ & 0x3f); \
-    int gcss = 6*gcaa; \
+    else if ((c & 0x10) == 0) \
    c = (c & _pcre_utf8_table3[gcaa]) << gcss; \
    for (gcii = 1; gcii <= gcaa; gcii++) \
      { \
-      gcss -= 6; \
+      c = ((c & 0x0f) << 12) | ((*eptr & 0x3f) << 6) | (eptr[1] & 0x3f); \
-      c |= (eptr[gcii] & 0x3f) << gcss; \
+      eptr += 2; \
      } \
    else if ((c & 0x08) == 0) \
      { \
      c = ((c & 0x07) << 18) | ((*eptr & 0x3f) << 12) | \
          ((eptr[1] & 0x3f) << 6) | (eptr[2] & 0x3f); \
      eptr += 3; \
      } \
    else if ((c & 0x04) == 0) \
      { \
      c = ((c & 0x03) << 24) | ((*eptr & 0x3f) << 18) | \
          ((eptr[1] & 0x3f) << 12) | ((eptr[2] & 0x3f) << 6) | \
          (eptr[3] & 0x3f); \
      eptr += 4; \
      } \
    else \
      { \
      c = ((c & 0x01) << 30) | ((*eptr & 0x3f) << 24) | \
          ((eptr[1] & 0x3f) << 18) | ((eptr[2] & 0x3f) << 12) | \
          ((eptr[3] & 0x3f) << 6) | (eptr[4] & 0x3f); \
      eptr += 5; \
      } \
    }
@@ -463,31 +504,49 @@ know we are in UTF-8 mode. */
 #define GETCHARINC(c, eptr) \
  c = *eptr++; \
-  if (c >= 0xc0) \
+  if (c >= 0xc0) GETUTF8INC(c, eptr);
    { \
    int gcaa = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */ \
    int gcss = 6*gcaa; \
    c = (c & _pcre_utf8_table3[gcaa]) << gcss; \
    while (gcaa-- > 0) \
      { \
      gcss -= 6; \
      c |= (*eptr++ & 0x3f) << gcss; \
      } \
    }
-/* Get the next character, testing for UTF-8 mode, and advancing the pointer */
+/* Get the next character, testing for UTF-8 mode, and advancing the pointer.
 This is called when we don't know if we are in UTF-8 mode. */
 #define GETCHARINCTEST(c, eptr) \
  c = *eptr++; \
-  if (utf8 && c >= 0xc0) \
+  if (utf8 && c >= 0xc0) GETUTF8INC(c, eptr);
 /* Base macro to pick up the remaining bytes of a UTF-8 character, not
 advancing the pointer, incrementing the length. */
 #define GETUTF8LEN(c, eptr, len) \
    { \
-    int gcaa = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */ \
+    if ((c & 0x20) == 0) \
    int gcss = 6*gcaa; \
    c = (c & _pcre_utf8_table3[gcaa]) << gcss; \
    while (gcaa-- > 0) \
      { \
-      gcss -= 6; \
+      c = ((c & 0x1f) << 6) | (eptr[1] & 0x3f); \
-      c |= (*eptr++ & 0x3f) << gcss; \
+      len++; \
      } \
    else if ((c & 0x10)  == 0) \
      { \
      c = ((c & 0x0f) << 12) | ((eptr[1] & 0x3f) << 6) | (eptr[2] & 0x3f); \
      len += 2; \
      } \
    else if ((c & 0x08)  == 0) \
      {\
      c = ((c & 0x07) << 18) | ((eptr[1] & 0x3f) << 12) | \
          ((eptr[2] & 0x3f) << 6) | (eptr[3] & 0x3f); \
      len += 3; \
      } \
    else if ((c & 0x04)  == 0) \
      { \
      c = ((c & 0x03) << 24) | ((eptr[1] & 0x3f) << 18) | \
          ((eptr[2] & 0x3f) << 12) | ((eptr[3] & 0x3f) << 6) | \
          (eptr[4] & 0x3f); \
      len += 4; \
      } \
    else \
      {\
      c = ((c & 0x01) << 30) | ((eptr[1] & 0x3f) << 24) | \
          ((eptr[2] & 0x3f) << 18) | ((eptr[3] & 0x3f) << 12) | \
          ((eptr[4] & 0x3f) << 6) | (eptr[5] & 0x3f); \
      len += 5; \
      } \
    }
@@ -496,39 +555,15 @@ if there are extra bytes. This is called when we know we are in UTF-8 mode. */
 #define GETCHARLEN(c, eptr, len) \
  c = *eptr; \
-  if (c >= 0xc0) \
+  if (c >= 0xc0) GETUTF8LEN(c, eptr, len);
    { \
    int gcii; \
    int gcaa = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */ \
    int gcss = 6*gcaa; \
    c = (c & _pcre_utf8_table3[gcaa]) << gcss; \
    for (gcii = 1; gcii <= gcaa; gcii++) \
      { \
      gcss -= 6; \
      c |= (eptr[gcii] & 0x3f) << gcss; \
      } \
    len += gcaa; \
    }
 /* Get the next UTF-8 character, testing for UTF-8 mode, not advancing the
 pointer, incrementing length if there are extra bytes. This is called when we
-know we are in UTF-8 mode. */
+do not know if we are in UTF-8 mode. */
 #define GETCHARLENTEST(c, eptr, len) \
  c = *eptr; \
-  if (utf8 && c >= 0xc0) \
+  if (utf8 && c >= 0xc0) GETUTF8LEN(c, eptr, len);
    { \
    int gcii; \
    int gcaa = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */ \
    int gcss = 6*gcaa; \
    c = (c & _pcre_utf8_table3[gcaa]) << gcss; \
    for (gcii = 1; gcii <= gcaa; gcii++) \
      { \
      gcss -= 6; \
      c |= (eptr[gcii] & 0x3f) << gcss; \
      } \
    len += gcaa; \
    }
 /* If the pointer is not at the start of a character, move it back until
 it is. This is called only in UTF-8 mode - we don't put a test within the macro
@@ -536,7 +571,7 @@ because almost all calls are already within a block of UTF-8 only code. */
 #define BACKCHAR(eptr) while((*eptr & 0xc0) == 0x80) eptr--
-#endif
+#endif  /* SUPPORT_UTF8 */
 /* In case there is no definition of offsetof() provided - though any proper
@@ -580,7 +615,7 @@ time, run time, or study time, respectively. */
   PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \
   PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT|PCRE_FIRSTLINE| \
   PCRE_DUPNAMES|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE| \
-   PCRE_JAVASCRIPT_COMPAT)
+   PCRE_JAVASCRIPT_COMPAT|PCRE_UCP|PCRE_NO_START_OPTIMIZE)
 #define PUBLIC_EXEC_OPTIONS \
  (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NOTEMPTY_ATSTART| \
@@ -620,7 +655,7 @@ variable-length repeat, or a anything other than literal characters. */
 environments where these macros are defined elsewhere. Unfortunately, there
 is no way to do the same for the typedef. */
-typedef gboolean  BOOL;
+typedef gboolean BOOL;
 /* If PCRE is to support UTF-8 on EBCDIC platforms, we cannot use normal
 character constants like '*' because the compiler would emit their EBCDIC code,
@@ -870,6 +905,7 @@ so that PCRE works on both ASCII and EBCDIC platforms, in non-UTF-mode only. */
 #define STRING_COMMIT0              "COMMIT\0"
 #define STRING_F0                   "F\0"
 #define STRING_FAIL0                "FAIL\0"
 #define STRING_MARK0                "MARK\0"
 #define STRING_PRUNE0               "PRUNE\0"
 #define STRING_SKIP0                "SKIP\0"
 #define STRING_THEN                 "THEN"
@@ -891,14 +927,16 @@ so that PCRE works on both ASCII and EBCDIC platforms, in non-UTF-mode only. */
 #define STRING_DEFINE               "DEFINE"
-#define STRING_CR_RIGHTPAR          "CR)"
+#define STRING_CR_RIGHTPAR             "CR)"
-#define STRING_LF_RIGHTPAR          "LF)"
+#define STRING_LF_RIGHTPAR             "LF)"
-#define STRING_CRLF_RIGHTPAR        "CRLF)"
+#define STRING_CRLF_RIGHTPAR           "CRLF)"
-#define STRING_ANY_RIGHTPAR         "ANY)"
+#define STRING_ANY_RIGHTPAR            "ANY)"
-#define STRING_ANYCRLF_RIGHTPAR     "ANYCRLF)"
+#define STRING_ANYCRLF_RIGHTPAR        "ANYCRLF)"
-#define STRING_BSR_ANYCRLF_RIGHTPAR "BSR_ANYCRLF)"
+#define STRING_BSR_ANYCRLF_RIGHTPAR    "BSR_ANYCRLF)"
-#define STRING_BSR_UNICODE_RIGHTPAR "BSR_UNICODE)"
+#define STRING_BSR_UNICODE_RIGHTPAR    "BSR_UNICODE)"
-#define STRING_UTF8_RIGHTPAR        "UTF8)"
+#define STRING_UTF8_RIGHTPAR           "UTF8)"
 #define STRING_UCP_RIGHTPAR            "UCP)"
 #define STRING_NO_START_OPT_RIGHTPAR   "NO_START_OPT)"
 #else  /* SUPPORT_UTF8 */
@@ -1122,6 +1160,7 @@ only. */
 #define STRING_COMMIT0              STR_C STR_O STR_M STR_M STR_I STR_T "\0"
 #define STRING_F0                   STR_F "\0"
 #define STRING_FAIL0                STR_F STR_A STR_I STR_L "\0"
 #define STRING_MARK0                STR_M STR_A STR_R STR_K "\0"
 #define STRING_PRUNE0               STR_P STR_R STR_U STR_N STR_E "\0"
 #define STRING_SKIP0                STR_S STR_K STR_I STR_P "\0"
 #define STRING_THEN                 STR_T STR_H STR_E STR_N
@@ -1143,14 +1182,16 @@ only. */
 #define STRING_DEFINE               STR_D STR_E STR_F STR_I STR_N STR_E
-#define STRING_CR_RIGHTPAR          STR_C STR_R STR_RIGHT_PARENTHESIS
+#define STRING_CR_RIGHTPAR             STR_C STR_R STR_RIGHT_PARENTHESIS
-#define STRING_LF_RIGHTPAR          STR_L STR_F STR_RIGHT_PARENTHESIS
+#define STRING_LF_RIGHTPAR             STR_L STR_F STR_RIGHT_PARENTHESIS
-#define STRING_CRLF_RIGHTPAR        STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
+#define STRING_CRLF_RIGHTPAR           STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
-#define STRING_ANY_RIGHTPAR         STR_A STR_N STR_Y STR_RIGHT_PARENTHESIS
+#define STRING_ANY_RIGHTPAR            STR_A STR_N STR_Y STR_RIGHT_PARENTHESIS
-#define STRING_ANYCRLF_RIGHTPAR     STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
+#define STRING_ANYCRLF_RIGHTPAR        STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
-#define STRING_BSR_ANYCRLF_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
+#define STRING_BSR_ANYCRLF_RIGHTPAR    STR_B STR_S STR_R STR_UNDERSCORE STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
-#define STRING_BSR_UNICODE_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_U STR_N STR_I STR_C STR_O STR_D STR_E STR_RIGHT_PARENTHESIS
+#define STRING_BSR_UNICODE_RIGHTPAR    STR_B STR_S STR_R STR_UNDERSCORE STR_U STR_N STR_I STR_C STR_O STR_D STR_E STR_RIGHT_PARENTHESIS
-#define STRING_UTF8_RIGHTPAR        STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS
+#define STRING_UTF8_RIGHTPAR           STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS
 #define STRING_UCP_RIGHTPAR            STR_U STR_C STR_P STR_RIGHT_PARENTHESIS
 #define STRING_NO_START_OPT_RIGHTPAR   STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS
 #endif  /* SUPPORT_UTF8 */
@@ -1183,9 +1224,13 @@ only. */
 #define PT_ANY        0    /* Any property - matches all chars */
 #define PT_LAMP       1    /* L& - the union of Lu, Ll, Lt */
-#define PT_GC         2    /* General characteristic (e.g. L) */
+#define PT_GC         2    /* Specified general characteristic (e.g. L) */
-#define PT_PC         3    /* Particular characteristic (e.g. Lu) */
+#define PT_PC         3    /* Specified particular characteristic (e.g. Lu) */
 #define PT_SC         4    /* Script (e.g. Han) */
 #define PT_ALNUM      5    /* Alphanumeric - the union of L and N */
 #define PT_SPACE      6    /* Perl space - Z plus 9,10,12,13 */
 #define PT_PXSPACE    7    /* POSIX space - Z plus 9,10,11,12,13 */
 #define PT_WORD       8    /* Word - L plus N plus underscore */
 /* Flag bits and data types for the extended class (OP_XCLASS) for classes that
 contain UTF-8 characters with values greater than 255. */
@@ -1202,9 +1247,15 @@ contain UTF-8 characters with values greater than 255. */
 /* These are escaped items that aren't just an encoding of a particular data
 value such as \n. They must have non-zero values, as check_escape() returns
 their negation. Also, they must appear in the same order as in the opcode
-definitions below, up to ESC_z. There's a dummy for OP_ANY because it
+definitions below, up to ESC_z. There's a dummy for OP_ALLANY because it
-corresponds to "." rather than an escape sequence, and another for OP_ALLANY
+corresponds to "." in DOTALL mode rather than an escape sequence. It is also
-(which is used for [^] in JavaScript compatibility mode).
+used for [^] in JavaScript compatibility mode. In non-DOTALL mode, "." behaves
 like \N.
 The special values ESC_DU, ESC_du, etc. are used instead of ESC_D, ESC_d, etc.
 when PCRE_UCP is set, when replacement of \d etc by \p sequences is required.
 They must be contiguous, and remain in order so that the replacements can be
 looked up from a table.
 The final escape must be ESC_REF as subsequent values are used for
 backreferences (\1, \2, \3, etc). There are two tests in the code for an escape
@@ -1214,11 +1265,12 @@ put in between that don't consume a character, that code will have to change.
 */
 enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s,
-       ESC_W, ESC_w, ESC_dum1, ESC_dum2, ESC_C, ESC_P, ESC_p, ESC_R, ESC_H,
+       ESC_W, ESC_w, ESC_N, ESC_dum, ESC_C, ESC_P, ESC_p, ESC_R, ESC_H,
-       ESC_h, ESC_V, ESC_v, ESC_X, ESC_Z, ESC_z, ESC_E, ESC_Q, ESC_g, ESC_k,
+       ESC_h, ESC_V, ESC_v, ESC_X, ESC_Z, ESC_z,
       ESC_E, ESC_Q, ESC_g, ESC_k,
       ESC_DU, ESC_du, ESC_SU, ESC_su, ESC_WU, ESC_wu,
       ESC_REF };
 /* Opcode table: Starting from 1 (i.e. after OP_END), the values up to
 OP_EOD must correspond in order to the list of escapes immediately above.
@@ -1242,8 +1294,8 @@ enum {
  OP_WHITESPACE,         /*  9 \s */
  OP_NOT_WORDCHAR,       /* 10 \W */
  OP_WORDCHAR,           /* 11 \w */
-  OP_ANY,            /* 12 Match any character (subject to DOTALL) */
+  OP_ANY,            /* 12 Match any character except newline */
-  OP_ALLANY,         /* 13 Match any character (not subject to DOTALL) */
+  OP_ALLANY,         /* 13 Match any character */
  OP_ANYBYTE,        /* 14 Match any byte (\C); different to OP_ANY for UTF-8 */
  OP_NOTPROP,        /* 15 \P (not Unicode property) */
  OP_PROP,           /* 16 \p (Unicode property) */
@@ -1373,20 +1425,24 @@ enum {
  /* These are backtracking control verbs */
-  OP_PRUNE,          /* 107 */
+  OP_MARK,           /* 107 always has an argument */
-  OP_SKIP,           /* 108 */
+  OP_PRUNE,          /* 108 */
-  OP_THEN,           /* 109 */
+  OP_PRUNE_ARG,      /* 109 same, but with argument */
-  OP_COMMIT,         /* 110 */
+  OP_SKIP,           /* 110 */
  OP_SKIP_ARG,       /* 111 same, but with argument */
  OP_THEN,           /* 112 */
  OP_THEN_ARG,       /* 113 same, but with argument */
  OP_COMMIT,         /* 114 */
  /* These are forced failure and success verbs */
-  OP_FAIL,           /* 111 */
+  OP_FAIL,           /* 115 */
-  OP_ACCEPT,         /* 112 */
+  OP_ACCEPT,         /* 116 */
-  OP_CLOSE,          /* 113 Used before OP_ACCEPT to close open captures */
+  OP_CLOSE,          /* 117 Used before OP_ACCEPT to close open captures */
  /* This is used to skip a subpattern with a {0} quantifier */
-  OP_SKIPZERO,       /* 114 */
+  OP_SKIPZERO,       /* 118 */
  /* This is not an opcode, but is used to check that tables indexed by opcode
  are the correct length, in order to catch updating errors - there have been
@@ -1397,7 +1453,7 @@ enum {
 /* *** NOTE NOTE NOTE *** Whenever the list above is updated, the two macro
 definitions that follow must also be updated to match. There are also tables
-called "coptable" cna "poptable" in pcre_dfa_exec.c that must be updated. */
+called "coptable" and "poptable" in pcre_dfa_exec.c that must be updated. */
 /* This macro defines textual names for all the opcodes. These are used only
@@ -1422,7 +1478,8 @@ for debugging. The macro is referenced only in pcre_printint.c. */
  "Once", "Bra", "CBra", "Cond", "SBra", "SCBra", "SCond",        \
  "Cond ref", "Cond nref", "Cond rec", "Cond nrec", "Cond def",   \
  "Brazero", "Braminzero",                                        \
-  "*PRUNE", "*SKIP", "*THEN", "*COMMIT", "*FAIL", "*ACCEPT",      \
+  "*MARK", "*PRUNE", "*PRUNE", "*SKIP", "*SKIP",                  \
  "*THEN", "*THEN", "*COMMIT", "*FAIL", "*ACCEPT",                \
  "Close", "Skip zero"
@@ -1488,8 +1545,10 @@ in UTF-8 mode. The code that uses this table must know about such things. */
  3, 3,                          /* RREF, NRREF                            */ \
  1,                             /* DEF                                    */ \
  1, 1,                          /* BRAZERO, BRAMINZERO                    */ \
-  1, 1, 1, 1,                    /* PRUNE, SKIP, THEN, COMMIT,             */ \
+  3, 1, 3,                       /* MARK, PRUNE, PRUNE_ARG                 */ \
-  1, 1, 3, 1                     /* FAIL, ACCEPT, CLOSE, SKIPZERO          */
+  1, 3,                          /* SKIP, SKIP_ARG                         */ \
  1+LINK_SIZE, 3+LINK_SIZE,      /* THEN, THEN_ARG                         */ \
  1, 1, 1, 3, 1                  /* COMMIT, FAIL, ACCEPT, CLOSE, SKIPZERO  */
 /* A magic value for OP_RREF and OP_NRREF to indicate the "any recursion"
@@ -1507,7 +1566,8 @@ enum { ERR0,  ERR1,  ERR2,  ERR3,  ERR4,  ERR5,  ERR6,  ERR7,  ERR8,  ERR9,
       ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39,
       ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
       ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
-       ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERRCOUNT };
+       ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68,
       ERRCOUNT };
 /* The real format of the start of the pcre block; the index of names and the
 code vector run on as long as necessary after the end. We store an explicit
@@ -1650,6 +1710,7 @@ typedef struct match_data {
  BOOL   noteol;                /* NOTEOL flag */
  BOOL   utf8;                  /* UTF8 flag */
  BOOL   jscript_compat;        /* JAVASCRIPT_COMPAT flag */
  BOOL   use_ucp;               /* PCRE_UCP flag */
  BOOL   endonly;               /* Dollar not before final \n */
  BOOL   notempty;              /* Empty string match not wanted */
  BOOL   notempty_atstart;      /* Empty string match at start not wanted */
@@ -1669,6 +1730,7 @@ typedef struct match_data {
  int    eptrn;                 /* Next free eptrblock */
  recursion_info *recursive;    /* Linked list of recursion data */
  void  *callout_data;          /* To pass back to callouts */
  const uschar *mark;           /* Mark pointer to pass back */
 } match_data;
 /* A similar structure is used for the same purpose by the DFA matching
@@ -1764,7 +1826,7 @@ extern BOOL          _pcre_is_newline(USPTR, int, USPTR, int *, BOOL);
 extern int           _pcre_ord2utf8(int, uschar *);
 extern real_pcre    *_pcre_try_flipped(const real_pcre *, real_pcre *,
                       const pcre_study_data *, pcre_study_data *);
-#define              _pcre_valid_utf8(u, i) TRUE
+#define              _pcre_valid_utf8(USPTR, int) TRUE
 extern BOOL          _pcre_was_newline(USPTR, int, USPTR, int *, BOOL);
 extern BOOL          _pcre_xclass(int, const uschar *);
--- a/glib/pcre/pcre_study.c
+++ b/glib/pcre/pcre_study.c
@@ -48,6 +48,7 @@ supporting functions. */
 #include "pcre_internal.h"
 #define SET_BIT(c) start_bits[c/8] |= (1 << (c&7))
 /* Returns from set_start_bits() */
@@ -413,6 +414,18 @@ for (;;)
 #endif
    break;
    /* Skip these, but we need to add in the name length. */
    case OP_MARK:
    case OP_PRUNE_ARG:
    case OP_SKIP_ARG:
    cc += _pcre_OP_lengths[op] + cc[1];
    break;
    case OP_THEN_ARG:
    cc += _pcre_OP_lengths[op] + cc[1+LINK_SIZE];
    break;
    /* For the record, these are the opcodes that are matched by "default":
    OP_ACCEPT, OP_CLOSE, OP_COMMIT, OP_FAIL, OP_PRUNE, OP_SET_SOM, OP_SKIP,
    OP_THEN. */
@@ -431,25 +444,121 @@ for (;;)
 *      Set a bit and maybe its alternate case    *
 *************************************************/
-/* Given a character, set its bit in the table, and also the bit for the other
+/* Given a character, set its first byte's bit in the table, and also the
-version of a letter if we are caseless.
+corresponding bit for the other version of a letter if we are caseless. In
 UTF-8 mode, for characters greater than 127, we can only do the caseless thing
 when Unicode property support is available.
 Arguments:
  start_bits    points to the bit map
-  c             is the character
+  p             points to the character
  caseless      the caseless flag
  cd            the block with char table pointers
  utf8          TRUE for UTF-8 mode
-Returns:        nothing
+Returns:        pointer after the character
 */
 static const uschar *
 set_table_bit(uschar *start_bits, const uschar *p, BOOL caseless,
  compile_data *cd, BOOL utf8)
 {
 unsigned int c = *p;
 SET_BIT(c);
 #ifdef SUPPORT_UTF8
 if (utf8 && c > 127)
  {
  GETCHARINC(c, p);
 #ifdef SUPPORT_UCP
  if (caseless)
    {
    uschar buff[8];
    c = UCD_OTHERCASE(c);
    (void)_pcre_ord2utf8(c, buff);
    SET_BIT(buff[0]);
    }
 #endif
  return p;
  }
 #endif
 /* Not UTF-8 mode, or the character is no greater than 127. */
 if (caseless && (cd->ctypes[c] & ctype_letter) != 0) SET_BIT(cd->fcc[c]);
 return p + 1;
 }
 /*************************************************
 *     Set bits for a positive character type     *
 *************************************************/
 /* This function sets starting bits for a character type. In UTF-8 mode, we can
 only do a direct setting for bytes less than 128, as otherwise there can be
 confusion with bytes in the middle of UTF-8 characters. In a "traditional"
 environment, the tables will only recognize ASCII characters anyway, but in at
 least one Windows environment, some high-byte bits were set in the tables.
 So we deal with that case by considering the UTF-8 encoding.
 Arguments:
  start_bits     the starting bitmap
  cbit_type      the type of character wanted
  table_limit    32 for non-UTF-8; 16 for UTF-8
  cd             the block with char table pointers
 Returns:         nothing
 */
 static void
-set_table_bit(uschar *start_bits, unsigned int c, BOOL caseless,
+set_type_bits(uschar *start_bits, int cbit_type, int table_limit,
  compile_data *cd)
 {
-start_bits[c/8] |= (1 << (c&7));
+register int c;
-if (caseless && (cd->ctypes[c] & ctype_letter) != 0)
+for (c = 0; c < table_limit; c++) start_bits[c] |= cd->cbits[c+cbit_type];
-  start_bits[cd->fcc[c]/8] |= (1 << (cd->fcc[c]&7));
+if (table_limit == 32) return;
 for (c = 128; c < 256; c++)
  {
  if ((cd->cbits[c/8] & (1 << (c&7))) != 0)
    {
    uschar buff[8];
    (void)_pcre_ord2utf8(c, buff);
    SET_BIT(buff[0]);
    }
  }
 }
 /*************************************************
 *     Set bits for a negative character type     *
 *************************************************/
 /* This function sets starting bits for a negative character type such as \D.
 In UTF-8 mode, we can only do a direct setting for bytes less than 128, as
 otherwise there can be confusion with bytes in the middle of UTF-8 characters.
 Unlike in the positive case, where we can set appropriate starting bits for
 specific high-valued UTF-8 characters, in this case we have to set the bits for
 all high-valued characters. The lowest is 0xc2, but we overkill by starting at
 0xc0 (192) for simplicity.
 Arguments:
  start_bits     the starting bitmap
  cbit_type      the type of character wanted
  table_limit    32 for non-UTF-8; 16 for UTF-8
  cd             the block with char table pointers
 Returns:         nothing
 */
 static void
 set_nottype_bits(uschar *start_bits, int cbit_type, int table_limit,
  compile_data *cd)
 {
 register int c;
 for (c = 0; c < table_limit; c++) start_bits[c] |= ~cd->cbits[c+cbit_type];
 if (table_limit != 32) for (c = 24; c < 32; c++) start_bits[c] = 0xff;
 }
@@ -484,6 +593,7 @@ set_start_bits(const uschar *code, uschar *start_bits, BOOL caseless,
 {
 register int c;
 int yield = SSB_DONE;
 int table_limit = utf8? 16:32;
 #if 0
 /* ========================================================================= */
@@ -607,12 +717,7 @@ do
      case OP_QUERY:
      case OP_MINQUERY:
      case OP_POSQUERY:
-      set_table_bit(start_bits, tcode[1], caseless, cd);
+      tcode = set_table_bit(start_bits, tcode + 1, caseless, cd, utf8);
      tcode += 2;
 #ifdef SUPPORT_UTF8
      if (utf8 && tcode[-1] >= 0xc0)
        tcode += _pcre_utf8_table4[tcode[-1] & 0x3f];
 #endif
      break;
      /* Single-char upto sets the bit and tries the next */
@@ -620,12 +725,7 @@ do
      case OP_UPTO:
      case OP_MINUPTO:
      case OP_POSUPTO:
-      set_table_bit(start_bits, tcode[3], caseless, cd);
+      tcode = set_table_bit(start_bits, tcode + 3, caseless, cd, utf8);
      tcode += 4;
 #ifdef SUPPORT_UTF8
      if (utf8 && tcode[-1] >= 0xc0)
        tcode += _pcre_utf8_table4[tcode[-1] & 0x3f];
 #endif
      break;
      /* At least one single char sets the bit and stops */
@@ -638,59 +738,86 @@ do
      case OP_PLUS:
      case OP_MINPLUS:
      case OP_POSPLUS:
-      set_table_bit(start_bits, tcode[1], caseless, cd);
+      (void)set_table_bit(start_bits, tcode + 1, caseless, cd, utf8);
      try_next = FALSE;
      break;
-      /* Single character type sets the bits and stops */
+      /* Special spacing and line-terminating items. These recognize specific
      lists of characters. The difference between VSPACE and ANYNL is that the
      latter can match the two-character CRLF sequence, but that is not
      relevant for finding the first character, so their code here is
      identical. */
      case OP_HSPACE:
      SET_BIT(0x09);
      SET_BIT(0x20);
      if (utf8)
        {
        SET_BIT(0xC2);  /* For U+00A0 */
        SET_BIT(0xE1);  /* For U+1680, U+180E */
        SET_BIT(0xE2);  /* For U+2000 - U+200A, U+202F, U+205F */
        SET_BIT(0xE3);  /* For U+3000 */
        }
      else SET_BIT(0xA0);
      try_next = FALSE;
      break;
      case OP_ANYNL:
      case OP_VSPACE:
      SET_BIT(0x0A);
      SET_BIT(0x0B);
      SET_BIT(0x0C);
      SET_BIT(0x0D);
      if (utf8)
        {
        SET_BIT(0xC2);  /* For U+0085 */
        SET_BIT(0xE2);  /* For U+2028, U+2029 */
        }
      else SET_BIT(0x85);
      try_next = FALSE;
      break;
      /* Single character types set the bits and stop. Note that if PCRE_UCP
      is set, we do not see these op codes because \d etc are converted to
      properties. Therefore, these apply in the case when only characters less
      than 256 are recognized to match the types. */
      case OP_NOT_DIGIT:
-      for (c = 0; c < 32; c++)
+      set_nottype_bits(start_bits, cbit_digit, table_limit, cd);
        start_bits[c] |= ~cd->cbits[c+cbit_digit];
      try_next = FALSE;
      break;
      case OP_DIGIT:
-      for (c = 0; c < 32; c++)
+      set_type_bits(start_bits, cbit_digit, table_limit, cd);
        start_bits[c] |= cd->cbits[c+cbit_digit];
      try_next = FALSE;
      break;
      /* The cbit_space table has vertical tab as whitespace; we have to
-      discard it. */
+      ensure it is set as not whitespace. */
      case OP_NOT_WHITESPACE:
-      for (c = 0; c < 32; c++)
+      set_nottype_bits(start_bits, cbit_space, table_limit, cd);
-        {
+      start_bits[1] |= 0x08;
        int d = cd->cbits[c+cbit_space];
        if (c == 1) d &= ~0x08;
        start_bits[c] |= ~d;
        }
      try_next = FALSE;
      break;
      /* The cbit_space table has vertical tab as whitespace; we have to
-      discard it. */
+      not set it from the table. */
      case OP_WHITESPACE:
-      for (c = 0; c < 32; c++)
+      c = start_bits[1];    /* Save in case it was already set */
-        {
+      set_type_bits(start_bits, cbit_space, table_limit, cd);
-        int d = cd->cbits[c+cbit_space];
+      start_bits[1] = (start_bits[1] & ~0x08) | c;
        if (c == 1) d &= ~0x08;
        start_bits[c] |= d;
        }
      try_next = FALSE;
      break;
      case OP_NOT_WORDCHAR:
-      for (c = 0; c < 32; c++)
+      set_nottype_bits(start_bits, cbit_word, table_limit, cd);
        start_bits[c] |= ~cd->cbits[c+cbit_word];
      try_next = FALSE;
      break;
      case OP_WORDCHAR:
-      for (c = 0; c < 32; c++)
+      set_type_bits(start_bits, cbit_word, table_limit, cd);
        start_bits[c] |= cd->cbits[c+cbit_word];
      try_next = FALSE;
      break;
@@ -699,6 +826,7 @@ do
      case OP_TYPEPLUS:
      case OP_TYPEMINPLUS:
      case OP_TYPEPOSPLUS:
      tcode++;
      break;
@@ -722,52 +850,69 @@ do
      case OP_TYPEPOSQUERY:
      switch(tcode[1])
        {
        default:
        case OP_ANY:
        case OP_ALLANY:
        return SSB_FAIL;
        case OP_HSPACE:
        SET_BIT(0x09);
        SET_BIT(0x20);
        if (utf8)
          {
          SET_BIT(0xC2);  /* For U+00A0 */
          SET_BIT(0xE1);  /* For U+1680, U+180E */
          SET_BIT(0xE2);  /* For U+2000 - U+200A, U+202F, U+205F */
          SET_BIT(0xE3);  /* For U+3000 */
          }
        else SET_BIT(0xA0);
        break;
        case OP_ANYNL:
        case OP_VSPACE:
        SET_BIT(0x0A);
        SET_BIT(0x0B);
        SET_BIT(0x0C);
        SET_BIT(0x0D);
        if (utf8)
          {
          SET_BIT(0xC2);  /* For U+0085 */
          SET_BIT(0xE2);  /* For U+2028, U+2029 */
          }
        else SET_BIT(0x85);
        break;
        case OP_NOT_DIGIT:
-        for (c = 0; c < 32; c++)
+        set_nottype_bits(start_bits, cbit_digit, table_limit, cd);
          start_bits[c] |= ~cd->cbits[c+cbit_digit];
        break;
        case OP_DIGIT:
-        for (c = 0; c < 32; c++)
+        set_type_bits(start_bits, cbit_digit, table_limit, cd);
          start_bits[c] |= cd->cbits[c+cbit_digit];
        break;
        /* The cbit_space table has vertical tab as whitespace; we have to
-        discard it. */
+        ensure it gets set as not whitespace. */
        case OP_NOT_WHITESPACE:
-        for (c = 0; c < 32; c++)
+        set_nottype_bits(start_bits, cbit_space, table_limit, cd);
-          {
+        start_bits[1] |= 0x08;
          int d = cd->cbits[c+cbit_space];
          if (c == 1) d &= ~0x08;
          start_bits[c] |= ~d;
          }
        break;
        /* The cbit_space table has vertical tab as whitespace; we have to
-        discard it. */
+        avoid setting it. */
        case OP_WHITESPACE:
-        for (c = 0; c < 32; c++)
+        c = start_bits[1];    /* Save in case it was already set */
-          {
+        set_type_bits(start_bits, cbit_space, table_limit, cd);
-          int d = cd->cbits[c+cbit_space];
+        start_bits[1] = (start_bits[1] & ~0x08) | c;
          if (c == 1) d &= ~0x08;
          start_bits[c] |= d;
          }
        break;
        case OP_NOT_WORDCHAR:
-        for (c = 0; c < 32; c++)
+        set_nottype_bits(start_bits, cbit_word, table_limit, cd);
          start_bits[c] |= ~cd->cbits[c+cbit_word];
        break;
        case OP_WORDCHAR:
-        for (c = 0; c < 32; c++)
+        set_type_bits(start_bits, cbit_word, table_limit, cd);
          start_bits[c] |= cd->cbits[c+cbit_word];
        break;
        }
--- a/glib/pcre/pcre_tables.c
+++ b/glib/pcre/pcre_tables.c
@@ -123,8 +123,10 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
 #define STRING_Avestan0 STR_A STR_v STR_e STR_s STR_t STR_a STR_n "\0"
 #define STRING_Balinese0 STR_B STR_a STR_l STR_i STR_n STR_e STR_s STR_e "\0"
 #define STRING_Bamum0 STR_B STR_a STR_m STR_u STR_m "\0"
 #define STRING_Batak0 STR_B STR_a STR_t STR_a STR_k "\0"
 #define STRING_Bengali0 STR_B STR_e STR_n STR_g STR_a STR_l STR_i "\0"
 #define STRING_Bopomofo0 STR_B STR_o STR_p STR_o STR_m STR_o STR_f STR_o "\0"
 #define STRING_Brahmi0 STR_B STR_r STR_a STR_h STR_m STR_i "\0"
 #define STRING_Braille0 STR_B STR_r STR_a STR_i STR_l STR_l STR_e "\0"
 #define STRING_Buginese0 STR_B STR_u STR_g STR_i STR_n STR_e STR_s STR_e "\0"
 #define STRING_Buhid0 STR_B STR_u STR_h STR_i STR_d "\0"
@@ -184,6 +186,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
 #define STRING_Lu0 STR_L STR_u "\0"
 #define STRING_Lycian0 STR_L STR_y STR_c STR_i STR_a STR_n "\0"
 #define STRING_Lydian0 STR_L STR_y STR_d STR_i STR_a STR_n "\0"
 #define STRING_Mandaic0 STR_M STR_a STR_n STR_d STR_a STR_i STR_c "\0"
 #define STRING_M0 STR_M "\0"
 #define STRING_Malayalam0 STR_M STR_a STR_l STR_a STR_y STR_a STR_l STR_a STR_m "\0"
 #define STRING_Mc0 STR_M STR_c "\0"
@@ -243,6 +246,10 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
 #define STRING_Tifinagh0 STR_T STR_i STR_f STR_i STR_n STR_a STR_g STR_h "\0"
 #define STRING_Ugaritic0 STR_U STR_g STR_a STR_r STR_i STR_t STR_i STR_c "\0"
 #define STRING_Vai0 STR_V STR_a STR_i "\0"
 #define STRING_Xan0 STR_X STR_a STR_n "\0"
 #define STRING_Xps0 STR_X STR_p STR_s "\0"
 #define STRING_Xsp0 STR_X STR_s STR_p "\0"
 #define STRING_Xwd0 STR_X STR_w STR_d "\0"
 #define STRING_Yi0 STR_Y STR_i "\0"
 #define STRING_Z0 STR_Z "\0"
 #define STRING_Zl0 STR_Z STR_l "\0"
@@ -256,8 +263,10 @@ const char _pcre_utt_names[] =
  STRING_Avestan0
  STRING_Balinese0
  STRING_Bamum0
  STRING_Batak0
  STRING_Bengali0
  STRING_Bopomofo0
  STRING_Brahmi0
  STRING_Braille0
  STRING_Buginese0
  STRING_Buhid0
@@ -319,6 +328,7 @@ const char _pcre_utt_names[] =
  STRING_Lydian0
  STRING_M0
  STRING_Malayalam0
  STRING_Mandaic0
  STRING_Mc0
  STRING_Me0
  STRING_Meetei_Mayek0
@@ -376,6 +386,10 @@ const char _pcre_utt_names[] =
  STRING_Tifinagh0
  STRING_Ugaritic0
  STRING_Vai0
  STRING_Xan0
  STRING_Xps0
  STRING_Xsp0
  STRING_Xwd0
  STRING_Yi0
  STRING_Z0
  STRING_Zl0
@@ -389,131 +403,138 @@ const ucp_type_table _pcre_utt[] = {
  {  20, PT_SC, ucp_Avestan },
  {  28, PT_SC, ucp_Balinese },
  {  37, PT_SC, ucp_Bamum },
-  {  43, PT_SC, ucp_Bengali },
+  {  43, PT_SC, ucp_Batak },
-  {  51, PT_SC, ucp_Bopomofo },
+  {  49, PT_SC, ucp_Bengali },
-  {  60, PT_SC, ucp_Braille },
+  {  57, PT_SC, ucp_Bopomofo },
-  {  68, PT_SC, ucp_Buginese },
+  {  66, PT_SC, ucp_Brahmi },
-  {  77, PT_SC, ucp_Buhid },
+  {  73, PT_SC, ucp_Braille },
-  {  83, PT_GC, ucp_C },
+  {  81, PT_SC, ucp_Buginese },
-  {  85, PT_SC, ucp_Canadian_Aboriginal },
+  {  90, PT_SC, ucp_Buhid },
-  { 105, PT_SC, ucp_Carian },
+  {  96, PT_GC, ucp_C },
-  { 112, PT_PC, ucp_Cc },
+  {  98, PT_SC, ucp_Canadian_Aboriginal },
-  { 115, PT_PC, ucp_Cf },
+  { 118, PT_SC, ucp_Carian },
-  { 118, PT_SC, ucp_Cham },
+  { 125, PT_PC, ucp_Cc },
-  { 123, PT_SC, ucp_Cherokee },
+  { 128, PT_PC, ucp_Cf },
-  { 132, PT_PC, ucp_Cn },
+  { 131, PT_SC, ucp_Cham },
-  { 135, PT_PC, ucp_Co },
+  { 136, PT_SC, ucp_Cherokee },
-  { 138, PT_SC, ucp_Common },
+  { 145, PT_PC, ucp_Cn },
-  { 145, PT_SC, ucp_Coptic },
+  { 148, PT_PC, ucp_Co },
-  { 152, PT_PC, ucp_Cs },
+  { 151, PT_SC, ucp_Common },
-  { 155, PT_SC, ucp_Cuneiform },
+  { 158, PT_SC, ucp_Coptic },
-  { 165, PT_SC, ucp_Cypriot },
+  { 165, PT_PC, ucp_Cs },
-  { 173, PT_SC, ucp_Cyrillic },
+  { 168, PT_SC, ucp_Cuneiform },
-  { 182, PT_SC, ucp_Deseret },
+  { 178, PT_SC, ucp_Cypriot },
-  { 190, PT_SC, ucp_Devanagari },
+  { 186, PT_SC, ucp_Cyrillic },
-  { 201, PT_SC, ucp_Egyptian_Hieroglyphs },
+  { 195, PT_SC, ucp_Deseret },
-  { 222, PT_SC, ucp_Ethiopic },
+  { 203, PT_SC, ucp_Devanagari },
-  { 231, PT_SC, ucp_Georgian },
+  { 214, PT_SC, ucp_Egyptian_Hieroglyphs },
-  { 240, PT_SC, ucp_Glagolitic },
+  { 235, PT_SC, ucp_Ethiopic },
-  { 251, PT_SC, ucp_Gothic },
+  { 244, PT_SC, ucp_Georgian },
-  { 258, PT_SC, ucp_Greek },
+  { 253, PT_SC, ucp_Glagolitic },
-  { 264, PT_SC, ucp_Gujarati },
+  { 264, PT_SC, ucp_Gothic },
-  { 273, PT_SC, ucp_Gurmukhi },
+  { 271, PT_SC, ucp_Greek },
-  { 282, PT_SC, ucp_Han },
+  { 277, PT_SC, ucp_Gujarati },
-  { 286, PT_SC, ucp_Hangul },
+  { 286, PT_SC, ucp_Gurmukhi },
-  { 293, PT_SC, ucp_Hanunoo },
+  { 295, PT_SC, ucp_Han },
-  { 301, PT_SC, ucp_Hebrew },
+  { 299, PT_SC, ucp_Hangul },
-  { 308, PT_SC, ucp_Hiragana },
+  { 306, PT_SC, ucp_Hanunoo },
-  { 317, PT_SC, ucp_Imperial_Aramaic },
+  { 314, PT_SC, ucp_Hebrew },
-  { 334, PT_SC, ucp_Inherited },
+  { 321, PT_SC, ucp_Hiragana },
-  { 344, PT_SC, ucp_Inscriptional_Pahlavi },
+  { 330, PT_SC, ucp_Imperial_Aramaic },
-  { 366, PT_SC, ucp_Inscriptional_Parthian },
+  { 347, PT_SC, ucp_Inherited },
-  { 389, PT_SC, ucp_Javanese },
+  { 357, PT_SC, ucp_Inscriptional_Pahlavi },
-  { 398, PT_SC, ucp_Kaithi },
+  { 379, PT_SC, ucp_Inscriptional_Parthian },
-  { 405, PT_SC, ucp_Kannada },
+  { 402, PT_SC, ucp_Javanese },
-  { 413, PT_SC, ucp_Katakana },
+  { 411, PT_SC, ucp_Kaithi },
-  { 422, PT_SC, ucp_Kayah_Li },
+  { 418, PT_SC, ucp_Kannada },
-  { 431, PT_SC, ucp_Kharoshthi },
+  { 426, PT_SC, ucp_Katakana },
-  { 442, PT_SC, ucp_Khmer },
+  { 435, PT_SC, ucp_Kayah_Li },
-  { 448, PT_GC, ucp_L },
+  { 444, PT_SC, ucp_Kharoshthi },
-  { 450, PT_LAMP, 0 },
+  { 455, PT_SC, ucp_Khmer },
-  { 453, PT_SC, ucp_Lao },
+  { 461, PT_GC, ucp_L },
-  { 457, PT_SC, ucp_Latin },
+  { 463, PT_LAMP, 0 },
-  { 463, PT_SC, ucp_Lepcha },
+  { 466, PT_SC, ucp_Lao },
-  { 470, PT_SC, ucp_Limbu },
+  { 470, PT_SC, ucp_Latin },
-  { 476, PT_SC, ucp_Linear_B },
+  { 476, PT_SC, ucp_Lepcha },
-  { 485, PT_SC, ucp_Lisu },
+  { 483, PT_SC, ucp_Limbu },
-  { 490, PT_PC, ucp_Ll },
+  { 489, PT_SC, ucp_Linear_B },
-  { 493, PT_PC, ucp_Lm },
+  { 498, PT_SC, ucp_Lisu },
-  { 496, PT_PC, ucp_Lo },
+  { 503, PT_PC, ucp_Ll },
-  { 499, PT_PC, ucp_Lt },
+  { 506, PT_PC, ucp_Lm },
-  { 502, PT_PC, ucp_Lu },
+  { 509, PT_PC, ucp_Lo },
-  { 505, PT_SC, ucp_Lycian },
+  { 512, PT_PC, ucp_Lt },
-  { 512, PT_SC, ucp_Lydian },
+  { 515, PT_PC, ucp_Lu },
-  { 519, PT_GC, ucp_M },
+  { 518, PT_SC, ucp_Lycian },
-  { 521, PT_SC, ucp_Malayalam },
+  { 525, PT_SC, ucp_Lydian },
-  { 531, PT_PC, ucp_Mc },
+  { 532, PT_GC, ucp_M },
-  { 534, PT_PC, ucp_Me },
+  { 534, PT_SC, ucp_Malayalam },
-  { 537, PT_SC, ucp_Meetei_Mayek },
+  { 544, PT_SC, ucp_Mandaic },
-  { 550, PT_PC, ucp_Mn },
+  { 552, PT_PC, ucp_Mc },
-  { 553, PT_SC, ucp_Mongolian },
+  { 555, PT_PC, ucp_Me },
-  { 563, PT_SC, ucp_Myanmar },
+  { 558, PT_SC, ucp_Meetei_Mayek },
-  { 571, PT_GC, ucp_N },
+  { 571, PT_PC, ucp_Mn },
-  { 573, PT_PC, ucp_Nd },
+  { 574, PT_SC, ucp_Mongolian },
-  { 576, PT_SC, ucp_New_Tai_Lue },
+  { 584, PT_SC, ucp_Myanmar },
-  { 588, PT_SC, ucp_Nko },
+  { 592, PT_GC, ucp_N },
-  { 592, PT_PC, ucp_Nl },
+  { 594, PT_PC, ucp_Nd },
-  { 595, PT_PC, ucp_No },
+  { 597, PT_SC, ucp_New_Tai_Lue },
-  { 598, PT_SC, ucp_Ogham },
+  { 609, PT_SC, ucp_Nko },
-  { 604, PT_SC, ucp_Ol_Chiki },
+  { 613, PT_PC, ucp_Nl },
-  { 613, PT_SC, ucp_Old_Italic },
+  { 616, PT_PC, ucp_No },
-  { 624, PT_SC, ucp_Old_Persian },
+  { 619, PT_SC, ucp_Ogham },
-  { 636, PT_SC, ucp_Old_South_Arabian },
+  { 625, PT_SC, ucp_Ol_Chiki },
-  { 654, PT_SC, ucp_Old_Turkic },
+  { 634, PT_SC, ucp_Old_Italic },
-  { 665, PT_SC, ucp_Oriya },
+  { 645, PT_SC, ucp_Old_Persian },
-  { 671, PT_SC, ucp_Osmanya },
+  { 657, PT_SC, ucp_Old_South_Arabian },
-  { 679, PT_GC, ucp_P },
+  { 675, PT_SC, ucp_Old_Turkic },
-  { 681, PT_PC, ucp_Pc },
+  { 686, PT_SC, ucp_Oriya },
-  { 684, PT_PC, ucp_Pd },
+  { 692, PT_SC, ucp_Osmanya },
-  { 687, PT_PC, ucp_Pe },
+  { 700, PT_GC, ucp_P },
-  { 690, PT_PC, ucp_Pf },
+  { 702, PT_PC, ucp_Pc },
-  { 693, PT_SC, ucp_Phags_Pa },
+  { 705, PT_PC, ucp_Pd },
-  { 702, PT_SC, ucp_Phoenician },
+  { 708, PT_PC, ucp_Pe },
-  { 713, PT_PC, ucp_Pi },
+  { 711, PT_PC, ucp_Pf },
-  { 716, PT_PC, ucp_Po },
+  { 714, PT_SC, ucp_Phags_Pa },
-  { 719, PT_PC, ucp_Ps },
+  { 723, PT_SC, ucp_Phoenician },
-  { 722, PT_SC, ucp_Rejang },
+  { 734, PT_PC, ucp_Pi },
-  { 729, PT_SC, ucp_Runic },
+  { 737, PT_PC, ucp_Po },
-  { 735, PT_GC, ucp_S },
+  { 740, PT_PC, ucp_Ps },
-  { 737, PT_SC, ucp_Samaritan },
+  { 743, PT_SC, ucp_Rejang },
-  { 747, PT_SC, ucp_Saurashtra },
+  { 750, PT_SC, ucp_Runic },
-  { 758, PT_PC, ucp_Sc },
+  { 756, PT_GC, ucp_S },
-  { 761, PT_SC, ucp_Shavian },
+  { 758, PT_SC, ucp_Samaritan },
-  { 769, PT_SC, ucp_Sinhala },
+  { 768, PT_SC, ucp_Saurashtra },
-  { 777, PT_PC, ucp_Sk },
+  { 779, PT_PC, ucp_Sc },
-  { 780, PT_PC, ucp_Sm },
+  { 782, PT_SC, ucp_Shavian },
-  { 783, PT_PC, ucp_So },
+  { 790, PT_SC, ucp_Sinhala },
-  { 786, PT_SC, ucp_Sundanese },
+  { 798, PT_PC, ucp_Sk },
-  { 796, PT_SC, ucp_Syloti_Nagri },
+  { 801, PT_PC, ucp_Sm },
-  { 809, PT_SC, ucp_Syriac },
+  { 804, PT_PC, ucp_So },
-  { 816, PT_SC, ucp_Tagalog },
+  { 807, PT_SC, ucp_Sundanese },
-  { 824, PT_SC, ucp_Tagbanwa },
+  { 817, PT_SC, ucp_Syloti_Nagri },
-  { 833, PT_SC, ucp_Tai_Le },
+  { 830, PT_SC, ucp_Syriac },
-  { 840, PT_SC, ucp_Tai_Tham },
+  { 837, PT_SC, ucp_Tagalog },
-  { 849, PT_SC, ucp_Tai_Viet },
+  { 845, PT_SC, ucp_Tagbanwa },
-  { 858, PT_SC, ucp_Tamil },
+  { 854, PT_SC, ucp_Tai_Le },
-  { 864, PT_SC, ucp_Telugu },
+  { 861, PT_SC, ucp_Tai_Tham },
-  { 871, PT_SC, ucp_Thaana },
+  { 870, PT_SC, ucp_Tai_Viet },
-  { 878, PT_SC, ucp_Thai },
+  { 879, PT_SC, ucp_Tamil },
-  { 883, PT_SC, ucp_Tibetan },
+  { 885, PT_SC, ucp_Telugu },
-  { 891, PT_SC, ucp_Tifinagh },
+  { 892, PT_SC, ucp_Thaana },
-  { 900, PT_SC, ucp_Ugaritic },
+  { 899, PT_SC, ucp_Thai },
-  { 909, PT_SC, ucp_Vai },
+  { 904, PT_SC, ucp_Tibetan },
-  { 913, PT_SC, ucp_Yi },
+  { 912, PT_SC, ucp_Tifinagh },
-  { 916, PT_GC, ucp_Z },
+  { 921, PT_SC, ucp_Ugaritic },
-  { 918, PT_PC, ucp_Zl },
+  { 930, PT_SC, ucp_Vai },
-  { 921, PT_PC, ucp_Zp },
+  { 934, PT_ALNUM, 0 },
-  { 924, PT_PC, ucp_Zs }
+  { 938, PT_PXSPACE, 0 },
  { 942, PT_SPACE, 0 },
  { 946, PT_WORD, 0 },
  { 950, PT_SC, ucp_Yi },
  { 953, PT_GC, ucp_Z },
  { 955, PT_PC, ucp_Zl },
  { 958, PT_PC, ucp_Zp },
  { 961, PT_PC, ucp_Zs }
 };
 const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table);
--- a/glib/pcre/pcre_xclass.c
+++ b/glib/pcre/pcre_xclass.c
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.
                       Written by Philip Hazel
-           Copyright (c) 1997-2009 University of Cambridge
+           Copyright (c) 1997-2010 University of Cambridge
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -104,6 +104,7 @@ while ((t = *data++) != XCL_END)
  else  /* XCL_PROP & XCL_NOTPROP */
    {
    int chartype = UCD_CHARTYPE(c);
    switch(*data)
      {
      case PT_ANY:
@@ -111,12 +112,13 @@ while ((t = *data++) != XCL_END)
      break;
      case PT_LAMP:
-      if ((chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt) ==
+      if ((chartype == ucp_Lu || chartype == ucp_Ll ||
-          (t == XCL_PROP)) return !negated;
+           chartype == ucp_Lt) == (t == XCL_PROP)) return !negated;
      break;
      case PT_GC:
-      if ((data[1] == _pcre_ucp_gentype[chartype]) == (t == XCL_PROP)) return !negated;
+      if ((data[1] == _pcre_ucp_gentype[chartype]) == (t == XCL_PROP))
        return !negated;
      break;
      case PT_PC:
@@ -127,6 +129,33 @@ while ((t = *data++) != XCL_END)
      if ((data[1] == UCD_SCRIPT(c)) == (t == XCL_PROP)) return !negated;
      break;
      case PT_ALNUM:
      if ((_pcre_ucp_gentype[chartype] == ucp_L ||
           _pcre_ucp_gentype[chartype] == ucp_N) == (t == XCL_PROP))
        return !negated;
      break;
      case PT_SPACE:    /* Perl space */
      if ((_pcre_ucp_gentype[chartype] == ucp_Z ||
           c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
             == (t == XCL_PROP))
        return !negated;
      break;
      case PT_PXSPACE:  /* POSIX space */
      if ((_pcre_ucp_gentype[chartype] == ucp_Z ||
           c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
           c == CHAR_FF || c == CHAR_CR) == (t == XCL_PROP))
        return !negated;
      break;
      case PT_WORD:
      if ((_pcre_ucp_gentype[chartype] == ucp_L ||
           _pcre_ucp_gentype[chartype] == ucp_N || c == CHAR_UNDERSCORE)
             == (t == XCL_PROP))
        return !negated;
      break;
      /* This should never occur, but compilers may mutter if there is no
      default. */
--- a/glib/pcre/ucp.h
+++ b/glib/pcre/ucp.h
@@ -150,7 +150,10 @@ enum {
  ucp_Old_Turkic = G_UNICODE_SCRIPT_OLD_TURKIC,
  ucp_Samaritan = G_UNICODE_SCRIPT_SAMARITAN,
  ucp_Tai_Tham = G_UNICODE_SCRIPT_TAI_THAM,
-  ucp_Tai_Viet = G_UNICODE_SCRIPT_TAI_VIET
+  ucp_Tai_Viet = G_UNICODE_SCRIPT_TAI_VIET,
  ucp_Batak = G_UNICODE_SCRIPT_BATAK,
  ucp_Brahmi = G_UNICODE_SCRIPT_BRAHMI,
  ucp_Mandaic = G_UNICODE_SCRIPT_MANDAIC
 };
 #endif