Update PCRE to 7.8

svn path=/trunk/; revision=7813
2025-04-02 05:43:07 +02:00 · 2009-01-18 06:32:03 +00:00 · 2009-01-18 06:32:03 +00:00 · d6f23279e7
commit d6f23279e7
parent 1da8112081
21 changed files with 411 additions and 321 deletions
--- a/4
+++ b/4
@ -1,3 +1,7 @@
+2009-01-18  Matthias Clasen  <mclasen@redhat.com>
+
+	* glib/pcre: Update to PCRE 7.8
+
 2009-01-17  Matthias Clasen  <mclasen@redhat.com>

 	Bug 567977 – textdomain() macro should not return NULL when 
--- a/glib/pcre/pcre.h
+++ b/glib/pcre/pcre.h
@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
 /* The current PCRE version information. */

 #define PCRE_MAJOR          7
-#define PCRE_MINOR          7
+#define PCRE_MINOR          8
 #define PCRE_PRERELEASE     
-#define PCRE_DATE           2008-05-07
+#define PCRE_DATE           2008-09-05

 /* When an application links to a PCRE DLL in Windows, the symbols that are
 imported have to be identified as such. When building PCRE, the appropriate
--- a/glib/pcre/pcre_chartables.c
+++ b/glib/pcre/pcre_chartables.c
@ -1,6 +1,3 @@
-/* This file is autogenerated by ../update-pcre/update.sh during
- * the update of the local copy of PCRE.
- */
 /*************************************************
 *      Perl-Compatible Regular Expressions       *
 *************************************************/
--- a/glib/pcre/pcre_compile.c
+++ b/glib/pcre/pcre_compile.c
@ -331,7 +331,7 @@ static const char *
 find_error_text(int n)
 {
 const char *s = error_texts;
-for (; n > 0; n--) while (*s++ != 0);
+for (; n > 0; n--) while (*s++ != 0) {};
 return s;
 }

@ -437,7 +437,7 @@ else
      {
      const uschar *p;
      for (p = ptr+2; *p != 0 && *p != '}'; p++)
-        if (*p != '-' && g_ascii_isdigit(*p) == 0) break;
+        if (*p != '-' && g_ascii_isdigit (*p) == 0) break;
      if (*p != 0 && *p != '}')
        {
        c = -ESC_k;
@ -456,7 +456,7 @@ else
    else negated = FALSE;

    c = 0;
-    while (g_ascii_isdigit(ptr[1]) != 0)
+    while (g_ascii_isdigit (ptr[1]) != 0)
      c = c * 10 + *(++ptr) - '0';

    if (c < 0)   /* Integer overflow */
@ -509,7 +509,7 @@ else
      {
      oldptr = ptr;
      c -= '0';
-      while (g_ascii_isdigit(ptr[1]) != 0)
+      while (g_ascii_isdigit (ptr[1]))
        c = c * 10 + *(++ptr) - '0';
      if (c < 0)    /* Integer overflow */
        {
@ -559,7 +559,7 @@ else
      int count = 0;

      c = 0;
-      while (g_ascii_isxdigit(*pt) != 0)
+      while (g_ascii_isxdigit (*pt) != 0)
        {
        register int cc = *pt++;
        if (c == 0 && cc == '0') continue;     /* Leading zeroes */
@ -588,7 +588,7 @@ else
    /* Read just a single-byte hex-defined char */

    c = 0;
-    while (i++ < 2 && g_ascii_isxdigit(ptr[1]) != 0)
+    while (i++ < 2 && g_ascii_isxdigit (ptr[1]) != 0)
      {
      int cc;                               /* Some compilers don't like ++ */
      cc = *(++ptr);                        /* in initializers */
@ -757,15 +757,15 @@ Returns:    TRUE or FALSE
 static BOOL
 is_counted_repeat(const uschar *p)
 {
-if (g_ascii_isdigit(*p++) == 0) return FALSE;
-while (g_ascii_isdigit(*p) != 0) p++;
+if (g_ascii_isdigit (*p++) == 0) return FALSE;
+while (g_ascii_isdigit (*p) != 0) p++;
 if (*p == '}') return TRUE;

 if (*p++ != ',') return FALSE;
 if (*p == '}') return TRUE;

-if (g_ascii_isdigit(*p++) == 0) return FALSE;
-while (g_ascii_isdigit(*p) != 0) p++;
+if (g_ascii_isdigit (*p++) == 0) return FALSE;
+while (g_ascii_isdigit (*p) != 0) p++;

 return (*p == '}');
 }
@ -800,7 +800,7 @@ int max = -1;
 /* Read the minimum value and do a paranoid check: a negative value indicates
 an integer overflow. */

-while (g_ascii_isdigit(*p) != 0) min = min * 10 + *p++ - '0';
+while (g_ascii_isdigit (*p) != 0) min = min * 10 + *p++ - '0';
 if (min < 0 || min > 65535)
  {
  *errorcodeptr = ERR5;
@ -815,7 +815,7 @@ if (*p == '}') max = min; else
  if (*(++p) != '}')
    {
    max = 0;
-    while(g_ascii_isdigit(*p) != 0) max = max * 10 + *p++ - '0';
+    while(g_ascii_isdigit (*p) != 0) max = max * 10 + *p++ - '0';
    if (max < 0 || max > 65535)
      {
      *errorcodeptr = ERR5;
@ -878,7 +878,7 @@ for (; *ptr != 0; ptr++)
    if (*(++ptr) == 0) return -1;
    if (*ptr == 'Q') for (;;)
      {
-      while (*(++ptr) != 0 && *ptr != '\\');
+      while (*(++ptr) != 0 && *ptr != '\\') {};
      if (*ptr == 0) return -1;
      if (*(++ptr) == 'E') break;
      }
@ -921,7 +921,7 @@ for (; *ptr != 0; ptr++)
        if (*(++ptr) == 0) return -1;
        if (*ptr == 'Q') for (;;)
          {
-          while (*(++ptr) != 0 && *ptr != '\\');
+          while (*(++ptr) != 0 && *ptr != '\\') {};
          if (*ptr == 0) return -1;
          if (*(++ptr) == 'E') break;
          }
@ -935,7 +935,7 @@ for (; *ptr != 0; ptr++)

  if (xmode && *ptr == '#')
    {
-    while (*(++ptr) != 0 && *ptr != '\n');
+    while (*(++ptr) != 0 && *ptr != '\n') {};
    if (*ptr == 0) return -1;
    continue;
    }
@ -1326,6 +1326,8 @@ for (;;)
      if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];
      break;
      }
+#else
+    (void)(utf8);  /* Keep compiler happy by referencing function argument */
 #endif
    }
  }
@ -1419,6 +1421,8 @@ for (;;)
      if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];
      break;
      }
+#else
+    (void)(utf8);  /* Keep compiler happy by referencing function argument */
 #endif
    }
  }
@ -1891,7 +1895,7 @@ get_othercase_range(unsigned int *cptr, unsigned int d, unsigned int *ocptr,
 unsigned int c, othercase, next;

 for (c = *cptr; c <= d; c++)
-  { if ((othercase = _pcre_ucp_othercase(c)) != NOTACHAR) break; }
+  { if ((othercase = UCD_OTHERCASE(c)) != c) break; }

 if (c > d) return FALSE;

@ -1900,7 +1904,7 @@ next = othercase + 1;

 for (++c; c <= d; c++)
  {
-  if (_pcre_ucp_othercase(c) != next) break;
+  if (UCD_OTHERCASE(c) != next) break;
  next++;
  }

@ -2010,6 +2014,8 @@ if (next >= 0) switch(op_code)
  case OP_CHAR:
 #ifdef SUPPORT_UTF8
  if (utf8 && item > 127) { GETCHAR(item, utf8_char); }
+#else
+  (void)(utf8_char);  /* Keep compiler happy by referencing function argument */
 #endif
  return item != next;

@ -2028,7 +2034,7 @@ if (next >= 0) switch(op_code)
    unsigned int othercase;
    if (next < 128) othercase = cd->fcc[next]; else
 #ifdef SUPPORT_UCP
-    othercase = _pcre_ucp_othercase((unsigned int)next);
+    othercase = UCD_OTHERCASE((unsigned int)next);
 #else
    othercase = NOTACHAR;
 #endif
@ -2049,7 +2055,7 @@ if (next >= 0) switch(op_code)
    unsigned int othercase;
    if (next < 128) othercase = cd->fcc[next]; else
 #ifdef SUPPORT_UCP
-    othercase = _pcre_ucp_othercase(next);
+    othercase = UCD_OTHERCASE(next);
 #else
    othercase = NOTACHAR;
 #endif
@ -3215,7 +3221,7 @@ for (;; ptr++)
        if ((options & PCRE_CASELESS) != 0)
          {
          unsigned int othercase;
-          if ((othercase = _pcre_ucp_othercase(c)) != NOTACHAR)
+          if ((othercase = UCD_OTHERCASE(c)) != c)
            {
            *class_utf8data++ = XCL_SINGLE;
            class_utf8data += _pcre_ord2utf8(othercase, class_utf8data);
@ -4092,7 +4098,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
      const char *vn = verbnames;
      const uschar *name = ++ptr;
      previous = NULL;
-      while ((cd->ctypes[*++ptr] & ctype_letter) != 0);
+      while ((cd->ctypes[*++ptr] & ctype_letter) != 0) {};
      if (*ptr == ':')
        {
        *errorcodeptr = ERR59;   /* Not supported */
@ -4230,7 +4236,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
        while ((cd->ctypes[*ptr] & ctype_word) != 0)
          {
          if (recno >= 0)
-            recno = (g_ascii_isdigit(*ptr) != 0)?
+            recno = (g_ascii_isdigit (*ptr) != 0)?
              recno * 10 + *ptr - '0' : -1;
          ptr++;
          }
@ -4315,7 +4321,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
          recno = 0;
          for (i = 1; i < namelen; i++)
            {
-            if (g_ascii_isdigit(name[i]) == 0)
+            if (g_ascii_isdigit (name[i]) == 0)
              {
              *errorcodeptr = ERR15;
              goto FAILED;
@ -4411,7 +4417,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
        *code++ = OP_CALLOUT;
          {
          int n = 0;
-          while (g_ascii_isdigit(*(++ptr)) != 0)
+          while (g_ascii_isdigit (*(++ptr)) != 0)
            n = n * 10 + *ptr - '0';
          if (*ptr != ')')
            {
@ -4626,7 +4632,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
          if ((refsign = *ptr) == '+')
            {
            ptr++;
-            if (g_ascii_isdigit(*ptr) == 0)
+            if (g_ascii_isdigit (*ptr) == 0)
              {
              *errorcodeptr = ERR63;
              goto FAILED;
@ -4634,13 +4640,13 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
            }
          else if (refsign == '-')
            {
-            if (g_ascii_isdigit(ptr[1]) == 0)
+            if (g_ascii_isdigit (ptr[1]) == 0)
              goto OTHER_CHAR_AFTER_QUERY;
            ptr++;
            }

          recno = 0;
-          while(g_ascii_isdigit(*ptr) != 0)
+          while(g_ascii_isdigit (*ptr) != 0)
            recno = recno * 10 + *ptr++ - '0';

          if (*ptr != terminator)
@ -4796,10 +4802,8 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
        both phases.

        If we are not at the pattern start, compile code to change the ims
-        options if this setting actually changes any of them. We also pass the
-        new setting back so that it can be put at the start of any following
-        branches, and when this group ends (if we are in a group), a resetting
-        item can be compiled. */
+        options if this setting actually changes any of them, and reset the
+        greedy defaults and the case value for firstbyte and reqbyte. */

        if (*ptr == ')')
          {
@ -4807,7 +4811,6 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
               (lengthptr == NULL || *lengthptr == 2 + 2*LINK_SIZE))
            {
            cd->external_options = newoptions;
-            options = *optionsptr = newoptions;
            }
         else
            {
@ -4816,17 +4819,17 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
              *code++ = OP_OPT;
              *code++ = newoptions & PCRE_IMS;
              }
-
-            /* Change options at this level, and pass them back for use
-            in subsequent branches. Reset the greedy defaults and the case
-            value for firstbyte and reqbyte. */
-
-            *optionsptr = options = newoptions;
            greedy_default = ((newoptions & PCRE_UNGREEDY) != 0);
            greedy_non_default = greedy_default ^ 1;
-            req_caseopt = ((options & PCRE_CASELESS) != 0)? REQ_CASELESS : 0;
+            req_caseopt = ((newoptions & PCRE_CASELESS) != 0)? REQ_CASELESS : 0;
            }

+          /* Change options at this level, and pass them back for use
+          in subsequent branches. When not at the start of the pattern, this
+          information is also necessary so that a resetting item can be
+          compiled at the end of a group (if we are in a group). */
+
+          *optionsptr = options = newoptions;
          previous = NULL;       /* This item can't be repeated */
          continue;              /* It is complete */
          }
@ -5115,7 +5118,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
        /* Test a signed number in angle brackets or quotes. */

        p = ptr + 2;
-        while (g_ascii_isdigit(*p) != 0) p++;
+        while (g_ascii_isdigit (*p) != 0) p++;
        if (*p != terminator)
          {
          *errorcodeptr = ERR57;
@ -5820,7 +5823,7 @@ Returns:        pointer to compiled data block, or NULL on error,
                with errorptr and erroroffset set
 */

-PCRE_EXP_DEFN pcre *
+PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
 pcre_compile(const char *pattern, int options, const char **errorptr,
  int *erroroffset, const unsigned char *tables)
 {
@ -5828,7 +5831,7 @@ return pcre_compile2(pattern, options, NULL, errorptr, erroroffset, tables);
 }


-PCRE_EXP_DEFN pcre *
+PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
 pcre_compile2(const char *pattern, int options, int *errorcodeptr,
  const char **errorptr, int *erroroffset, const unsigned char *tables)
 {
--- a/glib/pcre/pcre_config.c
+++ b/glib/pcre/pcre_config.c
@ -62,7 +62,7 @@ Arguments:
 Returns:           0 if data returned, negative on error
 */

-PCRE_EXP_DEFN int
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
 pcre_config(int what, void *where)
 {
 switch (what)
--- a/glib/pcre/pcre_dfa_exec.c
+++ b/glib/pcre/pcre_dfa_exec.c
@ -512,9 +512,6 @@ for (;;)
    const uschar *code;
    int state_offset = current_state->offset;
    int count, codevalue;
-#ifdef SUPPORT_UCP
-    int chartype, script;
-#endif

 #ifdef DEBUG
    printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
@ -825,7 +822,7 @@ for (;;)
      if (clen > 0)
        {
        BOOL OK;
-        int category = _pcre_ucp_findprop(c, &chartype, &script);
+        int chartype = UCD_CHARTYPE(c);
        switch(code[1])
          {
          case PT_ANY:
@ -837,7 +834,7 @@ for (;;)
          break;

          case PT_GC:
-          OK = category == code[2];
+          OK = _pcre_ucp_gentype[chartype] == code[2];
          break;

          case PT_PC:
@ -845,7 +842,7 @@ for (;;)
          break;

          case PT_SC:
-          OK = script == code[2];
+          OK = UCD_SCRIPT(c) == code[2];
          break;

          /* Should never occur, but keep compilers from grumbling. */
@ -994,7 +991,7 @@ for (;;)
      if (clen > 0)
        {
        BOOL OK;
-        int category = _pcre_ucp_findprop(c, &chartype, &script);
+        int chartype = UCD_CHARTYPE(c);
        switch(code[2])
          {
          case PT_ANY:
@ -1006,7 +1003,7 @@ for (;;)
          break;

          case PT_GC:
-          OK = category == code[3];
+          OK = _pcre_ucp_gentype[chartype] == code[3];
          break;

          case PT_PC:
@ -1014,7 +1011,7 @@ for (;;)
          break;

          case PT_SC:
-          OK = script == code[3];
+          OK = UCD_SCRIPT(c) == code[3];
          break;

          /* Should never occur, but keep compilers from grumbling. */
@ -1043,7 +1040,7 @@ for (;;)
      case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
      count = current_state->count;  /* Already matched */
      if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
-      if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
+      if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
        {
        const uschar *nptr = ptr + clen;
        int ncount = 0;
@ -1057,7 +1054,7 @@ for (;;)
          int nd;
          int ndlen = 1;
          GETCHARLEN(nd, nptr, ndlen);
-          if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
+          if (UCD_CATEGORY(nd) != ucp_M) break;
          ncount++;
          nptr += ndlen;
          }
@ -1216,7 +1213,7 @@ for (;;)
      if (clen > 0)
        {
        BOOL OK;
-        int category = _pcre_ucp_findprop(c, &chartype, &script);
+        int chartype = UCD_CHARTYPE(c);
        switch(code[2])
          {
          case PT_ANY:
@ -1228,7 +1225,7 @@ for (;;)
          break;

          case PT_GC:
-          OK = category == code[3];
+          OK = _pcre_ucp_gentype[chartype] == code[3];
          break;

          case PT_PC:
@ -1236,7 +1233,7 @@ for (;;)
          break;

          case PT_SC:
-          OK = script == code[3];
+          OK = UCD_SCRIPT(c) == code[3];
          break;

          /* Should never occur, but keep compilers from grumbling. */
@ -1274,7 +1271,7 @@ for (;;)
      QS2:

      ADD_ACTIVE(state_offset + 2, 0);
-      if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
+      if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
        {
        const uschar *nptr = ptr + clen;
        int ncount = 0;
@ -1289,7 +1286,7 @@ for (;;)
          int nd;
          int ndlen = 1;
          GETCHARLEN(nd, nptr, ndlen);
-          if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
+          if (UCD_CATEGORY(nd) != ucp_M) break;
          ncount++;
          nptr += ndlen;
          }
@ -1463,7 +1460,7 @@ for (;;)
      if (clen > 0)
        {
        BOOL OK;
-        int category = _pcre_ucp_findprop(c, &chartype, &script);
+        int chartype = UCD_CHARTYPE(c);
        switch(code[4])
          {
          case PT_ANY:
@ -1475,7 +1472,7 @@ for (;;)
          break;

          case PT_GC:
-          OK = category == code[5];
+          OK = _pcre_ucp_gentype[chartype] == code[5];
          break;

          case PT_PC:
@ -1483,7 +1480,7 @@ for (;;)
          break;

          case PT_SC:
-          OK = script == code[5];
+          OK = UCD_SCRIPT(c) == code[5];
          break;

          /* Should never occur, but keep compilers from grumbling. */
@ -1516,7 +1513,7 @@ for (;;)
      if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
        { ADD_ACTIVE(state_offset + 4, 0); }
      count = current_state->count;  /* Number already matched */
-      if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
+      if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
        {
        const uschar *nptr = ptr + clen;
        int ncount = 0;
@ -1530,7 +1527,7 @@ for (;;)
          int nd;
          int ndlen = 1;
          GETCHARLEN(nd, nptr, ndlen);
-          if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
+          if (UCD_CATEGORY(nd) != ucp_M) break;
          ncount++;
          nptr += ndlen;
          }
@ -1710,7 +1707,7 @@ for (;;)
          other case of the character. */

 #ifdef SUPPORT_UCP
-          othercase = _pcre_ucp_othercase(c);
+          othercase = UCD_OTHERCASE(c);
 #else
          othercase = NOTACHAR;
 #endif
@ -1735,7 +1732,7 @@ for (;;)
      to wait for them to pass before continuing. */

      case OP_EXTUNI:
-      if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
+      if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
        {
        const uschar *nptr = ptr + clen;
        int ncount = 0;
@ -1743,7 +1740,7 @@ for (;;)
          {
          int nclen = 1;
          GETCHARLEN(c, nptr, nclen);
-          if (_pcre_ucp_findprop(c, &chartype, &script) != ucp_M) break;
+          if (UCD_CATEGORY(c) != ucp_M) break;
          ncount++;
          nptr += nclen;
          }
@ -1911,7 +1908,7 @@ for (;;)
          if (utf8 && d >= 128)
            {
 #ifdef SUPPORT_UCP
-            otherd = _pcre_ucp_othercase(d);
+            otherd = UCD_OTHERCASE(d);
 #endif  /* SUPPORT_UCP */
            }
          else
@ -1949,7 +1946,7 @@ for (;;)
          if (utf8 && d >= 128)
            {
 #ifdef SUPPORT_UCP
-            otherd = _pcre_ucp_othercase(d);
+            otherd = UCD_OTHERCASE(d);
 #endif  /* SUPPORT_UCP */
            }
          else
@ -1985,7 +1982,7 @@ for (;;)
          if (utf8 && d >= 128)
            {
 #ifdef SUPPORT_UCP
-            otherd = _pcre_ucp_othercase(d);
+            otherd = UCD_OTHERCASE(d);
 #endif  /* SUPPORT_UCP */
            }
          else
@ -2017,7 +2014,7 @@ for (;;)
          if (utf8 && d >= 128)
            {
 #ifdef SUPPORT_UCP
-            otherd = _pcre_ucp_othercase(d);
+            otherd = UCD_OTHERCASE(d);
 #endif  /* SUPPORT_UCP */
            }
          else
@ -2052,7 +2049,7 @@ for (;;)
          if (utf8 && d >= 128)
            {
 #ifdef SUPPORT_UCP
-            otherd = _pcre_ucp_othercase(d);
+            otherd = UCD_OTHERCASE(d);
 #endif  /* SUPPORT_UCP */
            }
          else
@ -2508,7 +2505,7 @@ Returns:          > 0 => number of match offset pairs placed in offsets
                 < -1 => some kind of unexpected problem
 */

-PCRE_EXP_DEFN int
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
 pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
  const char *subject, int length, int start_offset, int options, int *offsets,
  int offsetcount, int *workspace, int wscount)
@ -2736,7 +2733,18 @@ for (;;)

    if (firstline)
      {
-      const uschar *t = current_subject;
+      USPTR t = current_subject;
+#ifdef SUPPORT_UTF8
+      if (utf8)
+        {
+        while (t < md->end_subject && !IS_NEWLINE(t))
+          {
+          t++;
+          while (t < end_subject && (*t & 0xc0) == 0x80) t++;
+          }
+        }
+      else
+#endif
      while (t < md->end_subject && !IS_NEWLINE(t)) t++;
      end_subject = t;
      }
@ -2758,7 +2766,20 @@ for (;;)
      {
      if (current_subject > md->start_subject + start_offset)
        {
-        while (current_subject <= end_subject && !WAS_NEWLINE(current_subject))
+#ifdef SUPPORT_UTF8
+        if (utf8)
+          {
+          while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
+            {
+            current_subject++;
+            while(current_subject < end_subject &&
+                  (*current_subject & 0xc0) == 0x80)
+              current_subject++;
+            }
+          }
+        else
+#endif
+        while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
          current_subject++;

        /* If we have just passed a CR and the newline option is ANY or
--- a/glib/pcre/pcre_exec.c
+++ b/glib/pcre/pcre_exec.c
@ -158,13 +158,39 @@ printf("\n");

 if (length > md->end_subject - eptr) return FALSE;

-/* Separate the caselesss case for speed */
+/* Separate the caseless case for speed. In UTF-8 mode we can only do this
+properly if Unicode properties are supported. Otherwise, we can check only
+ASCII characters. */

 if ((ims & PCRE_CASELESS) != 0)
  {
-  while (length-- > 0)
-    if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;
+#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UCP
+  if (md->utf8)
+    {
+    USPTR endptr = eptr + length;
+    while (eptr < endptr)
+      {
+      int c, d;
+      GETCHARINC(c, eptr);
+      GETCHARINC(d, p);
+      if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
      }
+    }
+  else
+#endif
+#endif
+
+  /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
+  is no UCP support. */
+
+  while (length-- > 0)
+    { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
+  }
+
+/* In the caseful case, we can just compare the bytes, whether or not we
+are in UTF-8 mode. */
+
 else
  { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }

@ -1653,9 +1679,7 @@ for (;;)
    if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    GETCHARINCTEST(c, eptr);
      {
-      int chartype, script;
-      int category = _pcre_ucp_findprop(c, &chartype, &script);
-
+      int chartype = UCD_CHARTYPE(c);
      switch(ecode[1])
        {
        case PT_ANY:
@ -1670,7 +1694,7 @@ for (;;)
         break;

        case PT_GC:
-        if ((ecode[2] != category) == (op == OP_PROP))
+        if ((ecode[2] != _pcre_ucp_gentype[chartype]) == (op == OP_PROP))
          RRETURN(MATCH_NOMATCH);
        break;

@ -1680,7 +1704,7 @@ for (;;)
        break;

        case PT_SC:
-        if ((ecode[2] != script) == (op == OP_PROP))
+        if ((ecode[2] != UCD_SCRIPT(c)) == (op == OP_PROP))
          RRETURN(MATCH_NOMATCH);
        break;

@ -1699,8 +1723,7 @@ for (;;)
    if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    GETCHARINCTEST(c, eptr);
      {
-      int chartype, script;
-      int category = _pcre_ucp_findprop(c, &chartype, &script);
+      int category = UCD_CATEGORY(c);
      if (category == ucp_M) RRETURN(MATCH_NOMATCH);
      while (eptr < md->end_subject)
        {
@ -1709,7 +1732,7 @@ for (;;)
          {
          GETCHARLEN(c, eptr, len);
          }
-        category = _pcre_ucp_findprop(c, &chartype, &script);
+        category = UCD_CATEGORY(c);
        if (category != ucp_M) break;
        eptr += len;
        }
@ -2174,7 +2197,7 @@ for (;;)
        if (fc != dc)
          {
 #ifdef SUPPORT_UCP
-          if (dc != _pcre_ucp_othercase(fc))
+          if (dc != UCD_OTHERCASE(fc))
 #endif
            RRETURN(MATCH_NOMATCH);
          }
@ -2265,7 +2288,7 @@ for (;;)
 #ifdef SUPPORT_UCP
        unsigned int othercase;
        if ((ims & PCRE_CASELESS) != 0 &&
-            (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)
+            (othercase = UCD_OTHERCASE(fc)) != fc)
          oclength = _pcre_ord2utf8(othercase, occhars);
        else oclength = 0;
 #endif  /* SUPPORT_UCP */
@ -2585,10 +2608,11 @@ for (;;)
            {
            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
            GETCHARINC(d, eptr);
            if (d < 256) d = md->lcc[d];
-            if (fi >= max || eptr >= md->end_subject || fc == d)
-              RRETURN(MATCH_NOMATCH);
+            if (fc == d) RRETURN(MATCH_NOMATCH);
+
            }
          }
        else
@ -2694,9 +2718,9 @@ for (;;)
            {
            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
            GETCHARINC(d, eptr);
-            if (fi >= max || eptr >= md->end_subject || fc == d)
-              RRETURN(MATCH_NOMATCH);
+            if (fc == d) RRETURN(MATCH_NOMATCH);
            }
          }
        else
@ -2870,7 +2894,7 @@ for (;;)
            {
            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
            GETCHARINCTEST(c, eptr);
-            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+            prop_chartype = UCD_CHARTYPE(c);
            if ((prop_chartype == ucp_Lu ||
                 prop_chartype == ucp_Ll ||
                 prop_chartype == ucp_Lt) == prop_fail_result)
@ -2883,7 +2907,7 @@ for (;;)
            {
            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
            GETCHARINCTEST(c, eptr);
-            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+            prop_category = UCD_CATEGORY(c);
            if ((prop_category == prop_value) == prop_fail_result)
              RRETURN(MATCH_NOMATCH);
            }
@ -2894,7 +2918,7 @@ for (;;)
            {
            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
            GETCHARINCTEST(c, eptr);
-            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+            prop_chartype = UCD_CHARTYPE(c);
            if ((prop_chartype == prop_value) == prop_fail_result)
              RRETURN(MATCH_NOMATCH);
            }
@ -2905,7 +2929,7 @@ for (;;)
            {
            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
            GETCHARINCTEST(c, eptr);
-            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+            prop_script = UCD_SCRIPT(c);
            if ((prop_script == prop_value) == prop_fail_result)
              RRETURN(MATCH_NOMATCH);
            }
@ -2924,7 +2948,7 @@ for (;;)
        for (i = 1; i <= min; i++)
          {
          GETCHARINCTEST(c, eptr);
-          prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+          prop_category = UCD_CATEGORY(c);
          if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
          while (eptr < md->end_subject)
            {
@ -2933,7 +2957,7 @@ for (;;)
              {
              GETCHARLEN(c, eptr, len);
              }
-            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+            prop_category = UCD_CATEGORY(c);
            if (prop_category != ucp_M) break;
            eptr += len;
            }
@ -3349,7 +3373,7 @@ for (;;)
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
            GETCHARINC(c, eptr);
-            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+            prop_chartype = UCD_CHARTYPE(c);
            if ((prop_chartype == ucp_Lu ||
                 prop_chartype == ucp_Ll ||
                 prop_chartype == ucp_Lt) == prop_fail_result)
@ -3364,7 +3388,7 @@ for (;;)
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
            GETCHARINC(c, eptr);
-            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+            prop_category = UCD_CATEGORY(c);
            if ((prop_category == prop_value) == prop_fail_result)
              RRETURN(MATCH_NOMATCH);
            }
@ -3377,7 +3401,7 @@ for (;;)
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
            GETCHARINC(c, eptr);
-            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+            prop_chartype = UCD_CHARTYPE(c);
            if ((prop_chartype == prop_value) == prop_fail_result)
              RRETURN(MATCH_NOMATCH);
            }
@ -3390,7 +3414,7 @@ for (;;)
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
            GETCHARINC(c, eptr);
-            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+            prop_script = UCD_SCRIPT(c);
            if ((prop_script == prop_value) == prop_fail_result)
              RRETURN(MATCH_NOMATCH);
            }
@ -3412,7 +3436,7 @@ for (;;)
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
          if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
          GETCHARINCTEST(c, eptr);
-          prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+          prop_category = UCD_CATEGORY(c);
          if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
          while (eptr < md->end_subject)
            {
@ -3421,7 +3445,7 @@ for (;;)
              {
              GETCHARLEN(c, eptr, len);
              }
-            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+            prop_category = UCD_CATEGORY(c);
            if (prop_category != ucp_M) break;
            eptr += len;
            }
@ -3739,7 +3763,7 @@ for (;;)
            int len = 1;
            if (eptr >= md->end_subject) break;
            GETCHARLEN(c, eptr, len);
-            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+            prop_chartype = UCD_CHARTYPE(c);
            if ((prop_chartype == ucp_Lu ||
                 prop_chartype == ucp_Ll ||
                 prop_chartype == ucp_Lt) == prop_fail_result)
@ -3754,7 +3778,7 @@ for (;;)
            int len = 1;
            if (eptr >= md->end_subject) break;
            GETCHARLEN(c, eptr, len);
-            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+            prop_category = UCD_CATEGORY(c);
            if ((prop_category == prop_value) == prop_fail_result)
              break;
            eptr+= len;
@ -3767,7 +3791,7 @@ for (;;)
            int len = 1;
            if (eptr >= md->end_subject) break;
            GETCHARLEN(c, eptr, len);
-            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+            prop_chartype = UCD_CHARTYPE(c);
            if ((prop_chartype == prop_value) == prop_fail_result)
              break;
            eptr+= len;
@ -3780,7 +3804,7 @@ for (;;)
            int len = 1;
            if (eptr >= md->end_subject) break;
            GETCHARLEN(c, eptr, len);
-            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+            prop_script = UCD_SCRIPT(c);
            if ((prop_script == prop_value) == prop_fail_result)
              break;
            eptr+= len;
@ -3809,7 +3833,7 @@ for (;;)
          {
          if (eptr >= md->end_subject) break;
          GETCHARINCTEST(c, eptr);
-          prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+          prop_category = UCD_CATEGORY(c);
          if (prop_category == ucp_M) break;
          while (eptr < md->end_subject)
            {
@ -3818,7 +3842,7 @@ for (;;)
              {
              GETCHARLEN(c, eptr, len);
              }
-            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+            prop_category = UCD_CATEGORY(c);
            if (prop_category != ucp_M) break;
            eptr += len;
            }
@ -3840,7 +3864,7 @@ for (;;)
              BACKCHAR(eptr);
              GETCHARLEN(c, eptr, len);
              }
-            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+            prop_category = UCD_CATEGORY(c);
            if (prop_category != ucp_M) break;
            eptr--;
            }
@ -4360,7 +4384,7 @@ Returns:          > 0 => success; value is the number of elements filled in
                 < -1 => some kind of unexpected problem
 */

-PCRE_EXP_DEFN int
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
 pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
  PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
  int offsetcount)
@ -4672,31 +4696,53 @@ for(;;)
  if (firstline)
    {
    USPTR t = start_match;
+#ifdef SUPPORT_UTF8
+    if (utf8)
+      {
+      while (t < md->end_subject && !IS_NEWLINE(t))
+        {
+        t++;
+        while (t < end_subject && (*t & 0xc0) == 0x80) t++;
+        }
+      }
+    else
+#endif
    while (t < md->end_subject && !IS_NEWLINE(t)) t++;
    end_subject = t;
    }

-  /* Now test for a unique first byte */
+  /* Now advance to a unique first byte if there is one. */

  if (first_byte >= 0)
    {
    if (first_byte_caseless)
-      while (start_match < end_subject &&
-             md->lcc[*start_match] != first_byte)
-        { NEXTCHAR(start_match); }
+      while (start_match < end_subject && md->lcc[*start_match] != first_byte)
+        start_match++;
    else
      while (start_match < end_subject && *start_match != first_byte)
-        { NEXTCHAR(start_match); }
+        start_match++;
    }

-  /* Or to just after a linebreak for a multiline match if possible */
+  /* Or to just after a linebreak for a multiline match */

  else if (startline)
    {
    if (start_match > md->start_subject + start_offset)
      {
-      while (start_match <= end_subject && !WAS_NEWLINE(start_match))
-        { NEXTCHAR(start_match); }
+#ifdef SUPPORT_UTF8
+      if (utf8)
+        {
+        while (start_match < end_subject && !WAS_NEWLINE(start_match))
+          {
+          start_match++;
+          while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
+            start_match++;
+          }
+        }
+      else
+#endif
+      while (start_match < end_subject && !WAS_NEWLINE(start_match))
+        start_match++;

      /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
      and we are now at a LF, advance the match position by one more character.
@ -4710,15 +4756,14 @@ for(;;)
      }
    }

-  /* Or to a non-unique first char after study */
+  /* Or to a non-unique first byte after study */

  else if (start_bits != NULL)
    {
    while (start_match < end_subject)
      {
      register unsigned int c = *start_match;
-      if ((start_bits[c/8] & (1 << (c&7))) == 0)
-        { NEXTCHAR(start_match); }
+      if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++;
        else break;
      }
    }
--- a/glib/pcre/pcre_fullinfo.c
+++ b/glib/pcre/pcre_fullinfo.c
@ -65,7 +65,7 @@ Arguments:
 Returns:           0 if data returned, negative on error
 */

-PCRE_EXP_DEFN int
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
 pcre_fullinfo(const pcre *argument_re, const pcre_extra *extra_data, int what,
  void *where)
 {
--- a/glib/pcre/pcre_get.c
+++ b/glib/pcre/pcre_get.c
@ -65,7 +65,7 @@ Returns:      the number of the named parentheses, or a negative number
                (PCRE_ERROR_NOSUBSTRING) if not found
 */

-int
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
 pcre_get_stringnumber(const pcre *code, const char *stringname)
 {
 int rc;
@ -114,7 +114,7 @@ Returns:      the length of each entry, or a negative number
                (PCRE_ERROR_NOSUBSTRING) if not found
 */

-int
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
 pcre_get_stringtable_entries(const pcre *code, const char *stringname,
  char **firstptr, char **lastptr)
 {
@ -231,7 +231,7 @@ Returns:         if successful:
                   PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
 */

-int
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
 pcre_copy_substring(const char *subject, int *ovector, int stringcount,
  int stringnumber, char *buffer, int size)
 {
@ -276,7 +276,7 @@ Returns:         if successful:
                   PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
 */

-int
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
 pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
  int stringcount, const char *stringname, char *buffer, int size)
 {
@ -308,7 +308,7 @@ Returns:         if successful: 0
                   PCRE_ERROR_NOMEMORY (-6) failed to get store
 */

-int
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
 pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
  const char ***listptr)
 {
@ -353,7 +353,7 @@ Argument:   the result of a previous pcre_get_substring_list()
 Returns:    nothing
 */

-void
+PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
 pcre_free_substring_list(const char **pointer)
 {
 (pcre_free)((void *)pointer);
@ -386,7 +386,7 @@ Returns:         if successful:
                   PCRE_ERROR_NOSUBSTRING (-7) substring not present
 */

-int
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
 pcre_get_substring(const char *subject, int *ovector, int stringcount,
  int stringnumber, const char **stringptr)
 {
@ -433,7 +433,7 @@ Returns:         if successful:
                   PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
 */

-int
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
 pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
  int stringcount, const char *stringname, const char **stringptr)
 {
@ -456,7 +456,7 @@ Argument:   the result of a previous pcre_get_substring()
 Returns:    nothing
 */

-void
+PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
 pcre_free_substring(const char *pointer)
 {
 (pcre_free)((void *)pointer);
--- a/glib/pcre/pcre_globals.c
+++ b/glib/pcre/pcre_globals.c
@ -52,8 +52,6 @@ differently, and global variables are not used (see pcre.in). */

 #include "pcre_internal.h"

-#ifndef VPCOMPAT
 PCRE_EXP_DATA_DEFN int   (*pcre_callout)(pcre_callout_block *) = NULL;
-#endif

 /* End of pcre_globals.c */
--- a/glib/pcre/pcre_info.c
+++ b/glib/pcre/pcre_info.c
@ -72,7 +72,7 @@ Returns:        number of capturing subpatterns
                or negative values on error
 */

-PCRE_EXP_DEFN int
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
 pcre_info(const pcre *argument_re, int *optptr, int *first_byte)
 {
 real_pcre internal_re;
--- a/glib/pcre/pcre_internal.h
+++ b/glib/pcre/pcre_internal.h
@ -132,6 +132,20 @@ PCRE_EXP_DATA_DEFN only if they are not already set. */
 #  endif
 #endif

+/* When compiling with the MSVC compiler, it is sometimes necessary to include
+a "calling convention" before exported function names. (This is secondhand
+information; I know nothing about MSVC myself). For example, something like
+
+  void __cdecl function(....)
+
+might be needed. In order to make this easy, all the exported functions have
+PCRE_CALL_CONVENTION just before their names. It is rarely needed; if not
+set, we ensure here that it has no effect. */
+
+#ifndef PCRE_CALL_CONVENTION
+#define PCRE_CALL_CONVENTION
+#endif
+
 /* We need to have types that specify unsigned 16-bit and 32-bit integers. We
 cannot determine these outside the compilation (e.g. by running a program as
 part of "configure") because PCRE is often cross-compiled for use on other
@ -140,16 +154,20 @@ preprocessor time in standard C environments. */

 #if USHRT_MAX == 65535
  typedef unsigned short pcre_uint16;
+  typedef short pcre_int16;
 #elif UINT_MAX == 65535
  typedef unsigned int pcre_uint16;
+  typedef int pcre_int16;
 #else
  #error Cannot determine a type for 16-bit unsigned integers
 #endif

 #if UINT_MAX == 4294967295
  typedef unsigned int pcre_uint32;
+  typedef int pcre_int32;
 #elif ULONG_MAX == 4294967295
  typedef unsigned long int pcre_uint32;
+  typedef long int pcre_int32;
 #else
  #error Cannot determine a type for 32-bit unsigned integers
 #endif
@ -241,7 +259,6 @@ option on the command line. */
 #define strncmp(s1,s2,m) _strncmp(s1,s2,m)
 #define memcmp(s,c,n)    _memcmp(s,c,n)
 #define memcpy(d,s,n)    _memcpy(d,s,n)
-#define memmove(d,s,n)   _memmove(d,s,n)
 #define memset(s,c,n)    _memset(s,c,n)
 #else  /* VPCOMPAT */

@ -363,7 +380,6 @@ never be called in byte mode. To make sure it can never even appear when UTF-8
 support is omitted, we don't even define it. */

 #ifndef SUPPORT_UTF8
-#define NEXTCHAR(p) p++;
 #define GETCHAR(c, eptr) c = *eptr;
 #define GETCHARTEST(c, eptr) c = *eptr;
 #define GETCHARINC(c, eptr) c = *eptr++;
@ -373,13 +389,6 @@ support is omitted, we don't even define it. */

 #else   /* SUPPORT_UTF8 */

-/* Advance a character pointer one byte in non-UTF-8 mode and by one character
-in UTF-8 mode. */
-
-#define NEXTCHAR(p) \
-  p++; \
-  if (utf8) { while((*p & 0xc0) == 0x80) p++; }
-
 /* Get the next UTF-8 character, not advancing the pointer. This is called when
 we know we are in UTF-8 mode. */

@ -549,7 +558,8 @@ variable-length repeat, or a anything other than literal characters. */
 #define REQ_CASELESS 0x0100    /* indicates caselessness */
 #define REQ_VARY     0x0200    /* reqbyte followed non-literal item */

-/* Miscellaneous definitions */
+/* Miscellaneous definitions. The #ifndef is to pacify compiler warnings in
+environments where these macros are defined elsewhere. */

 typedef gboolean BOOL;

@ -1123,12 +1133,24 @@ extern BOOL         _pcre_is_newline(const uschar *, int, const uschar *,
 extern int          _pcre_ord2utf8(int, uschar *);
 extern real_pcre   *_pcre_try_flipped(const real_pcre *, real_pcre *,
                      const pcre_study_data *, pcre_study_data *);
-extern int          _pcre_ucp_findprop(const unsigned int, int *, int *);
-extern unsigned int _pcre_ucp_othercase(const unsigned int);
 extern int          _pcre_valid_utf8(const uschar *, int);
 extern BOOL         _pcre_was_newline(const uschar *, int, const uschar *,
                      int *, BOOL);
 extern BOOL         _pcre_xclass(int, const uschar *);
+extern unsigned int _pcre_ucp_othercase(unsigned int);
+
+
+extern const int         _pcre_ucp_gentype[];
+
+
+/* UCD access macros */
+
+#include "../glib.h"
+
+#define UCD_CHARTYPE(ch)  g_unichar_type(ch)
+#define UCD_SCRIPT(ch)    g_unichar_get_script(ch)
+#define UCD_CATEGORY(ch)  _pcre_ucp_gentype[UCD_CHARTYPE(ch)]
+#define UCD_OTHERCASE(ch) _pcre_ucp_othercase(ch)

 #endif

--- a/glib/pcre/pcre_ord2utf8.c
+++ b/glib/pcre/pcre_ord2utf8.c
@ -78,8 +78,10 @@ for (j = i; j > 0; j--)
 *buffer = _pcre_utf8_table2[i] | cvalue;
 return i + 1;
 #else
-return 0;   /* Keep compiler happy; this function won't ever be */
-#endif      /* called when SUPPORT_UTF8 is not defined. */
+(void)(cvalue);  /* Keep compiler happy; this function won't ever be */
+(void)(buffer);  /* called when SUPPORT_UTF8 is not defined. */
+return 0;
+#endif
 }

 /* End of pcre_ord2utf8.c */
--- a/glib/pcre/pcre_refcount.c
+++ b/glib/pcre/pcre_refcount.c
@ -68,7 +68,7 @@ Returns:        the (possibly updated) count value (a non-negative number), or
                a negative error number
 */

-PCRE_EXP_DEFN int
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
 pcre_refcount(pcre *argument_re, int adjust)
 {
 real_pcre *re = (real_pcre *)argument_re;
--- a/glib/pcre/pcre_study.c
+++ b/glib/pcre/pcre_study.c
@ -220,6 +220,7 @@ do
      /* SKIPZERO skips the bracket. */

      case OP_SKIPZERO:
+      tcode++;
      do tcode += GET(tcode,1); while (*tcode == OP_ALT);
      tcode += 1 + LINK_SIZE;
      break;
@ -503,7 +504,7 @@ Returns:    pointer to a pcre_extra block, with study_data filled in and the
            NULL on error or if no optimization possible
 */

-PCRE_EXP_DEFN pcre_extra *
+PCRE_EXP_DEFN pcre_extra * PCRE_CALL_CONVENTION
 pcre_study(const pcre *external_re, int options, const char **errorptr)
 {
 uschar start_bits[32];
--- a/glib/pcre/pcre_tables.c
+++ b/glib/pcre/pcre_tables.c
@ -87,6 +87,19 @@ const uschar _pcre_utf8_table4[] = {
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };

+/* Table to translate from particular type value to the general value. */
+
+const int _pcre_ucp_gentype[] = {
+  ucp_C, ucp_C, ucp_C, ucp_C, ucp_C,  /* Cc, Cf, Cn, Co, Cs */
+  ucp_L, ucp_L, ucp_L, ucp_L, ucp_L,  /* Ll, Lm, Lo, Lt, Lu */
+  ucp_M, ucp_M, ucp_M,                /* Mc, Me, Mn */
+  ucp_N, ucp_N, ucp_N,                /* Nd, Nl, No */
+  ucp_P, ucp_P, ucp_P, ucp_P, ucp_P,  /* Pc, Pd, Pe, Pf, Pi */
+  ucp_P, ucp_P,                       /* Po, Ps */
+  ucp_S, ucp_S, ucp_S, ucp_S,         /* Sc, Sk, Sm, So */
+  ucp_Z, ucp_Z, ucp_Z                 /* Zl, Zp, Zs */
+};
+
 /* The pcre_utt[] table below translates Unicode property names into type and
 code values. It is searched by binary chop, so must be in collating sequence of
 name. Originally, the table contained pointers to the name strings in the first
@ -94,7 +107,10 @@ field of each entry. However, that leads to a large number of relocations when
 a shared library is dynamically loaded. A significant reduction is made by
 putting all the names into a single, large string and then using offsets in the
 table itself. Maintenance is more error-prone, but frequent changes to this
-data is unlikely. */
+data are unlikely.
+
+July 2008: There is now a script called maint/GenerateUtt.py which can be used
+to generate this data instead of maintaining it entirely by hand. */

 const char _pcre_utt_names[] =
  "Any\0"
@ -108,8 +124,10 @@ const char _pcre_utt_names[] =
  "Buhid\0"
  "C\0"
  "Canadian_Aboriginal\0"
+  "Carian\0"
  "Cc\0"
  "Cf\0"
+  "Cham\0"
  "Cherokee\0"
  "Cn\0"
  "Co\0"
@ -136,12 +154,14 @@ const char _pcre_utt_names[] =
  "Inherited\0"
  "Kannada\0"
  "Katakana\0"
+  "Kayah_Li\0"
  "Kharoshthi\0"
  "Khmer\0"
  "L\0"
  "L&\0"
  "Lao\0"
  "Latin\0"
+  "Lepcha\0"
  "Limbu\0"
  "Linear_B\0"
  "Ll\0"
@ -149,6 +169,8 @@ const char _pcre_utt_names[] =
  "Lo\0"
  "Lt\0"
  "Lu\0"
+  "Lycian\0"
+  "Lydian\0"
  "M\0"
  "Malayalam\0"
  "Mc\0"
@ -163,6 +185,7 @@ const char _pcre_utt_names[] =
  "Nl\0"
  "No\0"
  "Ogham\0"
+  "Ol_Chiki\0"
  "Old_Italic\0"
  "Old_Persian\0"
  "Oriya\0"
@ -177,14 +200,17 @@ const char _pcre_utt_names[] =
  "Pi\0"
  "Po\0"
  "Ps\0"
+  "Rejang\0"
  "Runic\0"
  "S\0"
+  "Saurashtra\0"
  "Sc\0"
  "Shavian\0"
  "Sinhala\0"
  "Sk\0"
  "Sm\0"
  "So\0"
+  "Sundanese\0"
  "Syloti_Nagri\0"
  "Syriac\0"
  "Tagalog\0"
@ -197,6 +223,7 @@ const char _pcre_utt_names[] =
  "Tibetan\0"
  "Tifinagh\0"
  "Ugaritic\0"
+  "Vai\0"
  "Yi\0"
  "Z\0"
  "Zl\0"
@ -215,100 +242,111 @@ const ucp_type_table _pcre_utt[] = {
  {  63, PT_SC, ucp_Buhid },
  {  69, PT_GC, ucp_C },
  {  71, PT_SC, ucp_Canadian_Aboriginal },
-  { 91,  PT_PC, ucp_Cc },
-  { 94,  PT_PC, ucp_Cf },
-  { 97,  PT_SC, ucp_Cherokee },
-  { 106, PT_PC, ucp_Cn },
-  { 109, PT_PC, ucp_Co },
-  { 112, PT_SC, ucp_Common },
-  { 119, PT_SC, ucp_Coptic },
-  { 126, PT_PC, ucp_Cs },
-  { 129, PT_SC, ucp_Cuneiform },
-  { 139, PT_SC, ucp_Cypriot },
-  { 147, PT_SC, ucp_Cyrillic },
-  { 156, PT_SC, ucp_Deseret },
-  { 164, PT_SC, ucp_Devanagari },
-  { 175, PT_SC, ucp_Ethiopic },
-  { 184, PT_SC, ucp_Georgian },
-  { 193, PT_SC, ucp_Glagolitic },
-  { 204, PT_SC, ucp_Gothic },
-  { 211, PT_SC, ucp_Greek },
-  { 217, PT_SC, ucp_Gujarati },
-  { 226, PT_SC, ucp_Gurmukhi },
-  { 235, PT_SC, ucp_Han },
-  { 239, PT_SC, ucp_Hangul },
-  { 246, PT_SC, ucp_Hanunoo },
-  { 254, PT_SC, ucp_Hebrew },
-  { 261, PT_SC, ucp_Hiragana },
-  { 270, PT_SC, ucp_Inherited },
-  { 280, PT_SC, ucp_Kannada },
-  { 288, PT_SC, ucp_Katakana },
-  { 297, PT_SC, ucp_Kharoshthi },
-  { 308, PT_SC, ucp_Khmer },
-  { 314, PT_GC, ucp_L },
-  { 316, PT_LAMP, 0 },
-  { 319, PT_SC, ucp_Lao },
-  { 323, PT_SC, ucp_Latin },
-  { 329, PT_SC, ucp_Limbu },
-  { 335, PT_SC, ucp_Linear_B },
-  { 344, PT_PC, ucp_Ll },
-  { 347, PT_PC, ucp_Lm },
-  { 350, PT_PC, ucp_Lo },
-  { 353, PT_PC, ucp_Lt },
-  { 356, PT_PC, ucp_Lu },
-  { 359, PT_GC, ucp_M },
-  { 361, PT_SC, ucp_Malayalam },
-  { 371, PT_PC, ucp_Mc },
-  { 374, PT_PC, ucp_Me },
-  { 377, PT_PC, ucp_Mn },
-  { 380, PT_SC, ucp_Mongolian },
-  { 390, PT_SC, ucp_Myanmar },
-  { 398, PT_GC, ucp_N },
-  { 400, PT_PC, ucp_Nd },
-  { 403, PT_SC, ucp_New_Tai_Lue },
-  { 415, PT_SC, ucp_Nko },
-  { 419, PT_PC, ucp_Nl },
-  { 422, PT_PC, ucp_No },
-  { 425, PT_SC, ucp_Ogham },
-  { 431, PT_SC, ucp_Old_Italic },
-  { 442, PT_SC, ucp_Old_Persian },
-  { 454, PT_SC, ucp_Oriya },
-  { 460, PT_SC, ucp_Osmanya },
-  { 468, PT_GC, ucp_P },
-  { 470, PT_PC, ucp_Pc },
-  { 473, PT_PC, ucp_Pd },
-  { 476, PT_PC, ucp_Pe },
-  { 479, PT_PC, ucp_Pf },
-  { 482, PT_SC, ucp_Phags_Pa },
-  { 491, PT_SC, ucp_Phoenician },
-  { 502, PT_PC, ucp_Pi },
-  { 505, PT_PC, ucp_Po },
-  { 508, PT_PC, ucp_Ps },
-  { 511, PT_SC, ucp_Runic },
-  { 517, PT_GC, ucp_S },
-  { 519, PT_PC, ucp_Sc },
-  { 522, PT_SC, ucp_Shavian },
-  { 530, PT_SC, ucp_Sinhala },
-  { 538, PT_PC, ucp_Sk },
-  { 541, PT_PC, ucp_Sm },
-  { 544, PT_PC, ucp_So },
-  { 547, PT_SC, ucp_Syloti_Nagri },
-  { 560, PT_SC, ucp_Syriac },
-  { 567, PT_SC, ucp_Tagalog },
-  { 575, PT_SC, ucp_Tagbanwa },
-  { 584, PT_SC, ucp_Tai_Le },
-  { 591, PT_SC, ucp_Tamil },
-  { 597, PT_SC, ucp_Telugu },
-  { 604, PT_SC, ucp_Thaana },
-  { 611, PT_SC, ucp_Thai },
-  { 616, PT_SC, ucp_Tibetan },
-  { 624, PT_SC, ucp_Tifinagh },
-  { 633, PT_SC, ucp_Ugaritic },
-  { 642, PT_SC, ucp_Yi },
-  { 645, PT_GC, ucp_Z },
-  { 647, PT_PC, ucp_Zl },
-  { 650, PT_PC, ucp_Zp },
-  { 653, PT_PC, ucp_Zs }
+  {  91, PT_SC, ucp_Carian },
+  {  98, PT_PC, ucp_Cc },
+  { 101, PT_PC, ucp_Cf },
+  { 104, PT_SC, ucp_Cham },
+  { 109, PT_SC, ucp_Cherokee },
+  { 118, PT_PC, ucp_Cn },
+  { 121, PT_PC, ucp_Co },
+  { 124, PT_SC, ucp_Common },
+  { 131, PT_SC, ucp_Coptic },
+  { 138, PT_PC, ucp_Cs },
+  { 141, PT_SC, ucp_Cuneiform },
+  { 151, PT_SC, ucp_Cypriot },
+  { 159, PT_SC, ucp_Cyrillic },
+  { 168, PT_SC, ucp_Deseret },
+  { 176, PT_SC, ucp_Devanagari },
+  { 187, PT_SC, ucp_Ethiopic },
+  { 196, PT_SC, ucp_Georgian },
+  { 205, PT_SC, ucp_Glagolitic },
+  { 216, PT_SC, ucp_Gothic },
+  { 223, PT_SC, ucp_Greek },
+  { 229, PT_SC, ucp_Gujarati },
+  { 238, PT_SC, ucp_Gurmukhi },
+  { 247, PT_SC, ucp_Han },
+  { 251, PT_SC, ucp_Hangul },
+  { 258, PT_SC, ucp_Hanunoo },
+  { 266, PT_SC, ucp_Hebrew },
+  { 273, PT_SC, ucp_Hiragana },
+  { 282, PT_SC, ucp_Inherited },
+  { 292, PT_SC, ucp_Kannada },
+  { 300, PT_SC, ucp_Katakana },
+  { 309, PT_SC, ucp_Kayah_Li },
+  { 318, PT_SC, ucp_Kharoshthi },
+  { 329, PT_SC, ucp_Khmer },
+  { 335, PT_GC, ucp_L },
+  { 337, PT_LAMP, 0 },
+  { 340, PT_SC, ucp_Lao },
+  { 344, PT_SC, ucp_Latin },
+  { 350, PT_SC, ucp_Lepcha },
+  { 357, PT_SC, ucp_Limbu },
+  { 363, PT_SC, ucp_Linear_B },
+  { 372, PT_PC, ucp_Ll },
+  { 375, PT_PC, ucp_Lm },
+  { 378, PT_PC, ucp_Lo },
+  { 381, PT_PC, ucp_Lt },
+  { 384, PT_PC, ucp_Lu },
+  { 387, PT_SC, ucp_Lycian },
+  { 394, PT_SC, ucp_Lydian },
+  { 401, PT_GC, ucp_M },
+  { 403, PT_SC, ucp_Malayalam },
+  { 413, PT_PC, ucp_Mc },
+  { 416, PT_PC, ucp_Me },
+  { 419, PT_PC, ucp_Mn },
+  { 422, PT_SC, ucp_Mongolian },
+  { 432, PT_SC, ucp_Myanmar },
+  { 440, PT_GC, ucp_N },
+  { 442, PT_PC, ucp_Nd },
+  { 445, PT_SC, ucp_New_Tai_Lue },
+  { 457, PT_SC, ucp_Nko },
+  { 461, PT_PC, ucp_Nl },
+  { 464, PT_PC, ucp_No },
+  { 467, PT_SC, ucp_Ogham },
+  { 473, PT_SC, ucp_Ol_Chiki },
+  { 482, PT_SC, ucp_Old_Italic },
+  { 493, PT_SC, ucp_Old_Persian },
+  { 505, PT_SC, ucp_Oriya },
+  { 511, PT_SC, ucp_Osmanya },
+  { 519, PT_GC, ucp_P },
+  { 521, PT_PC, ucp_Pc },
+  { 524, PT_PC, ucp_Pd },
+  { 527, PT_PC, ucp_Pe },
+  { 530, PT_PC, ucp_Pf },
+  { 533, PT_SC, ucp_Phags_Pa },
+  { 542, PT_SC, ucp_Phoenician },
+  { 553, PT_PC, ucp_Pi },
+  { 556, PT_PC, ucp_Po },
+  { 559, PT_PC, ucp_Ps },
+  { 562, PT_SC, ucp_Rejang },
+  { 569, PT_SC, ucp_Runic },
+  { 575, PT_GC, ucp_S },
+  { 577, PT_SC, ucp_Saurashtra },
+  { 588, PT_PC, ucp_Sc },
+  { 591, PT_SC, ucp_Shavian },
+  { 599, PT_SC, ucp_Sinhala },
+  { 607, PT_PC, ucp_Sk },
+  { 610, PT_PC, ucp_Sm },
+  { 613, PT_PC, ucp_So },
+  { 616, PT_SC, ucp_Sundanese },
+  { 626, PT_SC, ucp_Syloti_Nagri },
+  { 639, PT_SC, ucp_Syriac },
+  { 646, PT_SC, ucp_Tagalog },
+  { 654, PT_SC, ucp_Tagbanwa },
+  { 663, PT_SC, ucp_Tai_Le },
+  { 670, PT_SC, ucp_Tamil },
+  { 676, PT_SC, ucp_Telugu },
+  { 683, PT_SC, ucp_Thaana },
+  { 690, PT_SC, ucp_Thai },
+  { 695, PT_SC, ucp_Tibetan },
+  { 703, PT_SC, ucp_Tifinagh },
+  { 712, PT_SC, ucp_Ugaritic },
+  { 721, PT_SC, ucp_Vai },
+  { 725, PT_SC, ucp_Yi },
+  { 728, PT_GC, ucp_Z },
+  { 730, PT_PC, ucp_Zl },
+  { 733, PT_PC, ucp_Zp },
+  { 736, PT_PC, ucp_Zs }
 };

 const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table);
--- a/glib/pcre/pcre_ucp_searchfuncs.c
+++ b/glib/pcre/pcre_ucp_searchfuncs.c
@ -43,58 +43,9 @@ POSSIBILITY OF SUCH DAMAGE.
 /* This module contains code for searching the table of Unicode character
 properties. */

-#ifdef HAVE_CONFIG_H
-#include "config.h"
-#endif
-
 #include "pcre_internal.h"

 #include "ucp.h"               /* Category definitions */
-#include "ucpinternal.h"       /* Internal table details */
-
-
-/* Table to translate from particular type value to the general value. */
-
-static int ucp_gentype[] = {
-  ucp_C, ucp_C, ucp_C, ucp_C, ucp_C,  /* Cc, Cf, Cn, Co, Cs */
-  ucp_L, ucp_L, ucp_L, ucp_L, ucp_L,  /* Ll, Lu, Lm, Lo, Lt */
-  ucp_M, ucp_M, ucp_M,                /* Mc, Me, Mn */
-  ucp_N, ucp_N, ucp_N,                /* Nd, Nl, No */
-  ucp_P, ucp_P, ucp_P, ucp_P, ucp_P,  /* Pc, Pd, Pe, Pf, Pi */
-  ucp_P, ucp_P,                       /* Ps, Po */
-  ucp_S, ucp_S, ucp_S, ucp_S,         /* Sc, Sk, Sm, So */
-  ucp_Z, ucp_Z, ucp_Z                 /* Zl, Zp, Zs */
-};
-
-
-
-/*************************************************
-*         Search table and return type           *
-*************************************************/
-
-/* Three values are returned: the category is ucp_C, ucp_L, etc. The detailed
-character type is ucp_Lu, ucp_Nd, etc. The script is ucp_Latin, etc.
-
-Arguments:
-  c           the character value
-  type_ptr    the detailed character type is returned here
-  script_ptr  the script is returned here
-
-Returns:      the character type category
-*/
-
-int
-_pcre_ucp_findprop(const unsigned int c, int *type_ptr, int *script_ptr)
-{
-/* Note that the Unicode types have the same values in glib and in
- * PCRE, so ucp_Ll == G_UNICODE_LOWERCASE_LETTER,
- * ucp_Zs == G_UNICODE_SPACE_SEPARATOR, and so on. */
-*type_ptr = g_unichar_type(c);
-*script_ptr = g_unichar_get_script(c);
-return ucp_gentype[*type_ptr];
-}
-
-


 /*************************************************
@ -113,7 +64,7 @@ Returns:      the other case or NOTACHAR if none
 unsigned int
 _pcre_ucp_othercase(const unsigned int c)
 {
-int other_case = NOTACHAR;
+unsigned int other_case = NOTACHAR;

 if (g_unichar_islower(c))
  other_case = g_unichar_toupper(c);
--- a/glib/pcre/pcre_valid_utf8.c
+++ b/glib/pcre/pcre_valid_utf8.c
@ -1,4 +1,3 @@
-#include "config.h"
 #include "pcre_internal.h"

 /*
--- a/glib/pcre/pcre_version.c
+++ b/glib/pcre/pcre_version.c
@ -79,7 +79,7 @@ I could find no way of detecting that a macro is defined as an empty string at
 pre-processor time. This hack uses a standard trick for avoiding calling
 the STRING macro with an empty argument when doing the test. */

-PCRE_EXP_DEFN const char *
+PCRE_EXP_DEFN const char * PCRE_CALL_CONVENTION
 pcre_version(void)
 {
 return (XSTRING(Z PCRE_PRERELEASE)[1] == 0)?
--- a/glib/pcre/pcre_xclass.c
+++ b/glib/pcre/pcre_xclass.c
@ -104,9 +104,7 @@ while ((t = *data++) != XCL_END)
 #ifdef SUPPORT_UCP
  else  /* XCL_PROP & XCL_NOTPROP */
    {
-    int chartype, script;
-    int category = _pcre_ucp_findprop(c, &chartype, &script);
-
+    int chartype = UCD_CHARTYPE(c);
    switch(*data)
      {
      case PT_ANY:
@ -119,7 +117,7 @@ while ((t = *data++) != XCL_END)
      break;

      case PT_GC:
-      if ((data[1] == category) == (t == XCL_PROP)) return !negated;
+      if ((data[1] == _pcre_ucp_gentype[chartype]) == (t == XCL_PROP)) return !negated;
      break;

      case PT_PC:
@ -127,7 +125,7 @@ while ((t = *data++) != XCL_END)
      break;

      case PT_SC:
-      if ((data[1] == script) == (t == XCL_PROP)) return !negated;
+      if ((data[1] == UCD_SCRIPT(c)) == (t == XCL_PROP)) return !negated;
      break;

      /* This should never occur, but compilers may mutter if there is no
--- a/glib/pcre/ucp.h
+++ b/glib/pcre/ucp.h
@ -125,7 +125,18 @@ enum {
  ucp_Cuneiform = G_UNICODE_SCRIPT_CUNEIFORM,   /* New for Unicode 5.0.0 */
  ucp_Nko = G_UNICODE_SCRIPT_NKO,           	/* New for Unicode 5.0.0 */
  ucp_Phags_Pa = G_UNICODE_SCRIPT_PHAGS_PA,     /* New for Unicode 5.0.0 */
-  ucp_Phoenician = G_UNICODE_SCRIPT_PHOENICIAN  /* New for Unicode 5.0.0 */
+  ucp_Phoenician = G_UNICODE_SCRIPT_PHOENICIAN, /* New for Unicode 5.0.0 */
+  ucp_Carian = G_UNICODE_SCRIPT_CARIAN,         /* New for Unicode 5.1 */
+  ucp_Cham = G_UNICODE_SCRIPT_CHAM,             /* New for Unicode 5.1 */
+  ucp_Kayah_Li = G_UNICODE_SCRIPT_KAYAH_LI,     /* New for Unicode 5.1 */
+  ucp_Lepcha = G_UNICODE_SCRIPT_LEPCHA,         /* New for Unicode 5.1 */
+  ucp_Lycian = G_UNICODE_SCRIPT_LYCIAN,         /* New for Unicode 5.1 */
+  ucp_Lydian = G_UNICODE_SCRIPT_LYDIAN,         /* New for Unicode 5.1 */
+  ucp_Ol_Chiki = G_UNICODE_SCRIPT_OL_CHIKI,     /* New for Unicode 5.1 */
+  ucp_Rejang = G_UNICODE_SCRIPT_REJANG,         /* New for Unicode 5.1 */
+  ucp_Saurashtra = G_UNICODE_SCRIPT_SAURASHTRA, /* New for Unicode 5.1 */
+  ucp_Sundanese = G_UNICODE_SCRIPT_SUNDANESE,   /* New for Unicode 5.1 */
+  ucp_Vai = G_UNICODE_SCRIPT_VAI                /* New for Unicode 5.1 */
 };

 #endif