Update PCRE to 7.8

svn path=/trunk/; revision=7813
2025-08-23 17:38:54 +02:00 · 2009-01-18 06:32:03 +00:00
parent 1da8112081
commit d6f23279e7
21 changed files with 411 additions and 321 deletions
--- a/glib/pcre/pcre_dfa_exec.c
+++ b/glib/pcre/pcre_dfa_exec.c
@@ -512,9 +512,6 @@ for (;;)
    const uschar *code;
    int state_offset = current_state->offset;
    int count, codevalue;
-#ifdef SUPPORT_UCP
-    int chartype, script;
-#endif

 #ifdef DEBUG
    printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
@@ -825,7 +822,7 @@ for (;;)
      if (clen > 0)
        {
        BOOL OK;
-        int category = _pcre_ucp_findprop(c, &chartype, &script);
+        int chartype = UCD_CHARTYPE(c);
        switch(code[1])
          {
          case PT_ANY:
@@ -837,7 +834,7 @@ for (;;)
          break;

          case PT_GC:
-          OK = category == code[2];
+          OK = _pcre_ucp_gentype[chartype] == code[2];
          break;

          case PT_PC:
@@ -845,7 +842,7 @@ for (;;)
          break;

          case PT_SC:
-          OK = script == code[2];
+          OK = UCD_SCRIPT(c) == code[2];
          break;

          /* Should never occur, but keep compilers from grumbling. */
@@ -994,7 +991,7 @@ for (;;)
      if (clen > 0)
        {
        BOOL OK;
-        int category = _pcre_ucp_findprop(c, &chartype, &script);
+        int chartype = UCD_CHARTYPE(c);
        switch(code[2])
          {
          case PT_ANY:
@@ -1006,7 +1003,7 @@ for (;;)
          break;

          case PT_GC:
-          OK = category == code[3];
+          OK = _pcre_ucp_gentype[chartype] == code[3];
          break;

          case PT_PC:
@@ -1014,7 +1011,7 @@ for (;;)
          break;

          case PT_SC:
-          OK = script == code[3];
+          OK = UCD_SCRIPT(c) == code[3];
          break;

          /* Should never occur, but keep compilers from grumbling. */
@@ -1043,7 +1040,7 @@ for (;;)
      case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
      count = current_state->count;  /* Already matched */
      if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
-      if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
+      if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
        {
        const uschar *nptr = ptr + clen;
        int ncount = 0;
@@ -1057,7 +1054,7 @@ for (;;)
          int nd;
          int ndlen = 1;
          GETCHARLEN(nd, nptr, ndlen);
-          if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
+          if (UCD_CATEGORY(nd) != ucp_M) break;
          ncount++;
          nptr += ndlen;
          }
@@ -1216,7 +1213,7 @@ for (;;)
      if (clen > 0)
        {
        BOOL OK;
-        int category = _pcre_ucp_findprop(c, &chartype, &script);
+        int chartype = UCD_CHARTYPE(c);
        switch(code[2])
          {
          case PT_ANY:
@@ -1228,7 +1225,7 @@ for (;;)
          break;

          case PT_GC:
-          OK = category == code[3];
+          OK = _pcre_ucp_gentype[chartype] == code[3];
          break;

          case PT_PC:
@@ -1236,7 +1233,7 @@ for (;;)
          break;

          case PT_SC:
-          OK = script == code[3];
+          OK = UCD_SCRIPT(c) == code[3];
          break;

          /* Should never occur, but keep compilers from grumbling. */
@@ -1274,7 +1271,7 @@ for (;;)
      QS2:

      ADD_ACTIVE(state_offset + 2, 0);
-      if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
+      if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
        {
        const uschar *nptr = ptr + clen;
        int ncount = 0;
@@ -1289,7 +1286,7 @@ for (;;)
          int nd;
          int ndlen = 1;
          GETCHARLEN(nd, nptr, ndlen);
-          if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
+          if (UCD_CATEGORY(nd) != ucp_M) break;
          ncount++;
          nptr += ndlen;
          }
@@ -1463,7 +1460,7 @@ for (;;)
      if (clen > 0)
        {
        BOOL OK;
-        int category = _pcre_ucp_findprop(c, &chartype, &script);
+        int chartype = UCD_CHARTYPE(c);
        switch(code[4])
          {
          case PT_ANY:
@@ -1475,7 +1472,7 @@ for (;;)
          break;

          case PT_GC:
-          OK = category == code[5];
+          OK = _pcre_ucp_gentype[chartype] == code[5];
          break;

          case PT_PC:
@@ -1483,7 +1480,7 @@ for (;;)
          break;

          case PT_SC:
-          OK = script == code[5];
+          OK = UCD_SCRIPT(c) == code[5];
          break;

          /* Should never occur, but keep compilers from grumbling. */
@@ -1516,7 +1513,7 @@ for (;;)
      if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
        { ADD_ACTIVE(state_offset + 4, 0); }
      count = current_state->count;  /* Number already matched */
-      if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
+      if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
        {
        const uschar *nptr = ptr + clen;
        int ncount = 0;
@@ -1530,7 +1527,7 @@ for (;;)
          int nd;
          int ndlen = 1;
          GETCHARLEN(nd, nptr, ndlen);
-          if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
+          if (UCD_CATEGORY(nd) != ucp_M) break;
          ncount++;
          nptr += ndlen;
          }
@@ -1710,7 +1707,7 @@ for (;;)
          other case of the character. */

 #ifdef SUPPORT_UCP
-          othercase = _pcre_ucp_othercase(c);
+          othercase = UCD_OTHERCASE(c);
 #else
          othercase = NOTACHAR;
 #endif
@@ -1735,7 +1732,7 @@ for (;;)
      to wait for them to pass before continuing. */

      case OP_EXTUNI:
-      if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
+      if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
        {
        const uschar *nptr = ptr + clen;
        int ncount = 0;
@@ -1743,7 +1740,7 @@ for (;;)
          {
          int nclen = 1;
          GETCHARLEN(c, nptr, nclen);
-          if (_pcre_ucp_findprop(c, &chartype, &script) != ucp_M) break;
+          if (UCD_CATEGORY(c) != ucp_M) break;
          ncount++;
          nptr += nclen;
          }
@@ -1911,7 +1908,7 @@ for (;;)
          if (utf8 && d >= 128)
            {
 #ifdef SUPPORT_UCP
-            otherd = _pcre_ucp_othercase(d);
+            otherd = UCD_OTHERCASE(d);
 #endif  /* SUPPORT_UCP */
            }
          else
@@ -1949,7 +1946,7 @@ for (;;)
          if (utf8 && d >= 128)
            {
 #ifdef SUPPORT_UCP
-            otherd = _pcre_ucp_othercase(d);
+            otherd = UCD_OTHERCASE(d);
 #endif  /* SUPPORT_UCP */
            }
          else
@@ -1985,7 +1982,7 @@ for (;;)
          if (utf8 && d >= 128)
            {
 #ifdef SUPPORT_UCP
-            otherd = _pcre_ucp_othercase(d);
+            otherd = UCD_OTHERCASE(d);
 #endif  /* SUPPORT_UCP */
            }
          else
@@ -2017,7 +2014,7 @@ for (;;)
          if (utf8 && d >= 128)
            {
 #ifdef SUPPORT_UCP
-            otherd = _pcre_ucp_othercase(d);
+            otherd = UCD_OTHERCASE(d);
 #endif  /* SUPPORT_UCP */
            }
          else
@@ -2052,7 +2049,7 @@ for (;;)
          if (utf8 && d >= 128)
            {
 #ifdef SUPPORT_UCP
-            otherd = _pcre_ucp_othercase(d);
+            otherd = UCD_OTHERCASE(d);
 #endif  /* SUPPORT_UCP */
            }
          else
@@ -2508,7 +2505,7 @@ Returns:          > 0 => number of match offset pairs placed in offsets
                 < -1 => some kind of unexpected problem
 */

-PCRE_EXP_DEFN int
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
 pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
  const char *subject, int length, int start_offset, int options, int *offsets,
  int offsetcount, int *workspace, int wscount)
@@ -2736,7 +2733,18 @@ for (;;)

    if (firstline)
      {
-      const uschar *t = current_subject;
+      USPTR t = current_subject;
+#ifdef SUPPORT_UTF8
+      if (utf8)
+        {
+        while (t < md->end_subject && !IS_NEWLINE(t))
+          {
+          t++;
+          while (t < end_subject && (*t & 0xc0) == 0x80) t++;
+          }
+        }
+      else
+#endif
      while (t < md->end_subject && !IS_NEWLINE(t)) t++;
      end_subject = t;
      }
@@ -2758,7 +2766,20 @@ for (;;)
      {
      if (current_subject > md->start_subject + start_offset)
        {
-        while (current_subject <= end_subject && !WAS_NEWLINE(current_subject))
+#ifdef SUPPORT_UTF8
+        if (utf8)
+          {
+          while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
+            {
+            current_subject++;
+            while(current_subject < end_subject &&
+                  (*current_subject & 0xc0) == 0x80)
+              current_subject++;
+            }
+          }
+        else
+#endif
+        while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
          current_subject++;

        /* If we have just passed a CR and the newline option is ANY or