Update to PCRE 7.2

svn path=/trunk/; revision=5659
2025-08-22 17:08:53 +02:00 · 2007-07-31 17:22:56 +00:00
parent 4067475919
commit d966e93faf
23 changed files with 1987 additions and 557 deletions
--- a/glib/pcre/pcre_dfa_exec.c
+++ b/glib/pcre/pcre_dfa_exec.c
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
-           Copyright (c) 1997-2006 University of Cambridge
+           Copyright (c) 1997-2007 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -63,24 +63,30 @@ applications. */

 /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes
 into others, under special conditions. A gap of 20 between the blocks should be
-enough. */
+enough. The resulting opcodes don't have to be less than 256 because they are
+never stored, so we push them well clear of the normal opcodes. */

-#define OP_PROP_EXTRA 100
-#define OP_EXTUNI_EXTRA 120
-#define OP_ANYNL_EXTRA 140
+#define OP_PROP_EXTRA       300
+#define OP_EXTUNI_EXTRA     320
+#define OP_ANYNL_EXTRA      340
+#define OP_HSPACE_EXTRA     360
+#define OP_VSPACE_EXTRA     380


 /* This table identifies those opcodes that are followed immediately by a
 character that is to be tested in some way. This makes is possible to
 centralize the loading of these characters. In the case of Type * etc, the
 "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
-small value. */
+small value. ***NOTE*** If the start of this table is modified, the two tables
+that follow must also be modified. */

 static uschar coptable[] = {
  0,                             /* End                                    */
-  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* \A, \G, \B, \b, \D, \d, \S, \s, \W, \w */
+  0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */
+  0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */
  0, 0,                          /* Any, Anybyte                           */
-  0, 0, 0, 0,                    /* NOTPROP, PROP, EXTUNI, ANYNL           */
+  0, 0, 0,                       /* NOTPROP, PROP, EXTUNI                  */
+  0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */
  0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */
  1,                             /* Char                                   */
  1,                             /* Charnc                                 */
@@ -127,7 +133,7 @@ static uschar coptable[] = {
 and \w */

 static uschar toptable1[] = {
-  0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0,
  ctype_digit, ctype_digit,
  ctype_space, ctype_space,
  ctype_word,  ctype_word,
@@ -135,7 +141,7 @@ static uschar toptable1[] = {
 };

 static uschar toptable2[] = {
-  0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0,
  ctype_digit, 0,
  ctype_space, 0,
  ctype_word,  0,
@@ -500,7 +506,9 @@ for (;;)
    const uschar *code;
    int state_offset = current_state->offset;
    int count, codevalue;
+#ifdef SUPPORT_UCP
    int chartype, script;
+#endif

 #ifdef DEBUG
    printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
@@ -555,10 +563,10 @@ for (;;)
    permitted.

    We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
-    argument that is not a data character - but is always one byte long.
-    Unfortunately, we have to take special action to deal with  \P, \p, and
-    \X in this case. To keep the other cases fast, convert these ones to new
-    opcodes. */
+    argument that is not a data character - but is always one byte long. We
+    have to take special action to deal with  \P, \p, \H, \h, \V, \v and \X in
+    this case. To keep the other cases fast, convert these ones to new opcodes.
+    */

    if (coptable[codevalue] > 0)
      {
@@ -576,6 +584,10 @@ for (;;)
          case OP_PROP: codevalue += OP_PROP_EXTRA; break;
          case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;
          case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;
+          case OP_NOT_HSPACE:
+          case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break;
+          case OP_NOT_VSPACE:
+          case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break;
          default: break;
          }
        }
@@ -783,13 +795,12 @@ for (;;)
      break;


-#ifdef SUPPORT_UCP
-
      /*-----------------------------------------------------------------*/
      /* Check the next character by Unicode property. We will get here only
      if the support is in the binary; otherwise a compile-time error occurs.
      */

+#ifdef SUPPORT_UCP
      case OP_PROP:
      case OP_NOTPROP:
      if (clen > 0)
@@ -970,6 +981,7 @@ for (;;)
      argument. It keeps the code above fast for the other cases. The argument
      is in the d variable. */

+#ifdef SUPPORT_UCP
      case OP_PROP_EXTRA + OP_TYPEPLUS:
      case OP_PROP_EXTRA + OP_TYPEMINPLUS:
      case OP_PROP_EXTRA + OP_TYPEPOSPLUS:
@@ -1049,6 +1061,7 @@ for (;;)
        ADD_NEW_DATA(-state_offset, count, ncount);
        }
      break;
+#endif

      /*-----------------------------------------------------------------*/
      case OP_ANYNL_EXTRA + OP_TYPEPLUS:
@@ -1085,6 +1098,97 @@ for (;;)
      break;

      /*-----------------------------------------------------------------*/
+      case OP_VSPACE_EXTRA + OP_TYPEPLUS:
+      case OP_VSPACE_EXTRA + OP_TYPEMINPLUS:
+      case OP_VSPACE_EXTRA + OP_TYPEPOSPLUS:
+      count = current_state->count;  /* Already matched */
+      if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
+      if (clen > 0)
+        {
+        BOOL OK;
+        switch (c)
+          {
+          case 0x000a:
+          case 0x000b:
+          case 0x000c:
+          case 0x000d:
+          case 0x0085:
+          case 0x2028:
+          case 0x2029:
+          OK = TRUE;
+          break;
+
+          default:
+          OK = FALSE;
+          break;
+          }
+
+        if (OK == (d == OP_VSPACE))
+          {
+          if (count > 0 && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS)
+            {
+            active_count--;           /* Remove non-match possibility */
+            next_active_state--;
+            }
+          count++;
+          ADD_NEW_DATA(-state_offset, count, 0);
+          }
+        }
+      break;
+
+      /*-----------------------------------------------------------------*/
+      case OP_HSPACE_EXTRA + OP_TYPEPLUS:
+      case OP_HSPACE_EXTRA + OP_TYPEMINPLUS:
+      case OP_HSPACE_EXTRA + OP_TYPEPOSPLUS:
+      count = current_state->count;  /* Already matched */
+      if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
+      if (clen > 0)
+        {
+        BOOL OK;
+        switch (c)
+          {
+          case 0x09:      /* HT */
+          case 0x20:      /* SPACE */
+          case 0xa0:      /* NBSP */
+          case 0x1680:    /* OGHAM SPACE MARK */
+          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
+          case 0x2000:    /* EN QUAD */
+          case 0x2001:    /* EM QUAD */
+          case 0x2002:    /* EN SPACE */
+          case 0x2003:    /* EM SPACE */
+          case 0x2004:    /* THREE-PER-EM SPACE */
+          case 0x2005:    /* FOUR-PER-EM SPACE */
+          case 0x2006:    /* SIX-PER-EM SPACE */
+          case 0x2007:    /* FIGURE SPACE */
+          case 0x2008:    /* PUNCTUATION SPACE */
+          case 0x2009:    /* THIN SPACE */
+          case 0x200A:    /* HAIR SPACE */
+          case 0x202f:    /* NARROW NO-BREAK SPACE */
+          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
+          case 0x3000:    /* IDEOGRAPHIC SPACE */
+          OK = TRUE;
+          break;
+
+          default:
+          OK = FALSE;
+          break;
+          }
+
+        if (OK == (d == OP_HSPACE))
+          {
+          if (count > 0 && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS)
+            {
+            active_count--;           /* Remove non-match possibility */
+            next_active_state--;
+            }
+          count++;
+          ADD_NEW_DATA(-state_offset, count, 0);
+          }
+        }
+      break;
+
+      /*-----------------------------------------------------------------*/
+#ifdef SUPPORT_UCP
      case OP_PROP_EXTRA + OP_TYPEQUERY:
      case OP_PROP_EXTRA + OP_TYPEMINQUERY:
      case OP_PROP_EXTRA + OP_TYPEPOSQUERY:
@@ -1182,6 +1286,7 @@ for (;;)
        ADD_NEW_DATA(-(state_offset + count), 0, ncount);
        }
      break;
+#endif

      /*-----------------------------------------------------------------*/
      case OP_ANYNL_EXTRA + OP_TYPEQUERY:
@@ -1226,6 +1331,112 @@ for (;;)
      break;

      /*-----------------------------------------------------------------*/
+      case OP_VSPACE_EXTRA + OP_TYPEQUERY:
+      case OP_VSPACE_EXTRA + OP_TYPEMINQUERY:
+      case OP_VSPACE_EXTRA + OP_TYPEPOSQUERY:
+      count = 2;
+      goto QS4;
+
+      case OP_VSPACE_EXTRA + OP_TYPESTAR:
+      case OP_VSPACE_EXTRA + OP_TYPEMINSTAR:
+      case OP_VSPACE_EXTRA + OP_TYPEPOSSTAR:
+      count = 0;
+
+      QS4:
+      ADD_ACTIVE(state_offset + 2, 0);
+      if (clen > 0)
+        {
+        BOOL OK;
+        switch (c)
+          {
+          case 0x000a:
+          case 0x000b:
+          case 0x000c:
+          case 0x000d:
+          case 0x0085:
+          case 0x2028:
+          case 0x2029:
+          OK = TRUE;
+          break;
+
+          default:
+          OK = FALSE;
+          break;
+          }
+        if (OK == (d == OP_VSPACE))
+          {
+          if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR ||
+              codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY)
+            {
+            active_count--;           /* Remove non-match possibility */
+            next_active_state--;
+            }
+          ADD_NEW_DATA(-(state_offset + count), 0, 0);
+          }
+        }
+      break;
+
+      /*-----------------------------------------------------------------*/
+      case OP_HSPACE_EXTRA + OP_TYPEQUERY:
+      case OP_HSPACE_EXTRA + OP_TYPEMINQUERY:
+      case OP_HSPACE_EXTRA + OP_TYPEPOSQUERY:
+      count = 2;
+      goto QS5;
+
+      case OP_HSPACE_EXTRA + OP_TYPESTAR:
+      case OP_HSPACE_EXTRA + OP_TYPEMINSTAR:
+      case OP_HSPACE_EXTRA + OP_TYPEPOSSTAR:
+      count = 0;
+
+      QS5:
+      ADD_ACTIVE(state_offset + 2, 0);
+      if (clen > 0)
+        {
+        BOOL OK;
+        switch (c)
+          {
+          case 0x09:      /* HT */
+          case 0x20:      /* SPACE */
+          case 0xa0:      /* NBSP */
+          case 0x1680:    /* OGHAM SPACE MARK */
+          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
+          case 0x2000:    /* EN QUAD */
+          case 0x2001:    /* EM QUAD */
+          case 0x2002:    /* EN SPACE */
+          case 0x2003:    /* EM SPACE */
+          case 0x2004:    /* THREE-PER-EM SPACE */
+          case 0x2005:    /* FOUR-PER-EM SPACE */
+          case 0x2006:    /* SIX-PER-EM SPACE */
+          case 0x2007:    /* FIGURE SPACE */
+          case 0x2008:    /* PUNCTUATION SPACE */
+          case 0x2009:    /* THIN SPACE */
+          case 0x200A:    /* HAIR SPACE */
+          case 0x202f:    /* NARROW NO-BREAK SPACE */
+          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
+          case 0x3000:    /* IDEOGRAPHIC SPACE */
+          OK = TRUE;
+          break;
+
+          default:
+          OK = FALSE;
+          break;
+          }
+
+        if (OK == (d == OP_HSPACE))
+          {
+          if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR ||
+              codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY)
+            {
+            active_count--;           /* Remove non-match possibility */
+            next_active_state--;
+            }
+          ADD_NEW_DATA(-(state_offset + count), 0, 0);
+          }
+        }
+      break;
+
+      /*-----------------------------------------------------------------*/
+#ifdef SUPPORT_UCP
      case OP_PROP_EXTRA + OP_TYPEEXACT:
      case OP_PROP_EXTRA + OP_TYPEUPTO:
      case OP_PROP_EXTRA + OP_TYPEMINUPTO:
@@ -1313,6 +1524,7 @@ for (;;)
          { ADD_NEW_DATA(-state_offset, count, ncount); }
        }
      break;
+#endif

      /*-----------------------------------------------------------------*/
      case OP_ANYNL_EXTRA + OP_TYPEEXACT:
@@ -1352,6 +1564,103 @@ for (;;)
        }
      break;

+      /*-----------------------------------------------------------------*/
+      case OP_VSPACE_EXTRA + OP_TYPEEXACT:
+      case OP_VSPACE_EXTRA + OP_TYPEUPTO:
+      case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
+      case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
+      if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
+        { ADD_ACTIVE(state_offset + 4, 0); }
+      count = current_state->count;  /* Number already matched */
+      if (clen > 0)
+        {
+        BOOL OK;
+        switch (c)
+          {
+          case 0x000a:
+          case 0x000b:
+          case 0x000c:
+          case 0x000d:
+          case 0x0085:
+          case 0x2028:
+          case 0x2029:
+          OK = TRUE;
+          break;
+
+          default:
+          OK = FALSE;
+          }
+
+        if (OK == (d == OP_VSPACE))
+          {
+          if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO)
+            {
+            active_count--;           /* Remove non-match possibility */
+            next_active_state--;
+            }
+          if (++count >= GET2(code, 1))
+            { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }
+          else
+            { ADD_NEW_DATA(-state_offset, count, 0); }
+          }
+        }
+      break;
+
+      /*-----------------------------------------------------------------*/
+      case OP_HSPACE_EXTRA + OP_TYPEEXACT:
+      case OP_HSPACE_EXTRA + OP_TYPEUPTO:
+      case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
+      case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
+      if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
+        { ADD_ACTIVE(state_offset + 4, 0); }
+      count = current_state->count;  /* Number already matched */
+      if (clen > 0)
+        {
+        BOOL OK;
+        switch (c)
+          {
+          case 0x09:      /* HT */
+          case 0x20:      /* SPACE */
+          case 0xa0:      /* NBSP */
+          case 0x1680:    /* OGHAM SPACE MARK */
+          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
+          case 0x2000:    /* EN QUAD */
+          case 0x2001:    /* EM QUAD */
+          case 0x2002:    /* EN SPACE */
+          case 0x2003:    /* EM SPACE */
+          case 0x2004:    /* THREE-PER-EM SPACE */
+          case 0x2005:    /* FOUR-PER-EM SPACE */
+          case 0x2006:    /* SIX-PER-EM SPACE */
+          case 0x2007:    /* FIGURE SPACE */
+          case 0x2008:    /* PUNCTUATION SPACE */
+          case 0x2009:    /* THIN SPACE */
+          case 0x200A:    /* HAIR SPACE */
+          case 0x202f:    /* NARROW NO-BREAK SPACE */
+          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
+          case 0x3000:    /* IDEOGRAPHIC SPACE */
+          OK = TRUE;
+          break;
+
+          default:
+          OK = FALSE;
+          break;
+          }
+
+        if (OK == (d == OP_HSPACE))
+          {
+          if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO)
+            {
+            active_count--;           /* Remove non-match possibility */
+            next_active_state--;
+            }
+          if (++count >= GET2(code, 1))
+            { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }
+          else
+            { ADD_NEW_DATA(-state_offset, count, 0); }
+          }
+        }
+      break;
+
 /* ========================================================================== */
      /* These opcodes are followed by a character that is usually compared
      to the current subject character; it is loaded into d. We still get
@@ -1450,6 +1759,102 @@ for (;;)
        }
      break;

+      /*-----------------------------------------------------------------*/
+      case OP_NOT_VSPACE:
+      if (clen > 0) switch(c)
+        {
+        case 0x000a:
+        case 0x000b:
+        case 0x000c:
+        case 0x000d:
+        case 0x0085:
+        case 0x2028:
+        case 0x2029:
+        break;
+
+        default:
+        ADD_NEW(state_offset + 1, 0);
+        break;
+        }
+      break;
+
+      /*-----------------------------------------------------------------*/
+      case OP_VSPACE:
+      if (clen > 0) switch(c)
+        {
+        case 0x000a:
+        case 0x000b:
+        case 0x000c:
+        case 0x000d:
+        case 0x0085:
+        case 0x2028:
+        case 0x2029:
+        ADD_NEW(state_offset + 1, 0);
+        break;
+
+        default: break;
+        }
+      break;
+
+      /*-----------------------------------------------------------------*/
+      case OP_NOT_HSPACE:
+      if (clen > 0) switch(c)
+        {
+        case 0x09:      /* HT */
+        case 0x20:      /* SPACE */
+        case 0xa0:      /* NBSP */
+        case 0x1680:    /* OGHAM SPACE MARK */
+        case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
+        case 0x2000:    /* EN QUAD */
+        case 0x2001:    /* EM QUAD */
+        case 0x2002:    /* EN SPACE */
+        case 0x2003:    /* EM SPACE */
+        case 0x2004:    /* THREE-PER-EM SPACE */
+        case 0x2005:    /* FOUR-PER-EM SPACE */
+        case 0x2006:    /* SIX-PER-EM SPACE */
+        case 0x2007:    /* FIGURE SPACE */
+        case 0x2008:    /* PUNCTUATION SPACE */
+        case 0x2009:    /* THIN SPACE */
+        case 0x200A:    /* HAIR SPACE */
+        case 0x202f:    /* NARROW NO-BREAK SPACE */
+        case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
+        case 0x3000:    /* IDEOGRAPHIC SPACE */
+        break;
+
+        default:
+        ADD_NEW(state_offset + 1, 0);
+        break;
+        }
+      break;
+
+      /*-----------------------------------------------------------------*/
+      case OP_HSPACE:
+      if (clen > 0) switch(c)
+        {
+        case 0x09:      /* HT */
+        case 0x20:      /* SPACE */
+        case 0xa0:      /* NBSP */
+        case 0x1680:    /* OGHAM SPACE MARK */
+        case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
+        case 0x2000:    /* EN QUAD */
+        case 0x2001:    /* EM QUAD */
+        case 0x2002:    /* EN SPACE */
+        case 0x2003:    /* EM SPACE */
+        case 0x2004:    /* THREE-PER-EM SPACE */
+        case 0x2005:    /* FOUR-PER-EM SPACE */
+        case 0x2006:    /* SIX-PER-EM SPACE */
+        case 0x2007:    /* FIGURE SPACE */
+        case 0x2008:    /* PUNCTUATION SPACE */
+        case 0x2009:    /* THIN SPACE */
+        case 0x200A:    /* HAIR SPACE */
+        case 0x202f:    /* NARROW NO-BREAK SPACE */
+        case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
+        case 0x3000:    /* IDEOGRAPHIC SPACE */
+        ADD_NEW(state_offset + 1, 0);
+        break;
+        }
+      break;
+
      /*-----------------------------------------------------------------*/
      /* Match a negated single character. This is only used for one-byte
      characters, that is, we know that d < 256. The character we are
@@ -2057,7 +2462,7 @@ is not anchored.

 Arguments:
  argument_re     points to the compiled expression
-  extra_data      points to extra data or is NULL (not currently used)
+  extra_data      points to extra data or is NULL
  subject         points to the subject string
  length          length of subject string (may contain binary zeros)
  start_offset    where to start in the subject string
@@ -2073,7 +2478,7 @@ Returns:          > 0 => number of match offset pairs placed in offsets
                 < -1 => some kind of unexpected problem
 */

-PCRE_DATA_SCOPE int
+PCRE_EXP_DEFN int
 pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
  const char *subject, int length, int start_offset, int options, int *offsets,
  int offsetcount, int *workspace, int wscount)
@@ -2163,10 +2568,10 @@ md->end_subject = end_subject;
 md->moptions = options;
 md->poptions = re->options;

-/* Handle different types of newline. The two bits give four cases. If nothing
-is set at run time, whatever was used at compile time applies. */
+/* Handle different types of newline. The three bits give eight cases. If
+nothing is set at run time, whatever was used at compile time applies. */

-switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : options) &
+switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
         PCRE_NEWLINE_BITS)
  {
  case 0: newline = NEWLINE; break;   /* Compile-time default */
@@ -2175,10 +2580,15 @@ switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : options) &
  case PCRE_NEWLINE_CR+
       PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
  case PCRE_NEWLINE_ANY: newline = -1; break;
+  case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
  default: return PCRE_ERROR_BADNEWLINE;
  }

-if (newline < 0)
+if (newline == -2)
+  {
+  md->nltype = NLTYPE_ANYCRLF;
+  }
+else if (newline < 0)
  {
  md->nltype = NLTYPE_ANY;
  }
@@ -2308,6 +2718,16 @@ for (;;)
        {
        while (current_subject <= end_subject && !WAS_NEWLINE(current_subject))
          current_subject++;
+
+        /* If we have just passed a CR and the newline option is ANY or
+        ANYCRLF, and we are now at a LF, advance the match position by one more
+        character. */
+
+        if (current_subject[-1] == '\r' &&
+             (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
+             current_subject < end_subject &&
+             *current_subject == '\n')
+          current_subject++;
        }
      }

@@ -2416,11 +2836,14 @@ for (;;)
    }
  if (current_subject > end_subject) break;

-  /* If we have just passed a CR and the newline option is CRLF or ANY, and we
-  are now at a LF, advance the match position by one more character. */
+  /* If we have just passed a CR and the newline option is CRLF or ANY or
+  ANYCRLF, and we are now at a LF, advance the match position by one more
+  character. */

  if (current_subject[-1] == '\r' &&
-       (md->nltype == NLTYPE_ANY || md->nllen == 2) &&
+       (md->nltype == NLTYPE_ANY ||
+        md->nltype == NLTYPE_ANYCRLF ||
+        md->nllen == 2) &&
       current_subject < end_subject &&
       *current_subject == '\n')
    current_subject++;