mirror of
https://gitlab.gnome.org/GNOME/glib.git
synced 2025-08-22 17:08:53 +02:00
Update to PCRE 7.2
svn path=/trunk/; revision=5659
This commit is contained in:
@@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -63,24 +63,30 @@ applications. */
|
||||
|
||||
/* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes
|
||||
into others, under special conditions. A gap of 20 between the blocks should be
|
||||
enough. */
|
||||
enough. The resulting opcodes don't have to be less than 256 because they are
|
||||
never stored, so we push them well clear of the normal opcodes. */
|
||||
|
||||
#define OP_PROP_EXTRA 100
|
||||
#define OP_EXTUNI_EXTRA 120
|
||||
#define OP_ANYNL_EXTRA 140
|
||||
#define OP_PROP_EXTRA 300
|
||||
#define OP_EXTUNI_EXTRA 320
|
||||
#define OP_ANYNL_EXTRA 340
|
||||
#define OP_HSPACE_EXTRA 360
|
||||
#define OP_VSPACE_EXTRA 380
|
||||
|
||||
|
||||
/* This table identifies those opcodes that are followed immediately by a
|
||||
character that is to be tested in some way. This makes it possible to
|
||||
centralize the loading of these characters. In the case of Type * etc, the
|
||||
"character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
|
||||
small value. */
|
||||
small value. ***NOTE*** If the start of this table is modified, the two tables
|
||||
that follow must also be modified. */
|
||||
|
||||
static uschar coptable[] = {
|
||||
0, /* End */
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* \A, \G, \B, \b, \D, \d, \S, \s, \W, \w */
|
||||
0, 0, 0, 0, 0, /* \A, \G, \K, \B, \b */
|
||||
0, 0, 0, 0, 0, 0, /* \D, \d, \S, \s, \W, \w */
|
||||
0, 0, /* Any, Anybyte */
|
||||
0, 0, 0, 0, /* NOTPROP, PROP, EXTUNI, ANYNL */
|
||||
0, 0, 0, /* NOTPROP, PROP, EXTUNI */
|
||||
0, 0, 0, 0, 0, /* \R, \H, \h, \V, \v */
|
||||
0, 0, 0, 0, 0, /* \Z, \z, Opt, ^, $ */
|
||||
1, /* Char */
|
||||
1, /* Charnc */
|
||||
@@ -127,7 +133,7 @@ static uschar coptable[] = {
|
||||
and \w */
|
||||
|
||||
static uschar toptable1[] = {
|
||||
0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0,
|
||||
ctype_digit, ctype_digit,
|
||||
ctype_space, ctype_space,
|
||||
ctype_word, ctype_word,
|
||||
@@ -135,7 +141,7 @@ static uschar toptable1[] = {
|
||||
};
|
||||
|
||||
static uschar toptable2[] = {
|
||||
0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0,
|
||||
ctype_digit, 0,
|
||||
ctype_space, 0,
|
||||
ctype_word, 0,
|
||||
@@ -500,7 +506,9 @@ for (;;)
|
||||
const uschar *code;
|
||||
int state_offset = current_state->offset;
|
||||
int count, codevalue;
|
||||
#ifdef SUPPORT_UCP
|
||||
int chartype, script;
|
||||
#endif
|
||||
|
||||
#ifdef DEBUG
|
||||
printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
|
||||
@@ -555,10 +563,10 @@ for (;;)
|
||||
permitted.
|
||||
|
||||
We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
|
||||
argument that is not a data character - but is always one byte long.
|
||||
Unfortunately, we have to take special action to deal with \P, \p, and
|
||||
\X in this case. To keep the other cases fast, convert these ones to new
|
||||
opcodes. */
|
||||
argument that is not a data character - but is always one byte long. We
|
||||
have to take special action to deal with \P, \p, \H, \h, \V, \v and \X in
|
||||
this case. To keep the other cases fast, convert these ones to new opcodes.
|
||||
*/
|
||||
|
||||
if (coptable[codevalue] > 0)
|
||||
{
|
||||
@@ -576,6 +584,10 @@ for (;;)
|
||||
case OP_PROP: codevalue += OP_PROP_EXTRA; break;
|
||||
case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;
|
||||
case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;
|
||||
case OP_NOT_HSPACE:
|
||||
case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break;
|
||||
case OP_NOT_VSPACE:
|
||||
case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break;
|
||||
default: break;
|
||||
}
|
||||
}
|
||||
@@ -783,13 +795,12 @@ for (;;)
|
||||
break;
|
||||
|
||||
|
||||
#ifdef SUPPORT_UCP
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
/* Check the next character by Unicode property. We will get here only
|
||||
if the support is in the binary; otherwise a compile-time error occurs.
|
||||
*/
|
||||
|
||||
#ifdef SUPPORT_UCP
|
||||
case OP_PROP:
|
||||
case OP_NOTPROP:
|
||||
if (clen > 0)
|
||||
@@ -970,6 +981,7 @@ for (;;)
|
||||
argument. It keeps the code above fast for the other cases. The argument
|
||||
is in the d variable. */
|
||||
|
||||
#ifdef SUPPORT_UCP
|
||||
case OP_PROP_EXTRA + OP_TYPEPLUS:
|
||||
case OP_PROP_EXTRA + OP_TYPEMINPLUS:
|
||||
case OP_PROP_EXTRA + OP_TYPEPOSPLUS:
|
||||
@@ -1049,6 +1061,7 @@ for (;;)
|
||||
ADD_NEW_DATA(-state_offset, count, ncount);
|
||||
}
|
||||
break;
|
||||
#endif
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_ANYNL_EXTRA + OP_TYPEPLUS:
|
||||
@@ -1085,6 +1098,97 @@ for (;;)
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_VSPACE_EXTRA + OP_TYPEPLUS:
|
||||
case OP_VSPACE_EXTRA + OP_TYPEMINPLUS:
|
||||
case OP_VSPACE_EXTRA + OP_TYPEPOSPLUS:
|
||||
count = current_state->count; /* Already matched */
|
||||
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
|
||||
if (clen > 0)
|
||||
{
|
||||
BOOL OK;
|
||||
switch (c)
|
||||
{
|
||||
case 0x000a:
|
||||
case 0x000b:
|
||||
case 0x000c:
|
||||
case 0x000d:
|
||||
case 0x0085:
|
||||
case 0x2028:
|
||||
case 0x2029:
|
||||
OK = TRUE;
|
||||
break;
|
||||
|
||||
default:
|
||||
OK = FALSE;
|
||||
break;
|
||||
}
|
||||
|
||||
if (OK == (d == OP_VSPACE))
|
||||
{
|
||||
if (count > 0 && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
count++;
|
||||
ADD_NEW_DATA(-state_offset, count, 0);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_HSPACE_EXTRA + OP_TYPEPLUS:
|
||||
case OP_HSPACE_EXTRA + OP_TYPEMINPLUS:
|
||||
case OP_HSPACE_EXTRA + OP_TYPEPOSPLUS:
|
||||
count = current_state->count; /* Already matched */
|
||||
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
|
||||
if (clen > 0)
|
||||
{
|
||||
BOOL OK;
|
||||
switch (c)
|
||||
{
|
||||
case 0x09: /* HT */
|
||||
case 0x20: /* SPACE */
|
||||
case 0xa0: /* NBSP */
|
||||
case 0x1680: /* OGHAM SPACE MARK */
|
||||
case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
|
||||
case 0x2000: /* EN QUAD */
|
||||
case 0x2001: /* EM QUAD */
|
||||
case 0x2002: /* EN SPACE */
|
||||
case 0x2003: /* EM SPACE */
|
||||
case 0x2004: /* THREE-PER-EM SPACE */
|
||||
case 0x2005: /* FOUR-PER-EM SPACE */
|
||||
case 0x2006: /* SIX-PER-EM SPACE */
|
||||
case 0x2007: /* FIGURE SPACE */
|
||||
case 0x2008: /* PUNCTUATION SPACE */
|
||||
case 0x2009: /* THIN SPACE */
|
||||
case 0x200A: /* HAIR SPACE */
|
||||
case 0x202f: /* NARROW NO-BREAK SPACE */
|
||||
case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
|
||||
case 0x3000: /* IDEOGRAPHIC SPACE */
|
||||
OK = TRUE;
|
||||
break;
|
||||
|
||||
default:
|
||||
OK = FALSE;
|
||||
break;
|
||||
}
|
||||
|
||||
if (OK == (d == OP_HSPACE))
|
||||
{
|
||||
if (count > 0 && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
count++;
|
||||
ADD_NEW_DATA(-state_offset, count, 0);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
#ifdef SUPPORT_UCP
|
||||
case OP_PROP_EXTRA + OP_TYPEQUERY:
|
||||
case OP_PROP_EXTRA + OP_TYPEMINQUERY:
|
||||
case OP_PROP_EXTRA + OP_TYPEPOSQUERY:
|
||||
@@ -1182,6 +1286,7 @@ for (;;)
|
||||
ADD_NEW_DATA(-(state_offset + count), 0, ncount);
|
||||
}
|
||||
break;
|
||||
#endif
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_ANYNL_EXTRA + OP_TYPEQUERY:
|
||||
@@ -1226,6 +1331,112 @@ for (;;)
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_VSPACE_EXTRA + OP_TYPEQUERY:
|
||||
case OP_VSPACE_EXTRA + OP_TYPEMINQUERY:
|
||||
case OP_VSPACE_EXTRA + OP_TYPEPOSQUERY:
|
||||
count = 2;
|
||||
goto QS4;
|
||||
|
||||
case OP_VSPACE_EXTRA + OP_TYPESTAR:
|
||||
case OP_VSPACE_EXTRA + OP_TYPEMINSTAR:
|
||||
case OP_VSPACE_EXTRA + OP_TYPEPOSSTAR:
|
||||
count = 0;
|
||||
|
||||
QS4:
|
||||
ADD_ACTIVE(state_offset + 2, 0);
|
||||
if (clen > 0)
|
||||
{
|
||||
BOOL OK;
|
||||
switch (c)
|
||||
{
|
||||
case 0x000a:
|
||||
case 0x000b:
|
||||
case 0x000c:
|
||||
case 0x000d:
|
||||
case 0x0085:
|
||||
case 0x2028:
|
||||
case 0x2029:
|
||||
OK = TRUE;
|
||||
break;
|
||||
|
||||
default:
|
||||
OK = FALSE;
|
||||
break;
|
||||
}
|
||||
if (OK == (d == OP_VSPACE))
|
||||
{
|
||||
if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR ||
|
||||
codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
ADD_NEW_DATA(-(state_offset + count), 0, 0);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_HSPACE_EXTRA + OP_TYPEQUERY:
|
||||
case OP_HSPACE_EXTRA + OP_TYPEMINQUERY:
|
||||
case OP_HSPACE_EXTRA + OP_TYPEPOSQUERY:
|
||||
count = 2;
|
||||
goto QS5;
|
||||
|
||||
case OP_HSPACE_EXTRA + OP_TYPESTAR:
|
||||
case OP_HSPACE_EXTRA + OP_TYPEMINSTAR:
|
||||
case OP_HSPACE_EXTRA + OP_TYPEPOSSTAR:
|
||||
count = 0;
|
||||
|
||||
QS5:
|
||||
ADD_ACTIVE(state_offset + 2, 0);
|
||||
if (clen > 0)
|
||||
{
|
||||
BOOL OK;
|
||||
switch (c)
|
||||
{
|
||||
case 0x09: /* HT */
|
||||
case 0x20: /* SPACE */
|
||||
case 0xa0: /* NBSP */
|
||||
case 0x1680: /* OGHAM SPACE MARK */
|
||||
case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
|
||||
case 0x2000: /* EN QUAD */
|
||||
case 0x2001: /* EM QUAD */
|
||||
case 0x2002: /* EN SPACE */
|
||||
case 0x2003: /* EM SPACE */
|
||||
case 0x2004: /* THREE-PER-EM SPACE */
|
||||
case 0x2005: /* FOUR-PER-EM SPACE */
|
||||
case 0x2006: /* SIX-PER-EM SPACE */
|
||||
case 0x2007: /* FIGURE SPACE */
|
||||
case 0x2008: /* PUNCTUATION SPACE */
|
||||
case 0x2009: /* THIN SPACE */
|
||||
case 0x200A: /* HAIR SPACE */
|
||||
case 0x202f: /* NARROW NO-BREAK SPACE */
|
||||
case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
|
||||
case 0x3000: /* IDEOGRAPHIC SPACE */
|
||||
OK = TRUE;
|
||||
break;
|
||||
|
||||
default:
|
||||
OK = FALSE;
|
||||
break;
|
||||
}
|
||||
|
||||
if (OK == (d == OP_HSPACE))
|
||||
{
|
||||
if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR ||
|
||||
codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
ADD_NEW_DATA(-(state_offset + count), 0, 0);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
#ifdef SUPPORT_UCP
|
||||
case OP_PROP_EXTRA + OP_TYPEEXACT:
|
||||
case OP_PROP_EXTRA + OP_TYPEUPTO:
|
||||
case OP_PROP_EXTRA + OP_TYPEMINUPTO:
|
||||
@@ -1313,6 +1524,7 @@ for (;;)
|
||||
{ ADD_NEW_DATA(-state_offset, count, ncount); }
|
||||
}
|
||||
break;
|
||||
#endif
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_ANYNL_EXTRA + OP_TYPEEXACT:
|
||||
@@ -1352,6 +1564,103 @@ for (;;)
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_VSPACE_EXTRA + OP_TYPEEXACT:
|
||||
case OP_VSPACE_EXTRA + OP_TYPEUPTO:
|
||||
case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
|
||||
case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
|
||||
if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
|
||||
{ ADD_ACTIVE(state_offset + 4, 0); }
|
||||
count = current_state->count; /* Number already matched */
|
||||
if (clen > 0)
|
||||
{
|
||||
BOOL OK;
|
||||
switch (c)
|
||||
{
|
||||
case 0x000a:
|
||||
case 0x000b:
|
||||
case 0x000c:
|
||||
case 0x000d:
|
||||
case 0x0085:
|
||||
case 0x2028:
|
||||
case 0x2029:
|
||||
OK = TRUE;
|
||||
break;
|
||||
|
||||
default:
|
||||
OK = FALSE;
|
||||
}
|
||||
|
||||
if (OK == (d == OP_VSPACE))
|
||||
{
|
||||
if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
if (++count >= GET2(code, 1))
|
||||
{ ADD_NEW_DATA(-(state_offset + 4), 0, 0); }
|
||||
else
|
||||
{ ADD_NEW_DATA(-state_offset, count, 0); }
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_HSPACE_EXTRA + OP_TYPEEXACT:
|
||||
case OP_HSPACE_EXTRA + OP_TYPEUPTO:
|
||||
case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
|
||||
case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
|
||||
if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
|
||||
{ ADD_ACTIVE(state_offset + 4, 0); }
|
||||
count = current_state->count; /* Number already matched */
|
||||
if (clen > 0)
|
||||
{
|
||||
BOOL OK;
|
||||
switch (c)
|
||||
{
|
||||
case 0x09: /* HT */
|
||||
case 0x20: /* SPACE */
|
||||
case 0xa0: /* NBSP */
|
||||
case 0x1680: /* OGHAM SPACE MARK */
|
||||
case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
|
||||
case 0x2000: /* EN QUAD */
|
||||
case 0x2001: /* EM QUAD */
|
||||
case 0x2002: /* EN SPACE */
|
||||
case 0x2003: /* EM SPACE */
|
||||
case 0x2004: /* THREE-PER-EM SPACE */
|
||||
case 0x2005: /* FOUR-PER-EM SPACE */
|
||||
case 0x2006: /* SIX-PER-EM SPACE */
|
||||
case 0x2007: /* FIGURE SPACE */
|
||||
case 0x2008: /* PUNCTUATION SPACE */
|
||||
case 0x2009: /* THIN SPACE */
|
||||
case 0x200A: /* HAIR SPACE */
|
||||
case 0x202f: /* NARROW NO-BREAK SPACE */
|
||||
case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
|
||||
case 0x3000: /* IDEOGRAPHIC SPACE */
|
||||
OK = TRUE;
|
||||
break;
|
||||
|
||||
default:
|
||||
OK = FALSE;
|
||||
break;
|
||||
}
|
||||
|
||||
if (OK == (d == OP_HSPACE))
|
||||
{
|
||||
if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
if (++count >= GET2(code, 1))
|
||||
{ ADD_NEW_DATA(-(state_offset + 4), 0, 0); }
|
||||
else
|
||||
{ ADD_NEW_DATA(-state_offset, count, 0); }
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/* ========================================================================== */
|
||||
/* These opcodes are followed by a character that is usually compared
|
||||
to the current subject character; it is loaded into d. We still get
|
||||
@@ -1450,6 +1759,102 @@ for (;;)
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_NOT_VSPACE:
|
||||
if (clen > 0) switch(c)
|
||||
{
|
||||
case 0x000a:
|
||||
case 0x000b:
|
||||
case 0x000c:
|
||||
case 0x000d:
|
||||
case 0x0085:
|
||||
case 0x2028:
|
||||
case 0x2029:
|
||||
break;
|
||||
|
||||
default:
|
||||
ADD_NEW(state_offset + 1, 0);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_VSPACE:
|
||||
if (clen > 0) switch(c)
|
||||
{
|
||||
case 0x000a:
|
||||
case 0x000b:
|
||||
case 0x000c:
|
||||
case 0x000d:
|
||||
case 0x0085:
|
||||
case 0x2028:
|
||||
case 0x2029:
|
||||
ADD_NEW(state_offset + 1, 0);
|
||||
break;
|
||||
|
||||
default: break;
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_NOT_HSPACE:
|
||||
if (clen > 0) switch(c)
|
||||
{
|
||||
case 0x09: /* HT */
|
||||
case 0x20: /* SPACE */
|
||||
case 0xa0: /* NBSP */
|
||||
case 0x1680: /* OGHAM SPACE MARK */
|
||||
case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
|
||||
case 0x2000: /* EN QUAD */
|
||||
case 0x2001: /* EM QUAD */
|
||||
case 0x2002: /* EN SPACE */
|
||||
case 0x2003: /* EM SPACE */
|
||||
case 0x2004: /* THREE-PER-EM SPACE */
|
||||
case 0x2005: /* FOUR-PER-EM SPACE */
|
||||
case 0x2006: /* SIX-PER-EM SPACE */
|
||||
case 0x2007: /* FIGURE SPACE */
|
||||
case 0x2008: /* PUNCTUATION SPACE */
|
||||
case 0x2009: /* THIN SPACE */
|
||||
case 0x200A: /* HAIR SPACE */
|
||||
case 0x202f: /* NARROW NO-BREAK SPACE */
|
||||
case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
|
||||
case 0x3000: /* IDEOGRAPHIC SPACE */
|
||||
break;
|
||||
|
||||
default:
|
||||
ADD_NEW(state_offset + 1, 0);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_HSPACE:
|
||||
if (clen > 0) switch(c)
|
||||
{
|
||||
case 0x09: /* HT */
|
||||
case 0x20: /* SPACE */
|
||||
case 0xa0: /* NBSP */
|
||||
case 0x1680: /* OGHAM SPACE MARK */
|
||||
case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
|
||||
case 0x2000: /* EN QUAD */
|
||||
case 0x2001: /* EM QUAD */
|
||||
case 0x2002: /* EN SPACE */
|
||||
case 0x2003: /* EM SPACE */
|
||||
case 0x2004: /* THREE-PER-EM SPACE */
|
||||
case 0x2005: /* FOUR-PER-EM SPACE */
|
||||
case 0x2006: /* SIX-PER-EM SPACE */
|
||||
case 0x2007: /* FIGURE SPACE */
|
||||
case 0x2008: /* PUNCTUATION SPACE */
|
||||
case 0x2009: /* THIN SPACE */
|
||||
case 0x200A: /* HAIR SPACE */
|
||||
case 0x202f: /* NARROW NO-BREAK SPACE */
|
||||
case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
|
||||
case 0x3000: /* IDEOGRAPHIC SPACE */
|
||||
ADD_NEW(state_offset + 1, 0);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
/* Match a negated single character. This is only used for one-byte
|
||||
characters, that is, we know that d < 256. The character we are
|
||||
@@ -2057,7 +2462,7 @@ is not anchored.
|
||||
|
||||
Arguments:
|
||||
argument_re points to the compiled expression
|
||||
extra_data points to extra data or is NULL (not currently used)
|
||||
extra_data points to extra data or is NULL
|
||||
subject points to the subject string
|
||||
length length of subject string (may contain binary zeros)
|
||||
start_offset where to start in the subject string
|
||||
@@ -2073,7 +2478,7 @@ Returns: > 0 => number of match offset pairs placed in offsets
|
||||
< -1 => some kind of unexpected problem
|
||||
*/
|
||||
|
||||
PCRE_DATA_SCOPE int
|
||||
PCRE_EXP_DEFN int
|
||||
pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
|
||||
const char *subject, int length, int start_offset, int options, int *offsets,
|
||||
int offsetcount, int *workspace, int wscount)
|
||||
@@ -2163,10 +2568,10 @@ md->end_subject = end_subject;
|
||||
md->moptions = options;
|
||||
md->poptions = re->options;
|
||||
|
||||
/* Handle different types of newline. The two bits give four cases. If nothing
|
||||
is set at run time, whatever was used at compile time applies. */
|
||||
/* Handle different types of newline. The three bits give eight cases. If
|
||||
nothing is set at run time, whatever was used at compile time applies. */
|
||||
|
||||
switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : options) &
|
||||
switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
|
||||
PCRE_NEWLINE_BITS)
|
||||
{
|
||||
case 0: newline = NEWLINE; break; /* Compile-time default */
|
||||
@@ -2175,10 +2580,15 @@ switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : options) &
|
||||
case PCRE_NEWLINE_CR+
|
||||
PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
|
||||
case PCRE_NEWLINE_ANY: newline = -1; break;
|
||||
case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
|
||||
default: return PCRE_ERROR_BADNEWLINE;
|
||||
}
|
||||
|
||||
if (newline < 0)
|
||||
if (newline == -2)
|
||||
{
|
||||
md->nltype = NLTYPE_ANYCRLF;
|
||||
}
|
||||
else if (newline < 0)
|
||||
{
|
||||
md->nltype = NLTYPE_ANY;
|
||||
}
|
||||
@@ -2308,6 +2718,16 @@ for (;;)
|
||||
{
|
||||
while (current_subject <= end_subject && !WAS_NEWLINE(current_subject))
|
||||
current_subject++;
|
||||
|
||||
/* If we have just passed a CR and the newline option is ANY or
|
||||
ANYCRLF, and we are now at a LF, advance the match position by one more
|
||||
character. */
|
||||
|
||||
if (current_subject[-1] == '\r' &&
|
||||
(md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
|
||||
current_subject < end_subject &&
|
||||
*current_subject == '\n')
|
||||
current_subject++;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2416,11 +2836,14 @@ for (;;)
|
||||
}
|
||||
if (current_subject > end_subject) break;
|
||||
|
||||
/* If we have just passed a CR and the newline option is CRLF or ANY, and we
|
||||
are now at a LF, advance the match position by one more character. */
|
||||
/* If we have just passed a CR and the newline option is CRLF or ANY or
|
||||
ANYCRLF, and we are now at a LF, advance the match position by one more
|
||||
character. */
|
||||
|
||||
if (current_subject[-1] == '\r' &&
|
||||
(md->nltype == NLTYPE_ANY || md->nllen == 2) &&
|
||||
(md->nltype == NLTYPE_ANY ||
|
||||
md->nltype == NLTYPE_ANYCRLF ||
|
||||
md->nllen == 2) &&
|
||||
current_subject < end_subject &&
|
||||
*current_subject == '\n')
|
||||
current_subject++;
|
||||
|
Reference in New Issue
Block a user