Update PCRE to 7.8

svn path=/trunk/; revision=7813
This commit is contained in:
Matthias Clasen
2009-01-18 06:32:03 +00:00
parent 1da8112081
commit d6f23279e7
21 changed files with 411 additions and 321 deletions

View File

@@ -512,9 +512,6 @@ for (;;)
const uschar *code;
int state_offset = current_state->offset;
int count, codevalue;
#ifdef SUPPORT_UCP
int chartype, script;
#endif
#ifdef DEBUG
printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
@@ -825,7 +822,7 @@ for (;;)
if (clen > 0)
{
BOOL OK;
int category = _pcre_ucp_findprop(c, &chartype, &script);
int chartype = UCD_CHARTYPE(c);
switch(code[1])
{
case PT_ANY:
@@ -837,7 +834,7 @@ for (;;)
break;
case PT_GC:
OK = category == code[2];
OK = _pcre_ucp_gentype[chartype] == code[2];
break;
case PT_PC:
@@ -845,7 +842,7 @@ for (;;)
break;
case PT_SC:
OK = script == code[2];
OK = UCD_SCRIPT(c) == code[2];
break;
/* Should never occur, but keep compilers from grumbling. */
@@ -994,7 +991,7 @@ for (;;)
if (clen > 0)
{
BOOL OK;
int category = _pcre_ucp_findprop(c, &chartype, &script);
int chartype = UCD_CHARTYPE(c);
switch(code[2])
{
case PT_ANY:
@@ -1006,7 +1003,7 @@ for (;;)
break;
case PT_GC:
OK = category == code[3];
OK = _pcre_ucp_gentype[chartype] == code[3];
break;
case PT_PC:
@@ -1014,7 +1011,7 @@ for (;;)
break;
case PT_SC:
OK = script == code[3];
OK = UCD_SCRIPT(c) == code[3];
break;
/* Should never occur, but keep compilers from grumbling. */
@@ -1043,7 +1040,7 @@ for (;;)
case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
count = current_state->count; /* Already matched */
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
{
const uschar *nptr = ptr + clen;
int ncount = 0;
@@ -1057,7 +1054,7 @@ for (;;)
int nd;
int ndlen = 1;
GETCHARLEN(nd, nptr, ndlen);
if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
if (UCD_CATEGORY(nd) != ucp_M) break;
ncount++;
nptr += ndlen;
}
@@ -1216,7 +1213,7 @@ for (;;)
if (clen > 0)
{
BOOL OK;
int category = _pcre_ucp_findprop(c, &chartype, &script);
int chartype = UCD_CHARTYPE(c);
switch(code[2])
{
case PT_ANY:
@@ -1228,7 +1225,7 @@ for (;;)
break;
case PT_GC:
OK = category == code[3];
OK = _pcre_ucp_gentype[chartype] == code[3];
break;
case PT_PC:
@@ -1236,7 +1233,7 @@ for (;;)
break;
case PT_SC:
OK = script == code[3];
OK = UCD_SCRIPT(c) == code[3];
break;
/* Should never occur, but keep compilers from grumbling. */
@@ -1274,7 +1271,7 @@ for (;;)
QS2:
ADD_ACTIVE(state_offset + 2, 0);
if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
{
const uschar *nptr = ptr + clen;
int ncount = 0;
@@ -1289,7 +1286,7 @@ for (;;)
int nd;
int ndlen = 1;
GETCHARLEN(nd, nptr, ndlen);
if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
if (UCD_CATEGORY(nd) != ucp_M) break;
ncount++;
nptr += ndlen;
}
@@ -1463,7 +1460,7 @@ for (;;)
if (clen > 0)
{
BOOL OK;
int category = _pcre_ucp_findprop(c, &chartype, &script);
int chartype = UCD_CHARTYPE(c);
switch(code[4])
{
case PT_ANY:
@@ -1475,7 +1472,7 @@ for (;;)
break;
case PT_GC:
OK = category == code[5];
OK = _pcre_ucp_gentype[chartype] == code[5];
break;
case PT_PC:
@@ -1483,7 +1480,7 @@ for (;;)
break;
case PT_SC:
OK = script == code[5];
OK = UCD_SCRIPT(c) == code[5];
break;
/* Should never occur, but keep compilers from grumbling. */
@@ -1516,7 +1513,7 @@ for (;;)
if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
{ ADD_ACTIVE(state_offset + 4, 0); }
count = current_state->count; /* Number already matched */
if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
{
const uschar *nptr = ptr + clen;
int ncount = 0;
@@ -1530,7 +1527,7 @@ for (;;)
int nd;
int ndlen = 1;
GETCHARLEN(nd, nptr, ndlen);
if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
if (UCD_CATEGORY(nd) != ucp_M) break;
ncount++;
nptr += ndlen;
}
@@ -1710,7 +1707,7 @@ for (;;)
other case of the character. */
#ifdef SUPPORT_UCP
othercase = _pcre_ucp_othercase(c);
othercase = UCD_OTHERCASE(c);
#else
othercase = NOTACHAR;
#endif
@@ -1735,7 +1732,7 @@ for (;;)
to wait for them to pass before continuing. */
case OP_EXTUNI:
if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
{
const uschar *nptr = ptr + clen;
int ncount = 0;
@@ -1743,7 +1740,7 @@ for (;;)
{
int nclen = 1;
GETCHARLEN(c, nptr, nclen);
if (_pcre_ucp_findprop(c, &chartype, &script) != ucp_M) break;
if (UCD_CATEGORY(c) != ucp_M) break;
ncount++;
nptr += nclen;
}
@@ -1911,7 +1908,7 @@ for (;;)
if (utf8 && d >= 128)
{
#ifdef SUPPORT_UCP
otherd = _pcre_ucp_othercase(d);
otherd = UCD_OTHERCASE(d);
#endif /* SUPPORT_UCP */
}
else
@@ -1949,7 +1946,7 @@ for (;;)
if (utf8 && d >= 128)
{
#ifdef SUPPORT_UCP
otherd = _pcre_ucp_othercase(d);
otherd = UCD_OTHERCASE(d);
#endif /* SUPPORT_UCP */
}
else
@@ -1985,7 +1982,7 @@ for (;;)
if (utf8 && d >= 128)
{
#ifdef SUPPORT_UCP
otherd = _pcre_ucp_othercase(d);
otherd = UCD_OTHERCASE(d);
#endif /* SUPPORT_UCP */
}
else
@@ -2017,7 +2014,7 @@ for (;;)
if (utf8 && d >= 128)
{
#ifdef SUPPORT_UCP
otherd = _pcre_ucp_othercase(d);
otherd = UCD_OTHERCASE(d);
#endif /* SUPPORT_UCP */
}
else
@@ -2052,7 +2049,7 @@ for (;;)
if (utf8 && d >= 128)
{
#ifdef SUPPORT_UCP
otherd = _pcre_ucp_othercase(d);
otherd = UCD_OTHERCASE(d);
#endif /* SUPPORT_UCP */
}
else
@@ -2508,7 +2505,7 @@ Returns: > 0 => number of match offset pairs placed in offsets
< -1 => some kind of unexpected problem
*/
PCRE_EXP_DEFN int
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
const char *subject, int length, int start_offset, int options, int *offsets,
int offsetcount, int *workspace, int wscount)
@@ -2736,7 +2733,18 @@ for (;;)
if (firstline)
{
const uschar *t = current_subject;
USPTR t = current_subject;
#ifdef SUPPORT_UTF8
if (utf8)
{
while (t < md->end_subject && !IS_NEWLINE(t))
{
t++;
while (t < end_subject && (*t & 0xc0) == 0x80) t++;
}
}
else
#endif
while (t < md->end_subject && !IS_NEWLINE(t)) t++;
end_subject = t;
}
@@ -2758,7 +2766,20 @@ for (;;)
{
if (current_subject > md->start_subject + start_offset)
{
while (current_subject <= end_subject && !WAS_NEWLINE(current_subject))
#ifdef SUPPORT_UTF8
if (utf8)
{
while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
{
current_subject++;
while(current_subject < end_subject &&
(*current_subject & 0xc0) == 0x80)
current_subject++;
}
}
else
#endif
while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
current_subject++;
/* If we have just passed a CR and the newline option is ANY or