Update PCRE to 7.8

svn path=/trunk/; revision=7813
This commit is contained in:
Matthias Clasen 2009-01-18 06:32:03 +00:00
parent 1da8112081
commit d6f23279e7
21 changed files with 411 additions and 321 deletions

View File

@ -1,3 +1,7 @@
2009-01-18 Matthias Clasen <mclasen@redhat.com>
* glib/pcre: Update to PCRE 7.8
2009-01-17 Matthias Clasen <mclasen@redhat.com>
Bug 567977 textdomain() macro should not return NULL when

View File

@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
/* The current PCRE version information. */
#define PCRE_MAJOR 7
#define PCRE_MINOR 7
#define PCRE_MINOR 8
#define PCRE_PRERELEASE
#define PCRE_DATE 2008-05-07
#define PCRE_DATE 2008-09-05
/* When an application links to a PCRE DLL in Windows, the symbols that are
imported have to be identified as such. When building PCRE, the appropriate

View File

@ -1,6 +1,3 @@
/* This file is autogenerated by ../update-pcre/update.sh during
* the update of the local copy of PCRE.
*/
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/

View File

@ -331,7 +331,7 @@ static const char *
find_error_text(int n)
{
const char *s = error_texts;
for (; n > 0; n--) while (*s++ != 0);
for (; n > 0; n--) while (*s++ != 0) {};
return s;
}
@ -437,7 +437,7 @@ else
{
const uschar *p;
for (p = ptr+2; *p != 0 && *p != '}'; p++)
if (*p != '-' && g_ascii_isdigit(*p) == 0) break;
if (*p != '-' && g_ascii_isdigit (*p) == 0) break;
if (*p != 0 && *p != '}')
{
c = -ESC_k;
@ -456,7 +456,7 @@ else
else negated = FALSE;
c = 0;
while (g_ascii_isdigit(ptr[1]) != 0)
while (g_ascii_isdigit (ptr[1]) != 0)
c = c * 10 + *(++ptr) - '0';
if (c < 0) /* Integer overflow */
@ -509,7 +509,7 @@ else
{
oldptr = ptr;
c -= '0';
while (g_ascii_isdigit(ptr[1]) != 0)
while (g_ascii_isdigit (ptr[1]))
c = c * 10 + *(++ptr) - '0';
if (c < 0) /* Integer overflow */
{
@ -559,7 +559,7 @@ else
int count = 0;
c = 0;
while (g_ascii_isxdigit(*pt) != 0)
while (g_ascii_isxdigit (*pt) != 0)
{
register int cc = *pt++;
if (c == 0 && cc == '0') continue; /* Leading zeroes */
@ -588,7 +588,7 @@ else
/* Read just a single-byte hex-defined char */
c = 0;
while (i++ < 2 && g_ascii_isxdigit(ptr[1]) != 0)
while (i++ < 2 && g_ascii_isxdigit (ptr[1]) != 0)
{
int cc; /* Some compilers don't like ++ */
cc = *(++ptr); /* in initializers */
@ -757,15 +757,15 @@ Returns: TRUE or FALSE
static BOOL
is_counted_repeat(const uschar *p)
{
if (g_ascii_isdigit(*p++) == 0) return FALSE;
while (g_ascii_isdigit(*p) != 0) p++;
if (g_ascii_isdigit (*p++) == 0) return FALSE;
while (g_ascii_isdigit (*p) != 0) p++;
if (*p == '}') return TRUE;
if (*p++ != ',') return FALSE;
if (*p == '}') return TRUE;
if (g_ascii_isdigit(*p++) == 0) return FALSE;
while (g_ascii_isdigit(*p) != 0) p++;
if (g_ascii_isdigit (*p++) == 0) return FALSE;
while (g_ascii_isdigit (*p) != 0) p++;
return (*p == '}');
}
@ -800,7 +800,7 @@ int max = -1;
/* Read the minimum value and do a paranoid check: a negative value indicates
an integer overflow. */
while (g_ascii_isdigit(*p) != 0) min = min * 10 + *p++ - '0';
while (g_ascii_isdigit (*p) != 0) min = min * 10 + *p++ - '0';
if (min < 0 || min > 65535)
{
*errorcodeptr = ERR5;
@ -815,7 +815,7 @@ if (*p == '}') max = min; else
if (*(++p) != '}')
{
max = 0;
while(g_ascii_isdigit(*p) != 0) max = max * 10 + *p++ - '0';
while(g_ascii_isdigit (*p) != 0) max = max * 10 + *p++ - '0';
if (max < 0 || max > 65535)
{
*errorcodeptr = ERR5;
@ -878,7 +878,7 @@ for (; *ptr != 0; ptr++)
if (*(++ptr) == 0) return -1;
if (*ptr == 'Q') for (;;)
{
while (*(++ptr) != 0 && *ptr != '\\');
while (*(++ptr) != 0 && *ptr != '\\') {};
if (*ptr == 0) return -1;
if (*(++ptr) == 'E') break;
}
@ -921,7 +921,7 @@ for (; *ptr != 0; ptr++)
if (*(++ptr) == 0) return -1;
if (*ptr == 'Q') for (;;)
{
while (*(++ptr) != 0 && *ptr != '\\');
while (*(++ptr) != 0 && *ptr != '\\') {};
if (*ptr == 0) return -1;
if (*(++ptr) == 'E') break;
}
@ -935,7 +935,7 @@ for (; *ptr != 0; ptr++)
if (xmode && *ptr == '#')
{
while (*(++ptr) != 0 && *ptr != '\n');
while (*(++ptr) != 0 && *ptr != '\n') {};
if (*ptr == 0) return -1;
continue;
}
@ -1326,6 +1326,8 @@ for (;;)
if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];
break;
}
#else
(void)(utf8); /* Keep compiler happy by referencing function argument */
#endif
}
}
@ -1419,6 +1421,8 @@ for (;;)
if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];
break;
}
#else
(void)(utf8); /* Keep compiler happy by referencing function argument */
#endif
}
}
@ -1891,7 +1895,7 @@ get_othercase_range(unsigned int *cptr, unsigned int d, unsigned int *ocptr,
unsigned int c, othercase, next;
for (c = *cptr; c <= d; c++)
{ if ((othercase = _pcre_ucp_othercase(c)) != NOTACHAR) break; }
{ if ((othercase = UCD_OTHERCASE(c)) != c) break; }
if (c > d) return FALSE;
@ -1900,7 +1904,7 @@ next = othercase + 1;
for (++c; c <= d; c++)
{
if (_pcre_ucp_othercase(c) != next) break;
if (UCD_OTHERCASE(c) != next) break;
next++;
}
@ -2010,6 +2014,8 @@ if (next >= 0) switch(op_code)
case OP_CHAR:
#ifdef SUPPORT_UTF8
if (utf8 && item > 127) { GETCHAR(item, utf8_char); }
#else
(void)(utf8_char); /* Keep compiler happy by referencing function argument */
#endif
return item != next;
@ -2028,7 +2034,7 @@ if (next >= 0) switch(op_code)
unsigned int othercase;
if (next < 128) othercase = cd->fcc[next]; else
#ifdef SUPPORT_UCP
othercase = _pcre_ucp_othercase((unsigned int)next);
othercase = UCD_OTHERCASE((unsigned int)next);
#else
othercase = NOTACHAR;
#endif
@ -2049,7 +2055,7 @@ if (next >= 0) switch(op_code)
unsigned int othercase;
if (next < 128) othercase = cd->fcc[next]; else
#ifdef SUPPORT_UCP
othercase = _pcre_ucp_othercase(next);
othercase = UCD_OTHERCASE(next);
#else
othercase = NOTACHAR;
#endif
@ -3215,7 +3221,7 @@ for (;; ptr++)
if ((options & PCRE_CASELESS) != 0)
{
unsigned int othercase;
if ((othercase = _pcre_ucp_othercase(c)) != NOTACHAR)
if ((othercase = UCD_OTHERCASE(c)) != c)
{
*class_utf8data++ = XCL_SINGLE;
class_utf8data += _pcre_ord2utf8(othercase, class_utf8data);
@ -4092,7 +4098,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
const char *vn = verbnames;
const uschar *name = ++ptr;
previous = NULL;
while ((cd->ctypes[*++ptr] & ctype_letter) != 0);
while ((cd->ctypes[*++ptr] & ctype_letter) != 0) {};
if (*ptr == ':')
{
*errorcodeptr = ERR59; /* Not supported */
@ -4230,7 +4236,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
while ((cd->ctypes[*ptr] & ctype_word) != 0)
{
if (recno >= 0)
recno = (g_ascii_isdigit(*ptr) != 0)?
recno = (g_ascii_isdigit (*ptr) != 0)?
recno * 10 + *ptr - '0' : -1;
ptr++;
}
@ -4315,7 +4321,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
recno = 0;
for (i = 1; i < namelen; i++)
{
if (g_ascii_isdigit(name[i]) == 0)
if (g_ascii_isdigit (name[i]) == 0)
{
*errorcodeptr = ERR15;
goto FAILED;
@ -4411,7 +4417,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
*code++ = OP_CALLOUT;
{
int n = 0;
while (g_ascii_isdigit(*(++ptr)) != 0)
while (g_ascii_isdigit (*(++ptr)) != 0)
n = n * 10 + *ptr - '0';
if (*ptr != ')')
{
@ -4626,7 +4632,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
if ((refsign = *ptr) == '+')
{
ptr++;
if (g_ascii_isdigit(*ptr) == 0)
if (g_ascii_isdigit (*ptr) == 0)
{
*errorcodeptr = ERR63;
goto FAILED;
@ -4634,13 +4640,13 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
}
else if (refsign == '-')
{
if (g_ascii_isdigit(ptr[1]) == 0)
if (g_ascii_isdigit (ptr[1]) == 0)
goto OTHER_CHAR_AFTER_QUERY;
ptr++;
}
recno = 0;
while(g_ascii_isdigit(*ptr) != 0)
while(g_ascii_isdigit (*ptr) != 0)
recno = recno * 10 + *ptr++ - '0';
if (*ptr != terminator)
@ -4796,10 +4802,8 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
both phases.
If we are not at the pattern start, compile code to change the ims
options if this setting actually changes any of them. We also pass the
new setting back so that it can be put at the start of any following
branches, and when this group ends (if we are in a group), a resetting
item can be compiled. */
options if this setting actually changes any of them, and reset the
greedy defaults and the case value for firstbyte and reqbyte. */
if (*ptr == ')')
{
@ -4807,7 +4811,6 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
(lengthptr == NULL || *lengthptr == 2 + 2*LINK_SIZE))
{
cd->external_options = newoptions;
options = *optionsptr = newoptions;
}
else
{
@ -4816,17 +4819,17 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
*code++ = OP_OPT;
*code++ = newoptions & PCRE_IMS;
}
/* Change options at this level, and pass them back for use
in subsequent branches. Reset the greedy defaults and the case
value for firstbyte and reqbyte. */
*optionsptr = options = newoptions;
greedy_default = ((newoptions & PCRE_UNGREEDY) != 0);
greedy_non_default = greedy_default ^ 1;
req_caseopt = ((options & PCRE_CASELESS) != 0)? REQ_CASELESS : 0;
req_caseopt = ((newoptions & PCRE_CASELESS) != 0)? REQ_CASELESS : 0;
}
/* Change options at this level, and pass them back for use
in subsequent branches. When not at the start of the pattern, this
information is also necessary so that a resetting item can be
compiled at the end of a group (if we are in a group). */
*optionsptr = options = newoptions;
previous = NULL; /* This item can't be repeated */
continue; /* It is complete */
}
@ -5115,7 +5118,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
/* Test a signed number in angle brackets or quotes. */
p = ptr + 2;
while (g_ascii_isdigit(*p) != 0) p++;
while (g_ascii_isdigit (*p) != 0) p++;
if (*p != terminator)
{
*errorcodeptr = ERR57;
@ -5820,7 +5823,7 @@ Returns: pointer to compiled data block, or NULL on error,
with errorptr and erroroffset set
*/
PCRE_EXP_DEFN pcre *
PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
pcre_compile(const char *pattern, int options, const char **errorptr,
int *erroroffset, const unsigned char *tables)
{
@ -5828,7 +5831,7 @@ return pcre_compile2(pattern, options, NULL, errorptr, erroroffset, tables);
}
PCRE_EXP_DEFN pcre *
PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
pcre_compile2(const char *pattern, int options, int *errorcodeptr,
const char **errorptr, int *erroroffset, const unsigned char *tables)
{

View File

@ -62,7 +62,7 @@ Arguments:
Returns: 0 if data returned, negative on error
*/
PCRE_EXP_DEFN int
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_config(int what, void *where)
{
switch (what)

View File

@ -512,9 +512,6 @@ for (;;)
const uschar *code;
int state_offset = current_state->offset;
int count, codevalue;
#ifdef SUPPORT_UCP
int chartype, script;
#endif
#ifdef DEBUG
printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
@ -825,7 +822,7 @@ for (;;)
if (clen > 0)
{
BOOL OK;
int category = _pcre_ucp_findprop(c, &chartype, &script);
int chartype = UCD_CHARTYPE(c);
switch(code[1])
{
case PT_ANY:
@ -837,7 +834,7 @@ for (;;)
break;
case PT_GC:
OK = category == code[2];
OK = _pcre_ucp_gentype[chartype] == code[2];
break;
case PT_PC:
@ -845,7 +842,7 @@ for (;;)
break;
case PT_SC:
OK = script == code[2];
OK = UCD_SCRIPT(c) == code[2];
break;
/* Should never occur, but keep compilers from grumbling. */
@ -994,7 +991,7 @@ for (;;)
if (clen > 0)
{
BOOL OK;
int category = _pcre_ucp_findprop(c, &chartype, &script);
int chartype = UCD_CHARTYPE(c);
switch(code[2])
{
case PT_ANY:
@ -1006,7 +1003,7 @@ for (;;)
break;
case PT_GC:
OK = category == code[3];
OK = _pcre_ucp_gentype[chartype] == code[3];
break;
case PT_PC:
@ -1014,7 +1011,7 @@ for (;;)
break;
case PT_SC:
OK = script == code[3];
OK = UCD_SCRIPT(c) == code[3];
break;
/* Should never occur, but keep compilers from grumbling. */
@ -1043,7 +1040,7 @@ for (;;)
case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
count = current_state->count; /* Already matched */
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
{
const uschar *nptr = ptr + clen;
int ncount = 0;
@ -1057,7 +1054,7 @@ for (;;)
int nd;
int ndlen = 1;
GETCHARLEN(nd, nptr, ndlen);
if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
if (UCD_CATEGORY(nd) != ucp_M) break;
ncount++;
nptr += ndlen;
}
@ -1216,7 +1213,7 @@ for (;;)
if (clen > 0)
{
BOOL OK;
int category = _pcre_ucp_findprop(c, &chartype, &script);
int chartype = UCD_CHARTYPE(c);
switch(code[2])
{
case PT_ANY:
@ -1228,7 +1225,7 @@ for (;;)
break;
case PT_GC:
OK = category == code[3];
OK = _pcre_ucp_gentype[chartype] == code[3];
break;
case PT_PC:
@ -1236,7 +1233,7 @@ for (;;)
break;
case PT_SC:
OK = script == code[3];
OK = UCD_SCRIPT(c) == code[3];
break;
/* Should never occur, but keep compilers from grumbling. */
@ -1274,7 +1271,7 @@ for (;;)
QS2:
ADD_ACTIVE(state_offset + 2, 0);
if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
{
const uschar *nptr = ptr + clen;
int ncount = 0;
@ -1289,7 +1286,7 @@ for (;;)
int nd;
int ndlen = 1;
GETCHARLEN(nd, nptr, ndlen);
if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
if (UCD_CATEGORY(nd) != ucp_M) break;
ncount++;
nptr += ndlen;
}
@ -1463,7 +1460,7 @@ for (;;)
if (clen > 0)
{
BOOL OK;
int category = _pcre_ucp_findprop(c, &chartype, &script);
int chartype = UCD_CHARTYPE(c);
switch(code[4])
{
case PT_ANY:
@ -1475,7 +1472,7 @@ for (;;)
break;
case PT_GC:
OK = category == code[5];
OK = _pcre_ucp_gentype[chartype] == code[5];
break;
case PT_PC:
@ -1483,7 +1480,7 @@ for (;;)
break;
case PT_SC:
OK = script == code[5];
OK = UCD_SCRIPT(c) == code[5];
break;
/* Should never occur, but keep compilers from grumbling. */
@ -1516,7 +1513,7 @@ for (;;)
if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
{ ADD_ACTIVE(state_offset + 4, 0); }
count = current_state->count; /* Number already matched */
if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
{
const uschar *nptr = ptr + clen;
int ncount = 0;
@ -1530,7 +1527,7 @@ for (;;)
int nd;
int ndlen = 1;
GETCHARLEN(nd, nptr, ndlen);
if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
if (UCD_CATEGORY(nd) != ucp_M) break;
ncount++;
nptr += ndlen;
}
@ -1710,7 +1707,7 @@ for (;;)
other case of the character. */
#ifdef SUPPORT_UCP
othercase = _pcre_ucp_othercase(c);
othercase = UCD_OTHERCASE(c);
#else
othercase = NOTACHAR;
#endif
@ -1735,7 +1732,7 @@ for (;;)
to wait for them to pass before continuing. */
case OP_EXTUNI:
if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
{
const uschar *nptr = ptr + clen;
int ncount = 0;
@ -1743,7 +1740,7 @@ for (;;)
{
int nclen = 1;
GETCHARLEN(c, nptr, nclen);
if (_pcre_ucp_findprop(c, &chartype, &script) != ucp_M) break;
if (UCD_CATEGORY(c) != ucp_M) break;
ncount++;
nptr += nclen;
}
@ -1911,7 +1908,7 @@ for (;;)
if (utf8 && d >= 128)
{
#ifdef SUPPORT_UCP
otherd = _pcre_ucp_othercase(d);
otherd = UCD_OTHERCASE(d);
#endif /* SUPPORT_UCP */
}
else
@ -1949,7 +1946,7 @@ for (;;)
if (utf8 && d >= 128)
{
#ifdef SUPPORT_UCP
otherd = _pcre_ucp_othercase(d);
otherd = UCD_OTHERCASE(d);
#endif /* SUPPORT_UCP */
}
else
@ -1985,7 +1982,7 @@ for (;;)
if (utf8 && d >= 128)
{
#ifdef SUPPORT_UCP
otherd = _pcre_ucp_othercase(d);
otherd = UCD_OTHERCASE(d);
#endif /* SUPPORT_UCP */
}
else
@ -2017,7 +2014,7 @@ for (;;)
if (utf8 && d >= 128)
{
#ifdef SUPPORT_UCP
otherd = _pcre_ucp_othercase(d);
otherd = UCD_OTHERCASE(d);
#endif /* SUPPORT_UCP */
}
else
@ -2052,7 +2049,7 @@ for (;;)
if (utf8 && d >= 128)
{
#ifdef SUPPORT_UCP
otherd = _pcre_ucp_othercase(d);
otherd = UCD_OTHERCASE(d);
#endif /* SUPPORT_UCP */
}
else
@ -2508,7 +2505,7 @@ Returns: > 0 => number of match offset pairs placed in offsets
< -1 => some kind of unexpected problem
*/
PCRE_EXP_DEFN int
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
const char *subject, int length, int start_offset, int options, int *offsets,
int offsetcount, int *workspace, int wscount)
@ -2736,7 +2733,18 @@ for (;;)
if (firstline)
{
const uschar *t = current_subject;
USPTR t = current_subject;
#ifdef SUPPORT_UTF8
if (utf8)
{
while (t < md->end_subject && !IS_NEWLINE(t))
{
t++;
while (t < end_subject && (*t & 0xc0) == 0x80) t++;
}
}
else
#endif
while (t < md->end_subject && !IS_NEWLINE(t)) t++;
end_subject = t;
}
@ -2758,7 +2766,20 @@ for (;;)
{
if (current_subject > md->start_subject + start_offset)
{
while (current_subject <= end_subject && !WAS_NEWLINE(current_subject))
#ifdef SUPPORT_UTF8
if (utf8)
{
while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
{
current_subject++;
while(current_subject < end_subject &&
(*current_subject & 0xc0) == 0x80)
current_subject++;
}
}
else
#endif
while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
current_subject++;
/* If we have just passed a CR and the newline option is ANY or

View File

@ -158,13 +158,39 @@ printf("\n");
if (length > md->end_subject - eptr) return FALSE;
/* Separate the caselesss case for speed */
/* Separate the caseless case for speed. In UTF-8 mode we can only do this
properly if Unicode properties are supported. Otherwise, we can check only
ASCII characters. */
if ((ims & PCRE_CASELESS) != 0)
{
#ifdef SUPPORT_UTF8
#ifdef SUPPORT_UCP
if (md->utf8)
{
USPTR endptr = eptr + length;
while (eptr < endptr)
{
int c, d;
GETCHARINC(c, eptr);
GETCHARINC(d, p);
if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
}
}
else
#endif
#endif
/* The same code works when not in UTF-8 mode and in UTF-8 mode when there
is no UCP support. */
while (length-- > 0)
if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;
{ if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
}
/* In the caseful case, we can just compare the bytes, whether or not we
are in UTF-8 mode. */
else
{ while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
@ -1653,9 +1679,7 @@ for (;;)
if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINCTEST(c, eptr);
{
int chartype, script;
int category = _pcre_ucp_findprop(c, &chartype, &script);
int chartype = UCD_CHARTYPE(c);
switch(ecode[1])
{
case PT_ANY:
@ -1670,7 +1694,7 @@ for (;;)
break;
case PT_GC:
if ((ecode[2] != category) == (op == OP_PROP))
if ((ecode[2] != _pcre_ucp_gentype[chartype]) == (op == OP_PROP))
RRETURN(MATCH_NOMATCH);
break;
@ -1680,7 +1704,7 @@ for (;;)
break;
case PT_SC:
if ((ecode[2] != script) == (op == OP_PROP))
if ((ecode[2] != UCD_SCRIPT(c)) == (op == OP_PROP))
RRETURN(MATCH_NOMATCH);
break;
@ -1699,8 +1723,7 @@ for (;;)
if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINCTEST(c, eptr);
{
int chartype, script;
int category = _pcre_ucp_findprop(c, &chartype, &script);
int category = UCD_CATEGORY(c);
if (category == ucp_M) RRETURN(MATCH_NOMATCH);
while (eptr < md->end_subject)
{
@ -1709,7 +1732,7 @@ for (;;)
{
GETCHARLEN(c, eptr, len);
}
category = _pcre_ucp_findprop(c, &chartype, &script);
category = UCD_CATEGORY(c);
if (category != ucp_M) break;
eptr += len;
}
@ -2174,7 +2197,7 @@ for (;;)
if (fc != dc)
{
#ifdef SUPPORT_UCP
if (dc != _pcre_ucp_othercase(fc))
if (dc != UCD_OTHERCASE(fc))
#endif
RRETURN(MATCH_NOMATCH);
}
@ -2265,7 +2288,7 @@ for (;;)
#ifdef SUPPORT_UCP
unsigned int othercase;
if ((ims & PCRE_CASELESS) != 0 &&
(othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)
(othercase = UCD_OTHERCASE(fc)) != fc)
oclength = _pcre_ord2utf8(othercase, occhars);
else oclength = 0;
#endif /* SUPPORT_UCP */
@ -2585,10 +2608,11 @@ for (;;)
{
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINC(d, eptr);
if (d < 256) d = md->lcc[d];
if (fi >= max || eptr >= md->end_subject || fc == d)
RRETURN(MATCH_NOMATCH);
if (fc == d) RRETURN(MATCH_NOMATCH);
}
}
else
@ -2694,9 +2718,9 @@ for (;;)
{
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINC(d, eptr);
if (fi >= max || eptr >= md->end_subject || fc == d)
RRETURN(MATCH_NOMATCH);
if (fc == d) RRETURN(MATCH_NOMATCH);
}
}
else
@ -2870,7 +2894,7 @@ for (;;)
{
if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINCTEST(c, eptr);
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
prop_chartype = UCD_CHARTYPE(c);
if ((prop_chartype == ucp_Lu ||
prop_chartype == ucp_Ll ||
prop_chartype == ucp_Lt) == prop_fail_result)
@ -2883,7 +2907,7 @@ for (;;)
{
if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINCTEST(c, eptr);
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
prop_category = UCD_CATEGORY(c);
if ((prop_category == prop_value) == prop_fail_result)
RRETURN(MATCH_NOMATCH);
}
@ -2894,7 +2918,7 @@ for (;;)
{
if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINCTEST(c, eptr);
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
prop_chartype = UCD_CHARTYPE(c);
if ((prop_chartype == prop_value) == prop_fail_result)
RRETURN(MATCH_NOMATCH);
}
@ -2905,7 +2929,7 @@ for (;;)
{
if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINCTEST(c, eptr);
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
prop_script = UCD_SCRIPT(c);
if ((prop_script == prop_value) == prop_fail_result)
RRETURN(MATCH_NOMATCH);
}
@ -2924,7 +2948,7 @@ for (;;)
for (i = 1; i <= min; i++)
{
GETCHARINCTEST(c, eptr);
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
prop_category = UCD_CATEGORY(c);
if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
while (eptr < md->end_subject)
{
@ -2933,7 +2957,7 @@ for (;;)
{
GETCHARLEN(c, eptr, len);
}
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
prop_category = UCD_CATEGORY(c);
if (prop_category != ucp_M) break;
eptr += len;
}
@ -3349,7 +3373,7 @@ for (;;)
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINC(c, eptr);
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
prop_chartype = UCD_CHARTYPE(c);
if ((prop_chartype == ucp_Lu ||
prop_chartype == ucp_Ll ||
prop_chartype == ucp_Lt) == prop_fail_result)
@ -3364,7 +3388,7 @@ for (;;)
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINC(c, eptr);
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
prop_category = UCD_CATEGORY(c);
if ((prop_category == prop_value) == prop_fail_result)
RRETURN(MATCH_NOMATCH);
}
@ -3377,7 +3401,7 @@ for (;;)
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINC(c, eptr);
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
prop_chartype = UCD_CHARTYPE(c);
if ((prop_chartype == prop_value) == prop_fail_result)
RRETURN(MATCH_NOMATCH);
}
@ -3390,7 +3414,7 @@ for (;;)
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINC(c, eptr);
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
prop_script = UCD_SCRIPT(c);
if ((prop_script == prop_value) == prop_fail_result)
RRETURN(MATCH_NOMATCH);
}
@ -3412,7 +3436,7 @@ for (;;)
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINCTEST(c, eptr);
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
prop_category = UCD_CATEGORY(c);
if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
while (eptr < md->end_subject)
{
@ -3421,7 +3445,7 @@ for (;;)
{
GETCHARLEN(c, eptr, len);
}
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
prop_category = UCD_CATEGORY(c);
if (prop_category != ucp_M) break;
eptr += len;
}
@ -3739,7 +3763,7 @@ for (;;)
int len = 1;
if (eptr >= md->end_subject) break;
GETCHARLEN(c, eptr, len);
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
prop_chartype = UCD_CHARTYPE(c);
if ((prop_chartype == ucp_Lu ||
prop_chartype == ucp_Ll ||
prop_chartype == ucp_Lt) == prop_fail_result)
@ -3754,7 +3778,7 @@ for (;;)
int len = 1;
if (eptr >= md->end_subject) break;
GETCHARLEN(c, eptr, len);
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
prop_category = UCD_CATEGORY(c);
if ((prop_category == prop_value) == prop_fail_result)
break;
eptr+= len;
@ -3767,7 +3791,7 @@ for (;;)
int len = 1;
if (eptr >= md->end_subject) break;
GETCHARLEN(c, eptr, len);
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
prop_chartype = UCD_CHARTYPE(c);
if ((prop_chartype == prop_value) == prop_fail_result)
break;
eptr+= len;
@ -3780,7 +3804,7 @@ for (;;)
int len = 1;
if (eptr >= md->end_subject) break;
GETCHARLEN(c, eptr, len);
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
prop_script = UCD_SCRIPT(c);
if ((prop_script == prop_value) == prop_fail_result)
break;
eptr+= len;
@ -3809,7 +3833,7 @@ for (;;)
{
if (eptr >= md->end_subject) break;
GETCHARINCTEST(c, eptr);
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
prop_category = UCD_CATEGORY(c);
if (prop_category == ucp_M) break;
while (eptr < md->end_subject)
{
@ -3818,7 +3842,7 @@ for (;;)
{
GETCHARLEN(c, eptr, len);
}
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
prop_category = UCD_CATEGORY(c);
if (prop_category != ucp_M) break;
eptr += len;
}
@ -3840,7 +3864,7 @@ for (;;)
BACKCHAR(eptr);
GETCHARLEN(c, eptr, len);
}
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
prop_category = UCD_CATEGORY(c);
if (prop_category != ucp_M) break;
eptr--;
}
@ -4360,7 +4384,7 @@ Returns: > 0 => success; value is the number of elements filled in
< -1 => some kind of unexpected problem
*/
PCRE_EXP_DEFN int
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
int offsetcount)
@ -4672,31 +4696,53 @@ for(;;)
if (firstline)
{
USPTR t = start_match;
#ifdef SUPPORT_UTF8
if (utf8)
{
while (t < md->end_subject && !IS_NEWLINE(t))
{
t++;
while (t < end_subject && (*t & 0xc0) == 0x80) t++;
}
}
else
#endif
while (t < md->end_subject && !IS_NEWLINE(t)) t++;
end_subject = t;
}
/* Now test for a unique first byte */
/* Now advance to a unique first byte if there is one. */
if (first_byte >= 0)
{
if (first_byte_caseless)
while (start_match < end_subject &&
md->lcc[*start_match] != first_byte)
{ NEXTCHAR(start_match); }
while (start_match < end_subject && md->lcc[*start_match] != first_byte)
start_match++;
else
while (start_match < end_subject && *start_match != first_byte)
{ NEXTCHAR(start_match); }
start_match++;
}
/* Or to just after a linebreak for a multiline match if possible */
/* Or to just after a linebreak for a multiline match */
else if (startline)
{
if (start_match > md->start_subject + start_offset)
{
while (start_match <= end_subject && !WAS_NEWLINE(start_match))
{ NEXTCHAR(start_match); }
#ifdef SUPPORT_UTF8
if (utf8)
{
while (start_match < end_subject && !WAS_NEWLINE(start_match))
{
start_match++;
while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
start_match++;
}
}
else
#endif
while (start_match < end_subject && !WAS_NEWLINE(start_match))
start_match++;
/* If we have just passed a CR and the newline option is ANY or ANYCRLF,
and we are now at a LF, advance the match position by one more character.
@ -4710,16 +4756,15 @@ for(;;)
}
}
/* Or to a non-unique first char after study */
/* Or to a non-unique first byte after study */
else if (start_bits != NULL)
{
while (start_match < end_subject)
{
register unsigned int c = *start_match;
if ((start_bits[c/8] & (1 << (c&7))) == 0)
{ NEXTCHAR(start_match); }
else break;
if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++;
else break;
}
}

View File

@ -65,7 +65,7 @@ Arguments:
Returns: 0 if data returned, negative on error
*/
PCRE_EXP_DEFN int
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_fullinfo(const pcre *argument_re, const pcre_extra *extra_data, int what,
void *where)
{

View File

@ -65,7 +65,7 @@ Returns: the number of the named parentheses, or a negative number
(PCRE_ERROR_NOSUBSTRING) if not found
*/
int
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_get_stringnumber(const pcre *code, const char *stringname)
{
int rc;
@ -114,7 +114,7 @@ Returns: the length of each entry, or a negative number
(PCRE_ERROR_NOSUBSTRING) if not found
*/
int
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_get_stringtable_entries(const pcre *code, const char *stringname,
char **firstptr, char **lastptr)
{
@ -231,7 +231,7 @@ Returns: if successful:
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
*/
int
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_copy_substring(const char *subject, int *ovector, int stringcount,
int stringnumber, char *buffer, int size)
{
@ -276,7 +276,7 @@ Returns: if successful:
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
*/
int
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
int stringcount, const char *stringname, char *buffer, int size)
{
@ -308,7 +308,7 @@ Returns: if successful: 0
PCRE_ERROR_NOMEMORY (-6) failed to get store
*/
int
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
const char ***listptr)
{
@ -353,7 +353,7 @@ Argument: the result of a previous pcre_get_substring_list()
Returns: nothing
*/
void
PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
pcre_free_substring_list(const char **pointer)
{
(pcre_free)((void *)pointer);
@ -386,7 +386,7 @@ Returns: if successful:
PCRE_ERROR_NOSUBSTRING (-7) substring not present
*/
int
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_get_substring(const char *subject, int *ovector, int stringcount,
int stringnumber, const char **stringptr)
{
@ -433,7 +433,7 @@ Returns: if successful:
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
*/
int
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
int stringcount, const char *stringname, const char **stringptr)
{
@ -456,7 +456,7 @@ Argument: the result of a previous pcre_get_substring()
Returns: nothing
*/
void
PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
pcre_free_substring(const char *pointer)
{
(pcre_free)((void *)pointer);

View File

@ -52,8 +52,6 @@ differently, and global variables are not used (see pcre.in). */
#include "pcre_internal.h"
#ifndef VPCOMPAT
PCRE_EXP_DATA_DEFN int (*pcre_callout)(pcre_callout_block *) = NULL;
#endif
/* End of pcre_globals.c */

View File

@ -72,7 +72,7 @@ Returns: number of capturing subpatterns
or negative values on error
*/
PCRE_EXP_DEFN int
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_info(const pcre *argument_re, int *optptr, int *first_byte)
{
real_pcre internal_re;

View File

@ -132,6 +132,20 @@ PCRE_EXP_DATA_DEFN only if they are not already set. */
# endif
#endif
/* When compiling with the MSVC compiler, it is sometimes necessary to include
a "calling convention" before exported function names. (This is secondhand
information; I know nothing about MSVC myself). For example, something like
void __cdecl function(....)
might be needed. In order to make this easy, all the exported functions have
PCRE_CALL_CONVENTION just before their names. It is rarely needed; if not
set, we ensure here that it has no effect. */
#ifndef PCRE_CALL_CONVENTION
#define PCRE_CALL_CONVENTION
#endif
/* We need to have types that specify unsigned 16-bit and 32-bit integers. We
cannot determine these outside the compilation (e.g. by running a program as
part of "configure") because PCRE is often cross-compiled for use on other
@ -140,16 +154,20 @@ preprocessor time in standard C environments. */
/* Select the 16-bit integer pair at preprocessor time: short is used when
USHRT_MAX is 65535, otherwise int is used when UINT_MAX is 65535. If neither
limit matches, compilation is stopped with an #error. */
#if USHRT_MAX == 65535
typedef unsigned short pcre_uint16;
typedef short pcre_int16;
#elif UINT_MAX == 65535
typedef unsigned int pcre_uint16;
typedef int pcre_int16;
#else
#error Cannot determine a type for 16-bit unsigned integers
#endif
/* Select the 32-bit integer pair in the same way: int is used when UINT_MAX is
4294967295, otherwise long int is used when ULONG_MAX is 4294967295. If neither
limit matches, compilation is stopped with an #error. */
#if UINT_MAX == 4294967295
typedef unsigned int pcre_uint32;
typedef int pcre_int32;
#elif ULONG_MAX == 4294967295
typedef unsigned long int pcre_uint32;
typedef long int pcre_int32;
#else
#error Cannot determine a type for 32-bit unsigned integers
#endif
@ -241,7 +259,6 @@ option on the command line. */
#define strncmp(s1,s2,m) _strncmp(s1,s2,m)
#define memcmp(s,c,n) _memcmp(s,c,n)
#define memcpy(d,s,n) _memcpy(d,s,n)
#define memmove(d,s,n) _memmove(d,s,n)
#define memset(s,c,n) _memset(s,c,n)
#else /* VPCOMPAT */
@ -363,7 +380,6 @@ never be called in byte mode. To make sure it can never even appear when UTF-8
support is omitted, we don't even define it. */
#ifndef SUPPORT_UTF8
#define NEXTCHAR(p) p++;
#define GETCHAR(c, eptr) c = *eptr;
#define GETCHARTEST(c, eptr) c = *eptr;
#define GETCHARINC(c, eptr) c = *eptr++;
@ -373,13 +389,6 @@ support is omitted, we don't even define it. */
#else /* SUPPORT_UTF8 */
/* Advance a character pointer one byte in non-UTF-8 mode and by one character
in UTF-8 mode. */
#define NEXTCHAR(p) \
p++; \
if (utf8) { while((*p & 0xc0) == 0x80) p++; }
/* Get the next UTF-8 character, not advancing the pointer. This is called when
we know we are in UTF-8 mode. */
@ -549,7 +558,8 @@ variable-length repeat, or a anything other than literal characters. */
#define REQ_CASELESS 0x0100 /* indicates caselessness */
#define REQ_VARY 0x0200 /* reqbyte followed non-literal item */
/* Miscellaneous definitions */
/* Miscellaneous definitions. The #ifndef is to pacify compiler warnings in
environments where these macros are defined elsewhere. */
typedef gboolean BOOL;
@ -1123,12 +1133,24 @@ extern BOOL _pcre_is_newline(const uschar *, int, const uschar *,
extern int _pcre_ord2utf8(int, uschar *);
extern real_pcre *_pcre_try_flipped(const real_pcre *, real_pcre *,
const pcre_study_data *, pcre_study_data *);
extern int _pcre_ucp_findprop(const unsigned int, int *, int *);
extern unsigned int _pcre_ucp_othercase(const unsigned int);
extern int _pcre_valid_utf8(const uschar *, int);
extern BOOL _pcre_was_newline(const uschar *, int, const uschar *,
int *, BOOL);
extern BOOL _pcre_xclass(int, const uschar *);
extern unsigned int _pcre_ucp_othercase(unsigned int);
extern const int _pcre_ucp_gentype[];
/* UCD access macros. Each one takes a character value and forwards to a glib
Unicode function or to one of the _pcre_ucp_* items declared just above. */
#include "../glib.h"
/* Detailed character type of ch, as returned by g_unichar_type(). */
#define UCD_CHARTYPE(ch) g_unichar_type(ch)
/* Script of ch, as returned by g_unichar_get_script(). */
#define UCD_SCRIPT(ch) g_unichar_get_script(ch)
/* General category of ch: the detailed type looked up in _pcre_ucp_gentype[]. */
#define UCD_CATEGORY(ch) _pcre_ucp_gentype[UCD_CHARTYPE(ch)]
/* Other case of ch, as returned by _pcre_ucp_othercase(). */
#define UCD_OTHERCASE(ch) _pcre_ucp_othercase(ch)
#endif

View File

@ -78,8 +78,10 @@ for (j = i; j > 0; j--)
*buffer = _pcre_utf8_table2[i] | cvalue;
return i + 1;
#else
return 0; /* Keep compiler happy; this function won't ever be */
#endif /* called when SUPPORT_UTF8 is not defined. */
(void)(cvalue); /* Keep compiler happy; this function won't ever be */
(void)(buffer); /* called when SUPPORT_UTF8 is not defined. */
return 0;
#endif
}
/* End of pcre_ord2utf8.c */

View File

@ -68,7 +68,7 @@ Returns: the (possibly updated) count value (a non-negative number), or
a negative error number
*/
PCRE_EXP_DEFN int
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_refcount(pcre *argument_re, int adjust)
{
real_pcre *re = (real_pcre *)argument_re;

View File

@ -220,6 +220,7 @@ do
/* SKIPZERO skips the bracket. */
case OP_SKIPZERO:
tcode++;
do tcode += GET(tcode,1); while (*tcode == OP_ALT);
tcode += 1 + LINK_SIZE;
break;
@ -503,7 +504,7 @@ Returns: pointer to a pcre_extra block, with study_data filled in and the
NULL on error or if no optimization possible
*/
PCRE_EXP_DEFN pcre_extra *
PCRE_EXP_DEFN pcre_extra * PCRE_CALL_CONVENTION
pcre_study(const pcre *external_re, int options, const char **errorptr)
{
uschar start_bits[32];

View File

@ -87,6 +87,19 @@ const uschar _pcre_utf8_table4[] = {
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
/* Table to translate from particular type value to the general value. It is
indexed by the detailed character type (UCD_CATEGORY looks up
_pcre_ucp_gentype[UCD_CHARTYPE(ch)]), and each entry is the general category
for that type. The comment on each row names the detailed types that the
entries on that row stand for, in order. */
const int _pcre_ucp_gentype[] = {
ucp_C, ucp_C, ucp_C, ucp_C, ucp_C, /* Cc, Cf, Cn, Co, Cs */
ucp_L, ucp_L, ucp_L, ucp_L, ucp_L, /* Ll, Lu, Lm, Lo, Lt */
ucp_M, ucp_M, ucp_M, /* Mc, Me, Mn */
ucp_N, ucp_N, ucp_N, /* Nd, Nl, No */
ucp_P, ucp_P, ucp_P, ucp_P, ucp_P, /* Pc, Pd, Pe, Pf, Pi */
ucp_P, ucp_P, /* Ps, Po */
ucp_S, ucp_S, ucp_S, ucp_S, /* Sc, Sk, Sm, So */
ucp_Z, ucp_Z, ucp_Z /* Zl, Zp, Zs */
};
/* The pcre_utt[] table below translates Unicode property names into type and
code values. It is searched by binary chop, so must be in collating sequence of
name. Originally, the table contained pointers to the name strings in the first
@ -94,7 +107,10 @@ field of each entry. However, that leads to a large number of relocations when
a shared library is dynamically loaded. A significant reduction is made by
putting all the names into a single, large string and then using offsets in the
table itself. Maintenance is more error-prone, but frequent changes to this
data is unlikely. */
data are unlikely.
July 2008: There is now a script called maint/GenerateUtt.py which can be used
to generate this data instead of maintaining it entirely by hand. */
const char _pcre_utt_names[] =
"Any\0"
@ -108,8 +124,10 @@ const char _pcre_utt_names[] =
"Buhid\0"
"C\0"
"Canadian_Aboriginal\0"
"Carian\0"
"Cc\0"
"Cf\0"
"Cham\0"
"Cherokee\0"
"Cn\0"
"Co\0"
@ -136,12 +154,14 @@ const char _pcre_utt_names[] =
"Inherited\0"
"Kannada\0"
"Katakana\0"
"Kayah_Li\0"
"Kharoshthi\0"
"Khmer\0"
"L\0"
"L&\0"
"Lao\0"
"Latin\0"
"Lepcha\0"
"Limbu\0"
"Linear_B\0"
"Ll\0"
@ -149,6 +169,8 @@ const char _pcre_utt_names[] =
"Lo\0"
"Lt\0"
"Lu\0"
"Lycian\0"
"Lydian\0"
"M\0"
"Malayalam\0"
"Mc\0"
@ -163,6 +185,7 @@ const char _pcre_utt_names[] =
"Nl\0"
"No\0"
"Ogham\0"
"Ol_Chiki\0"
"Old_Italic\0"
"Old_Persian\0"
"Oriya\0"
@ -177,14 +200,17 @@ const char _pcre_utt_names[] =
"Pi\0"
"Po\0"
"Ps\0"
"Rejang\0"
"Runic\0"
"S\0"
"Saurashtra\0"
"Sc\0"
"Shavian\0"
"Sinhala\0"
"Sk\0"
"Sm\0"
"So\0"
"Sundanese\0"
"Syloti_Nagri\0"
"Syriac\0"
"Tagalog\0"
@ -197,6 +223,7 @@ const char _pcre_utt_names[] =
"Tibetan\0"
"Tifinagh\0"
"Ugaritic\0"
"Vai\0"
"Yi\0"
"Z\0"
"Zl\0"
@ -204,111 +231,122 @@ const char _pcre_utt_names[] =
"Zs\0";
const ucp_type_table _pcre_utt[] = {
{ 0, PT_ANY, 0 },
{ 4, PT_SC, ucp_Arabic },
{ 11, PT_SC, ucp_Armenian },
{ 20, PT_SC, ucp_Balinese },
{ 29, PT_SC, ucp_Bengali },
{ 37, PT_SC, ucp_Bopomofo },
{ 46, PT_SC, ucp_Braille },
{ 54, PT_SC, ucp_Buginese },
{ 63, PT_SC, ucp_Buhid },
{ 69, PT_GC, ucp_C },
{ 71, PT_SC, ucp_Canadian_Aboriginal },
{ 91, PT_PC, ucp_Cc },
{ 94, PT_PC, ucp_Cf },
{ 97, PT_SC, ucp_Cherokee },
{ 106, PT_PC, ucp_Cn },
{ 109, PT_PC, ucp_Co },
{ 112, PT_SC, ucp_Common },
{ 119, PT_SC, ucp_Coptic },
{ 126, PT_PC, ucp_Cs },
{ 129, PT_SC, ucp_Cuneiform },
{ 139, PT_SC, ucp_Cypriot },
{ 147, PT_SC, ucp_Cyrillic },
{ 156, PT_SC, ucp_Deseret },
{ 164, PT_SC, ucp_Devanagari },
{ 175, PT_SC, ucp_Ethiopic },
{ 184, PT_SC, ucp_Georgian },
{ 193, PT_SC, ucp_Glagolitic },
{ 204, PT_SC, ucp_Gothic },
{ 211, PT_SC, ucp_Greek },
{ 217, PT_SC, ucp_Gujarati },
{ 226, PT_SC, ucp_Gurmukhi },
{ 235, PT_SC, ucp_Han },
{ 239, PT_SC, ucp_Hangul },
{ 246, PT_SC, ucp_Hanunoo },
{ 254, PT_SC, ucp_Hebrew },
{ 261, PT_SC, ucp_Hiragana },
{ 270, PT_SC, ucp_Inherited },
{ 280, PT_SC, ucp_Kannada },
{ 288, PT_SC, ucp_Katakana },
{ 297, PT_SC, ucp_Kharoshthi },
{ 308, PT_SC, ucp_Khmer },
{ 314, PT_GC, ucp_L },
{ 316, PT_LAMP, 0 },
{ 319, PT_SC, ucp_Lao },
{ 323, PT_SC, ucp_Latin },
{ 329, PT_SC, ucp_Limbu },
{ 335, PT_SC, ucp_Linear_B },
{ 344, PT_PC, ucp_Ll },
{ 347, PT_PC, ucp_Lm },
{ 350, PT_PC, ucp_Lo },
{ 353, PT_PC, ucp_Lt },
{ 356, PT_PC, ucp_Lu },
{ 359, PT_GC, ucp_M },
{ 361, PT_SC, ucp_Malayalam },
{ 371, PT_PC, ucp_Mc },
{ 374, PT_PC, ucp_Me },
{ 377, PT_PC, ucp_Mn },
{ 380, PT_SC, ucp_Mongolian },
{ 390, PT_SC, ucp_Myanmar },
{ 398, PT_GC, ucp_N },
{ 400, PT_PC, ucp_Nd },
{ 403, PT_SC, ucp_New_Tai_Lue },
{ 415, PT_SC, ucp_Nko },
{ 419, PT_PC, ucp_Nl },
{ 422, PT_PC, ucp_No },
{ 425, PT_SC, ucp_Ogham },
{ 431, PT_SC, ucp_Old_Italic },
{ 442, PT_SC, ucp_Old_Persian },
{ 454, PT_SC, ucp_Oriya },
{ 460, PT_SC, ucp_Osmanya },
{ 468, PT_GC, ucp_P },
{ 470, PT_PC, ucp_Pc },
{ 473, PT_PC, ucp_Pd },
{ 476, PT_PC, ucp_Pe },
{ 479, PT_PC, ucp_Pf },
{ 482, PT_SC, ucp_Phags_Pa },
{ 491, PT_SC, ucp_Phoenician },
{ 502, PT_PC, ucp_Pi },
{ 505, PT_PC, ucp_Po },
{ 508, PT_PC, ucp_Ps },
{ 511, PT_SC, ucp_Runic },
{ 517, PT_GC, ucp_S },
{ 519, PT_PC, ucp_Sc },
{ 522, PT_SC, ucp_Shavian },
{ 530, PT_SC, ucp_Sinhala },
{ 538, PT_PC, ucp_Sk },
{ 541, PT_PC, ucp_Sm },
{ 544, PT_PC, ucp_So },
{ 547, PT_SC, ucp_Syloti_Nagri },
{ 560, PT_SC, ucp_Syriac },
{ 567, PT_SC, ucp_Tagalog },
{ 575, PT_SC, ucp_Tagbanwa },
{ 584, PT_SC, ucp_Tai_Le },
{ 591, PT_SC, ucp_Tamil },
{ 597, PT_SC, ucp_Telugu },
{ 604, PT_SC, ucp_Thaana },
{ 611, PT_SC, ucp_Thai },
{ 616, PT_SC, ucp_Tibetan },
{ 624, PT_SC, ucp_Tifinagh },
{ 633, PT_SC, ucp_Ugaritic },
{ 642, PT_SC, ucp_Yi },
{ 645, PT_GC, ucp_Z },
{ 647, PT_PC, ucp_Zl },
{ 650, PT_PC, ucp_Zp },
{ 653, PT_PC, ucp_Zs }
{ 0, PT_ANY, 0 },
{ 4, PT_SC, ucp_Arabic },
{ 11, PT_SC, ucp_Armenian },
{ 20, PT_SC, ucp_Balinese },
{ 29, PT_SC, ucp_Bengali },
{ 37, PT_SC, ucp_Bopomofo },
{ 46, PT_SC, ucp_Braille },
{ 54, PT_SC, ucp_Buginese },
{ 63, PT_SC, ucp_Buhid },
{ 69, PT_GC, ucp_C },
{ 71, PT_SC, ucp_Canadian_Aboriginal },
{ 91, PT_SC, ucp_Carian },
{ 98, PT_PC, ucp_Cc },
{ 101, PT_PC, ucp_Cf },
{ 104, PT_SC, ucp_Cham },
{ 109, PT_SC, ucp_Cherokee },
{ 118, PT_PC, ucp_Cn },
{ 121, PT_PC, ucp_Co },
{ 124, PT_SC, ucp_Common },
{ 131, PT_SC, ucp_Coptic },
{ 138, PT_PC, ucp_Cs },
{ 141, PT_SC, ucp_Cuneiform },
{ 151, PT_SC, ucp_Cypriot },
{ 159, PT_SC, ucp_Cyrillic },
{ 168, PT_SC, ucp_Deseret },
{ 176, PT_SC, ucp_Devanagari },
{ 187, PT_SC, ucp_Ethiopic },
{ 196, PT_SC, ucp_Georgian },
{ 205, PT_SC, ucp_Glagolitic },
{ 216, PT_SC, ucp_Gothic },
{ 223, PT_SC, ucp_Greek },
{ 229, PT_SC, ucp_Gujarati },
{ 238, PT_SC, ucp_Gurmukhi },
{ 247, PT_SC, ucp_Han },
{ 251, PT_SC, ucp_Hangul },
{ 258, PT_SC, ucp_Hanunoo },
{ 266, PT_SC, ucp_Hebrew },
{ 273, PT_SC, ucp_Hiragana },
{ 282, PT_SC, ucp_Inherited },
{ 292, PT_SC, ucp_Kannada },
{ 300, PT_SC, ucp_Katakana },
{ 309, PT_SC, ucp_Kayah_Li },
{ 318, PT_SC, ucp_Kharoshthi },
{ 329, PT_SC, ucp_Khmer },
{ 335, PT_GC, ucp_L },
{ 337, PT_LAMP, 0 },
{ 340, PT_SC, ucp_Lao },
{ 344, PT_SC, ucp_Latin },
{ 350, PT_SC, ucp_Lepcha },
{ 357, PT_SC, ucp_Limbu },
{ 363, PT_SC, ucp_Linear_B },
{ 372, PT_PC, ucp_Ll },
{ 375, PT_PC, ucp_Lm },
{ 378, PT_PC, ucp_Lo },
{ 381, PT_PC, ucp_Lt },
{ 384, PT_PC, ucp_Lu },
{ 387, PT_SC, ucp_Lycian },
{ 394, PT_SC, ucp_Lydian },
{ 401, PT_GC, ucp_M },
{ 403, PT_SC, ucp_Malayalam },
{ 413, PT_PC, ucp_Mc },
{ 416, PT_PC, ucp_Me },
{ 419, PT_PC, ucp_Mn },
{ 422, PT_SC, ucp_Mongolian },
{ 432, PT_SC, ucp_Myanmar },
{ 440, PT_GC, ucp_N },
{ 442, PT_PC, ucp_Nd },
{ 445, PT_SC, ucp_New_Tai_Lue },
{ 457, PT_SC, ucp_Nko },
{ 461, PT_PC, ucp_Nl },
{ 464, PT_PC, ucp_No },
{ 467, PT_SC, ucp_Ogham },
{ 473, PT_SC, ucp_Ol_Chiki },
{ 482, PT_SC, ucp_Old_Italic },
{ 493, PT_SC, ucp_Old_Persian },
{ 505, PT_SC, ucp_Oriya },
{ 511, PT_SC, ucp_Osmanya },
{ 519, PT_GC, ucp_P },
{ 521, PT_PC, ucp_Pc },
{ 524, PT_PC, ucp_Pd },
{ 527, PT_PC, ucp_Pe },
{ 530, PT_PC, ucp_Pf },
{ 533, PT_SC, ucp_Phags_Pa },
{ 542, PT_SC, ucp_Phoenician },
{ 553, PT_PC, ucp_Pi },
{ 556, PT_PC, ucp_Po },
{ 559, PT_PC, ucp_Ps },
{ 562, PT_SC, ucp_Rejang },
{ 569, PT_SC, ucp_Runic },
{ 575, PT_GC, ucp_S },
{ 577, PT_SC, ucp_Saurashtra },
{ 588, PT_PC, ucp_Sc },
{ 591, PT_SC, ucp_Shavian },
{ 599, PT_SC, ucp_Sinhala },
{ 607, PT_PC, ucp_Sk },
{ 610, PT_PC, ucp_Sm },
{ 613, PT_PC, ucp_So },
{ 616, PT_SC, ucp_Sundanese },
{ 626, PT_SC, ucp_Syloti_Nagri },
{ 639, PT_SC, ucp_Syriac },
{ 646, PT_SC, ucp_Tagalog },
{ 654, PT_SC, ucp_Tagbanwa },
{ 663, PT_SC, ucp_Tai_Le },
{ 670, PT_SC, ucp_Tamil },
{ 676, PT_SC, ucp_Telugu },
{ 683, PT_SC, ucp_Thaana },
{ 690, PT_SC, ucp_Thai },
{ 695, PT_SC, ucp_Tibetan },
{ 703, PT_SC, ucp_Tifinagh },
{ 712, PT_SC, ucp_Ugaritic },
{ 721, PT_SC, ucp_Vai },
{ 725, PT_SC, ucp_Yi },
{ 728, PT_GC, ucp_Z },
{ 730, PT_PC, ucp_Zl },
{ 733, PT_PC, ucp_Zp },
{ 736, PT_PC, ucp_Zs }
};
const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table);

View File

@ -43,58 +43,9 @@ POSSIBILITY OF SUCH DAMAGE.
/* This module contains code for searching the table of Unicode character
properties. */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre_internal.h"
#include "ucp.h" /* Category definitions */
#include "ucpinternal.h" /* Internal table details */
/* Table to translate from particular type value to the general value. This
file-local table is indexed by the detailed character type that
_pcre_ucp_findprop() stores in *type_ptr. The comment on each row names the
detailed types that the entries on that row stand for, in order. */
static int ucp_gentype[] = {
ucp_C, ucp_C, ucp_C, ucp_C, ucp_C, /* Cc, Cf, Cn, Co, Cs */
ucp_L, ucp_L, ucp_L, ucp_L, ucp_L, /* Ll, Lu, Lm, Lo, Lt */
ucp_M, ucp_M, ucp_M, /* Mc, Me, Mn */
ucp_N, ucp_N, ucp_N, /* Nd, Nl, No */
ucp_P, ucp_P, ucp_P, ucp_P, ucp_P, /* Pc, Pd, Pe, Pf, Pi */
ucp_P, ucp_P, /* Ps, Po */
ucp_S, ucp_S, ucp_S, ucp_S, /* Sc, Sk, Sm, So */
ucp_Z, ucp_Z, ucp_Z /* Zl, Zp, Zs */
};
/*************************************************
* Search table and return type *
*************************************************/
/* Three values are returned: the category is ucp_C, ucp_L, etc. The detailed
character type is ucp_Lu, ucp_Nd, etc. The script is ucp_Latin, etc.
Arguments:
c the character value
type_ptr the detailed character type is returned here
script_ptr the script is returned here
Returns: the character type category
*/
int
_pcre_ucp_findprop(const unsigned int c, int *type_ptr, int *script_ptr)
{
/* Note that the Unicode types have the same values in glib and in
* PCRE, so ucp_Ll == G_UNICODE_LOWERCASE_LETTER,
* ucp_Zs == G_UNICODE_SPACE_SEPARATOR, and so on. */
/* Both properties come straight from glib and are written through the
caller's pointers; neither pointer is checked for NULL here. */
*type_ptr = g_unichar_type(c);
*script_ptr = g_unichar_get_script(c);
/* The detailed type just stored is used as the index into ucp_gentype[] to
obtain the general category that is returned. */
return ucp_gentype[*type_ptr];
}
/*************************************************
@ -113,7 +64,7 @@ Returns: the other case or NOTACHAR if none
unsigned int
_pcre_ucp_othercase(const unsigned int c)
{
int other_case = NOTACHAR;
unsigned int other_case = NOTACHAR;
if (g_unichar_islower(c))
other_case = g_unichar_toupper(c);

View File

@ -1,4 +1,3 @@
#include "config.h"
#include "pcre_internal.h"
/*

View File

@ -79,7 +79,7 @@ I could find no way of detecting that a macro is defined as an empty string at
pre-processor time. This hack uses a standard trick for avoiding calling
the STRING macro with an empty argument when doing the test. */
PCRE_EXP_DEFN const char *
PCRE_EXP_DEFN const char * PCRE_CALL_CONVENTION
pcre_version(void)
{
return (XSTRING(Z PCRE_PRERELEASE)[1] == 0)?

View File

@ -104,9 +104,7 @@ while ((t = *data++) != XCL_END)
#ifdef SUPPORT_UCP
else /* XCL_PROP & XCL_NOTPROP */
{
int chartype, script;
int category = _pcre_ucp_findprop(c, &chartype, &script);
int chartype = UCD_CHARTYPE(c);
switch(*data)
{
case PT_ANY:
@ -119,7 +117,7 @@ while ((t = *data++) != XCL_END)
break;
case PT_GC:
if ((data[1] == category) == (t == XCL_PROP)) return !negated;
if ((data[1] == _pcre_ucp_gentype[chartype]) == (t == XCL_PROP)) return !negated;
break;
case PT_PC:
@ -127,7 +125,7 @@ while ((t = *data++) != XCL_END)
break;
case PT_SC:
if ((data[1] == script) == (t == XCL_PROP)) return !negated;
if ((data[1] == UCD_SCRIPT(c)) == (t == XCL_PROP)) return !negated;
break;
/* This should never occur, but compilers may mutter if there is no

View File

@ -125,7 +125,18 @@ enum {
ucp_Cuneiform = G_UNICODE_SCRIPT_CUNEIFORM, /* New for Unicode 5.0.0 */
ucp_Nko = G_UNICODE_SCRIPT_NKO, /* New for Unicode 5.0.0 */
ucp_Phags_Pa = G_UNICODE_SCRIPT_PHAGS_PA, /* New for Unicode 5.0.0 */
ucp_Phoenician = G_UNICODE_SCRIPT_PHOENICIAN /* New for Unicode 5.0.0 */
ucp_Phoenician = G_UNICODE_SCRIPT_PHOENICIAN, /* New for Unicode 5.0.0 */
ucp_Carian = G_UNICODE_SCRIPT_CARIAN, /* New for Unicode 5.1 */
ucp_Cham = G_UNICODE_SCRIPT_CHAM, /* New for Unicode 5.1 */
ucp_Kayah_Li = G_UNICODE_SCRIPT_KAYAH_LI, /* New for Unicode 5.1 */
ucp_Lepcha = G_UNICODE_SCRIPT_LEPCHA, /* New for Unicode 5.1 */
ucp_Lycian = G_UNICODE_SCRIPT_LYCIAN, /* New for Unicode 5.1 */
ucp_Lydian = G_UNICODE_SCRIPT_LYDIAN, /* New for Unicode 5.1 */
ucp_Ol_Chiki = G_UNICODE_SCRIPT_OL_CHIKI, /* New for Unicode 5.1 */
ucp_Rejang = G_UNICODE_SCRIPT_REJANG, /* New for Unicode 5.1 */
ucp_Saurashtra = G_UNICODE_SCRIPT_SAURASHTRA, /* New for Unicode 5.1 */
ucp_Sundanese = G_UNICODE_SCRIPT_SUNDANESE, /* New for Unicode 5.1 */
ucp_Vai = G_UNICODE_SCRIPT_VAI /* New for Unicode 5.1 */
};
#endif