mirror of
https://gitlab.gnome.org/GNOME/glib.git
synced 2024-12-25 15:06:14 +01:00
Update PCRE to 7.8
svn path=/trunk/; revision=7813
This commit is contained in:
parent
1da8112081
commit
d6f23279e7
@ -1,3 +1,7 @@
|
||||
2009-01-18 Matthias Clasen <mclasen@redhat.com>
|
||||
|
||||
* glib/pcre: Update to PCRE 7.8
|
||||
|
||||
2009-01-17 Matthias Clasen <mclasen@redhat.com>
|
||||
|
||||
Bug 567977 – textdomain() macro should not return NULL when
|
||||
|
@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
/* The current PCRE version information. */
|
||||
|
||||
#define PCRE_MAJOR 7
|
||||
#define PCRE_MINOR 7
|
||||
#define PCRE_MINOR 8
|
||||
#define PCRE_PRERELEASE
|
||||
#define PCRE_DATE 2008-05-07
|
||||
#define PCRE_DATE 2008-09-05
|
||||
|
||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||
imported have to be identified as such. When building PCRE, the appropriate
|
||||
|
@ -1,6 +1,3 @@
|
||||
/* This file is autogenerated by ../update-pcre/update.sh during
|
||||
* the update of the local copy of PCRE.
|
||||
*/
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
@ -331,7 +331,7 @@ static const char *
|
||||
find_error_text(int n)
|
||||
{
|
||||
const char *s = error_texts;
|
||||
for (; n > 0; n--) while (*s++ != 0);
|
||||
for (; n > 0; n--) while (*s++ != 0) {};
|
||||
return s;
|
||||
}
|
||||
|
||||
@ -437,7 +437,7 @@ else
|
||||
{
|
||||
const uschar *p;
|
||||
for (p = ptr+2; *p != 0 && *p != '}'; p++)
|
||||
if (*p != '-' && g_ascii_isdigit(*p) == 0) break;
|
||||
if (*p != '-' && g_ascii_isdigit (*p) == 0) break;
|
||||
if (*p != 0 && *p != '}')
|
||||
{
|
||||
c = -ESC_k;
|
||||
@ -456,7 +456,7 @@ else
|
||||
else negated = FALSE;
|
||||
|
||||
c = 0;
|
||||
while (g_ascii_isdigit(ptr[1]) != 0)
|
||||
while (g_ascii_isdigit (ptr[1]) != 0)
|
||||
c = c * 10 + *(++ptr) - '0';
|
||||
|
||||
if (c < 0) /* Integer overflow */
|
||||
@ -509,7 +509,7 @@ else
|
||||
{
|
||||
oldptr = ptr;
|
||||
c -= '0';
|
||||
while (g_ascii_isdigit(ptr[1]) != 0)
|
||||
while (g_ascii_isdigit (ptr[1]))
|
||||
c = c * 10 + *(++ptr) - '0';
|
||||
if (c < 0) /* Integer overflow */
|
||||
{
|
||||
@ -559,7 +559,7 @@ else
|
||||
int count = 0;
|
||||
|
||||
c = 0;
|
||||
while (g_ascii_isxdigit(*pt) != 0)
|
||||
while (g_ascii_isxdigit (*pt) != 0)
|
||||
{
|
||||
register int cc = *pt++;
|
||||
if (c == 0 && cc == '0') continue; /* Leading zeroes */
|
||||
@ -588,7 +588,7 @@ else
|
||||
/* Read just a single-byte hex-defined char */
|
||||
|
||||
c = 0;
|
||||
while (i++ < 2 && g_ascii_isxdigit(ptr[1]) != 0)
|
||||
while (i++ < 2 && g_ascii_isxdigit (ptr[1]) != 0)
|
||||
{
|
||||
int cc; /* Some compilers don't like ++ */
|
||||
cc = *(++ptr); /* in initializers */
|
||||
@ -757,15 +757,15 @@ Returns: TRUE or FALSE
|
||||
static BOOL
|
||||
is_counted_repeat(const uschar *p)
|
||||
{
|
||||
if (g_ascii_isdigit(*p++) == 0) return FALSE;
|
||||
while (g_ascii_isdigit(*p) != 0) p++;
|
||||
if (g_ascii_isdigit (*p++) == 0) return FALSE;
|
||||
while (g_ascii_isdigit (*p) != 0) p++;
|
||||
if (*p == '}') return TRUE;
|
||||
|
||||
if (*p++ != ',') return FALSE;
|
||||
if (*p == '}') return TRUE;
|
||||
|
||||
if (g_ascii_isdigit(*p++) == 0) return FALSE;
|
||||
while (g_ascii_isdigit(*p) != 0) p++;
|
||||
if (g_ascii_isdigit (*p++) == 0) return FALSE;
|
||||
while (g_ascii_isdigit (*p) != 0) p++;
|
||||
|
||||
return (*p == '}');
|
||||
}
|
||||
@ -800,7 +800,7 @@ int max = -1;
|
||||
/* Read the minimum value and do a paranoid check: a negative value indicates
|
||||
an integer overflow. */
|
||||
|
||||
while (g_ascii_isdigit(*p) != 0) min = min * 10 + *p++ - '0';
|
||||
while (g_ascii_isdigit (*p) != 0) min = min * 10 + *p++ - '0';
|
||||
if (min < 0 || min > 65535)
|
||||
{
|
||||
*errorcodeptr = ERR5;
|
||||
@ -815,7 +815,7 @@ if (*p == '}') max = min; else
|
||||
if (*(++p) != '}')
|
||||
{
|
||||
max = 0;
|
||||
while(g_ascii_isdigit(*p) != 0) max = max * 10 + *p++ - '0';
|
||||
while(g_ascii_isdigit (*p) != 0) max = max * 10 + *p++ - '0';
|
||||
if (max < 0 || max > 65535)
|
||||
{
|
||||
*errorcodeptr = ERR5;
|
||||
@ -878,7 +878,7 @@ for (; *ptr != 0; ptr++)
|
||||
if (*(++ptr) == 0) return -1;
|
||||
if (*ptr == 'Q') for (;;)
|
||||
{
|
||||
while (*(++ptr) != 0 && *ptr != '\\');
|
||||
while (*(++ptr) != 0 && *ptr != '\\') {};
|
||||
if (*ptr == 0) return -1;
|
||||
if (*(++ptr) == 'E') break;
|
||||
}
|
||||
@ -921,7 +921,7 @@ for (; *ptr != 0; ptr++)
|
||||
if (*(++ptr) == 0) return -1;
|
||||
if (*ptr == 'Q') for (;;)
|
||||
{
|
||||
while (*(++ptr) != 0 && *ptr != '\\');
|
||||
while (*(++ptr) != 0 && *ptr != '\\') {};
|
||||
if (*ptr == 0) return -1;
|
||||
if (*(++ptr) == 'E') break;
|
||||
}
|
||||
@ -935,7 +935,7 @@ for (; *ptr != 0; ptr++)
|
||||
|
||||
if (xmode && *ptr == '#')
|
||||
{
|
||||
while (*(++ptr) != 0 && *ptr != '\n');
|
||||
while (*(++ptr) != 0 && *ptr != '\n') {};
|
||||
if (*ptr == 0) return -1;
|
||||
continue;
|
||||
}
|
||||
@ -1326,6 +1326,8 @@ for (;;)
|
||||
if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];
|
||||
break;
|
||||
}
|
||||
#else
|
||||
(void)(utf8); /* Keep compiler happy by referencing function argument */
|
||||
#endif
|
||||
}
|
||||
}
|
||||
@ -1419,6 +1421,8 @@ for (;;)
|
||||
if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];
|
||||
break;
|
||||
}
|
||||
#else
|
||||
(void)(utf8); /* Keep compiler happy by referencing function argument */
|
||||
#endif
|
||||
}
|
||||
}
|
||||
@ -1891,7 +1895,7 @@ get_othercase_range(unsigned int *cptr, unsigned int d, unsigned int *ocptr,
|
||||
unsigned int c, othercase, next;
|
||||
|
||||
for (c = *cptr; c <= d; c++)
|
||||
{ if ((othercase = _pcre_ucp_othercase(c)) != NOTACHAR) break; }
|
||||
{ if ((othercase = UCD_OTHERCASE(c)) != c) break; }
|
||||
|
||||
if (c > d) return FALSE;
|
||||
|
||||
@ -1900,7 +1904,7 @@ next = othercase + 1;
|
||||
|
||||
for (++c; c <= d; c++)
|
||||
{
|
||||
if (_pcre_ucp_othercase(c) != next) break;
|
||||
if (UCD_OTHERCASE(c) != next) break;
|
||||
next++;
|
||||
}
|
||||
|
||||
@ -2010,6 +2014,8 @@ if (next >= 0) switch(op_code)
|
||||
case OP_CHAR:
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8 && item > 127) { GETCHAR(item, utf8_char); }
|
||||
#else
|
||||
(void)(utf8_char); /* Keep compiler happy by referencing function argument */
|
||||
#endif
|
||||
return item != next;
|
||||
|
||||
@ -2028,7 +2034,7 @@ if (next >= 0) switch(op_code)
|
||||
unsigned int othercase;
|
||||
if (next < 128) othercase = cd->fcc[next]; else
|
||||
#ifdef SUPPORT_UCP
|
||||
othercase = _pcre_ucp_othercase((unsigned int)next);
|
||||
othercase = UCD_OTHERCASE((unsigned int)next);
|
||||
#else
|
||||
othercase = NOTACHAR;
|
||||
#endif
|
||||
@ -2049,7 +2055,7 @@ if (next >= 0) switch(op_code)
|
||||
unsigned int othercase;
|
||||
if (next < 128) othercase = cd->fcc[next]; else
|
||||
#ifdef SUPPORT_UCP
|
||||
othercase = _pcre_ucp_othercase(next);
|
||||
othercase = UCD_OTHERCASE(next);
|
||||
#else
|
||||
othercase = NOTACHAR;
|
||||
#endif
|
||||
@ -3215,7 +3221,7 @@ for (;; ptr++)
|
||||
if ((options & PCRE_CASELESS) != 0)
|
||||
{
|
||||
unsigned int othercase;
|
||||
if ((othercase = _pcre_ucp_othercase(c)) != NOTACHAR)
|
||||
if ((othercase = UCD_OTHERCASE(c)) != c)
|
||||
{
|
||||
*class_utf8data++ = XCL_SINGLE;
|
||||
class_utf8data += _pcre_ord2utf8(othercase, class_utf8data);
|
||||
@ -4092,7 +4098,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
|
||||
const char *vn = verbnames;
|
||||
const uschar *name = ++ptr;
|
||||
previous = NULL;
|
||||
while ((cd->ctypes[*++ptr] & ctype_letter) != 0);
|
||||
while ((cd->ctypes[*++ptr] & ctype_letter) != 0) {};
|
||||
if (*ptr == ':')
|
||||
{
|
||||
*errorcodeptr = ERR59; /* Not supported */
|
||||
@ -4230,7 +4236,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
|
||||
while ((cd->ctypes[*ptr] & ctype_word) != 0)
|
||||
{
|
||||
if (recno >= 0)
|
||||
recno = (g_ascii_isdigit(*ptr) != 0)?
|
||||
recno = (g_ascii_isdigit (*ptr) != 0)?
|
||||
recno * 10 + *ptr - '0' : -1;
|
||||
ptr++;
|
||||
}
|
||||
@ -4315,7 +4321,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
|
||||
recno = 0;
|
||||
for (i = 1; i < namelen; i++)
|
||||
{
|
||||
if (g_ascii_isdigit(name[i]) == 0)
|
||||
if (g_ascii_isdigit (name[i]) == 0)
|
||||
{
|
||||
*errorcodeptr = ERR15;
|
||||
goto FAILED;
|
||||
@ -4411,7 +4417,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
|
||||
*code++ = OP_CALLOUT;
|
||||
{
|
||||
int n = 0;
|
||||
while (g_ascii_isdigit(*(++ptr)) != 0)
|
||||
while (g_ascii_isdigit (*(++ptr)) != 0)
|
||||
n = n * 10 + *ptr - '0';
|
||||
if (*ptr != ')')
|
||||
{
|
||||
@ -4626,7 +4632,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
|
||||
if ((refsign = *ptr) == '+')
|
||||
{
|
||||
ptr++;
|
||||
if (g_ascii_isdigit(*ptr) == 0)
|
||||
if (g_ascii_isdigit (*ptr) == 0)
|
||||
{
|
||||
*errorcodeptr = ERR63;
|
||||
goto FAILED;
|
||||
@ -4634,13 +4640,13 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
|
||||
}
|
||||
else if (refsign == '-')
|
||||
{
|
||||
if (g_ascii_isdigit(ptr[1]) == 0)
|
||||
if (g_ascii_isdigit (ptr[1]) == 0)
|
||||
goto OTHER_CHAR_AFTER_QUERY;
|
||||
ptr++;
|
||||
}
|
||||
|
||||
recno = 0;
|
||||
while(g_ascii_isdigit(*ptr) != 0)
|
||||
while(g_ascii_isdigit (*ptr) != 0)
|
||||
recno = recno * 10 + *ptr++ - '0';
|
||||
|
||||
if (*ptr != terminator)
|
||||
@ -4796,10 +4802,8 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
|
||||
both phases.
|
||||
|
||||
If we are not at the pattern start, compile code to change the ims
|
||||
options if this setting actually changes any of them. We also pass the
|
||||
new setting back so that it can be put at the start of any following
|
||||
branches, and when this group ends (if we are in a group), a resetting
|
||||
item can be compiled. */
|
||||
options if this setting actually changes any of them, and reset the
|
||||
greedy defaults and the case value for firstbyte and reqbyte. */
|
||||
|
||||
if (*ptr == ')')
|
||||
{
|
||||
@ -4807,7 +4811,6 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
|
||||
(lengthptr == NULL || *lengthptr == 2 + 2*LINK_SIZE))
|
||||
{
|
||||
cd->external_options = newoptions;
|
||||
options = *optionsptr = newoptions;
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -4816,17 +4819,17 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
|
||||
*code++ = OP_OPT;
|
||||
*code++ = newoptions & PCRE_IMS;
|
||||
}
|
||||
|
||||
/* Change options at this level, and pass them back for use
|
||||
in subsequent branches. Reset the greedy defaults and the case
|
||||
value for firstbyte and reqbyte. */
|
||||
|
||||
*optionsptr = options = newoptions;
|
||||
greedy_default = ((newoptions & PCRE_UNGREEDY) != 0);
|
||||
greedy_non_default = greedy_default ^ 1;
|
||||
req_caseopt = ((options & PCRE_CASELESS) != 0)? REQ_CASELESS : 0;
|
||||
req_caseopt = ((newoptions & PCRE_CASELESS) != 0)? REQ_CASELESS : 0;
|
||||
}
|
||||
|
||||
/* Change options at this level, and pass them back for use
|
||||
in subsequent branches. When not at the start of the pattern, this
|
||||
information is also necessary so that a resetting item can be
|
||||
compiled at the end of a group (if we are in a group). */
|
||||
|
||||
*optionsptr = options = newoptions;
|
||||
previous = NULL; /* This item can't be repeated */
|
||||
continue; /* It is complete */
|
||||
}
|
||||
@ -5115,7 +5118,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
|
||||
/* Test a signed number in angle brackets or quotes. */
|
||||
|
||||
p = ptr + 2;
|
||||
while (g_ascii_isdigit(*p) != 0) p++;
|
||||
while (g_ascii_isdigit (*p) != 0) p++;
|
||||
if (*p != terminator)
|
||||
{
|
||||
*errorcodeptr = ERR57;
|
||||
@ -5820,7 +5823,7 @@ Returns: pointer to compiled data block, or NULL on error,
|
||||
with errorptr and erroroffset set
|
||||
*/
|
||||
|
||||
PCRE_EXP_DEFN pcre *
|
||||
PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
|
||||
pcre_compile(const char *pattern, int options, const char **errorptr,
|
||||
int *erroroffset, const unsigned char *tables)
|
||||
{
|
||||
@ -5828,7 +5831,7 @@ return pcre_compile2(pattern, options, NULL, errorptr, erroroffset, tables);
|
||||
}
|
||||
|
||||
|
||||
PCRE_EXP_DEFN pcre *
|
||||
PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
|
||||
pcre_compile2(const char *pattern, int options, int *errorcodeptr,
|
||||
const char **errorptr, int *erroroffset, const unsigned char *tables)
|
||||
{
|
||||
|
@ -62,7 +62,7 @@ Arguments:
|
||||
Returns: 0 if data returned, negative on error
|
||||
*/
|
||||
|
||||
PCRE_EXP_DEFN int
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre_config(int what, void *where)
|
||||
{
|
||||
switch (what)
|
||||
|
@ -512,9 +512,6 @@ for (;;)
|
||||
const uschar *code;
|
||||
int state_offset = current_state->offset;
|
||||
int count, codevalue;
|
||||
#ifdef SUPPORT_UCP
|
||||
int chartype, script;
|
||||
#endif
|
||||
|
||||
#ifdef DEBUG
|
||||
printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
|
||||
@ -825,7 +822,7 @@ for (;;)
|
||||
if (clen > 0)
|
||||
{
|
||||
BOOL OK;
|
||||
int category = _pcre_ucp_findprop(c, &chartype, &script);
|
||||
int chartype = UCD_CHARTYPE(c);
|
||||
switch(code[1])
|
||||
{
|
||||
case PT_ANY:
|
||||
@ -837,7 +834,7 @@ for (;;)
|
||||
break;
|
||||
|
||||
case PT_GC:
|
||||
OK = category == code[2];
|
||||
OK = _pcre_ucp_gentype[chartype] == code[2];
|
||||
break;
|
||||
|
||||
case PT_PC:
|
||||
@ -845,7 +842,7 @@ for (;;)
|
||||
break;
|
||||
|
||||
case PT_SC:
|
||||
OK = script == code[2];
|
||||
OK = UCD_SCRIPT(c) == code[2];
|
||||
break;
|
||||
|
||||
/* Should never occur, but keep compilers from grumbling. */
|
||||
@ -994,7 +991,7 @@ for (;;)
|
||||
if (clen > 0)
|
||||
{
|
||||
BOOL OK;
|
||||
int category = _pcre_ucp_findprop(c, &chartype, &script);
|
||||
int chartype = UCD_CHARTYPE(c);
|
||||
switch(code[2])
|
||||
{
|
||||
case PT_ANY:
|
||||
@ -1006,7 +1003,7 @@ for (;;)
|
||||
break;
|
||||
|
||||
case PT_GC:
|
||||
OK = category == code[3];
|
||||
OK = _pcre_ucp_gentype[chartype] == code[3];
|
||||
break;
|
||||
|
||||
case PT_PC:
|
||||
@ -1014,7 +1011,7 @@ for (;;)
|
||||
break;
|
||||
|
||||
case PT_SC:
|
||||
OK = script == code[3];
|
||||
OK = UCD_SCRIPT(c) == code[3];
|
||||
break;
|
||||
|
||||
/* Should never occur, but keep compilers from grumbling. */
|
||||
@ -1043,7 +1040,7 @@ for (;;)
|
||||
case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
|
||||
count = current_state->count; /* Already matched */
|
||||
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
|
||||
if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
|
||||
if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
|
||||
{
|
||||
const uschar *nptr = ptr + clen;
|
||||
int ncount = 0;
|
||||
@ -1057,7 +1054,7 @@ for (;;)
|
||||
int nd;
|
||||
int ndlen = 1;
|
||||
GETCHARLEN(nd, nptr, ndlen);
|
||||
if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
|
||||
if (UCD_CATEGORY(nd) != ucp_M) break;
|
||||
ncount++;
|
||||
nptr += ndlen;
|
||||
}
|
||||
@ -1216,7 +1213,7 @@ for (;;)
|
||||
if (clen > 0)
|
||||
{
|
||||
BOOL OK;
|
||||
int category = _pcre_ucp_findprop(c, &chartype, &script);
|
||||
int chartype = UCD_CHARTYPE(c);
|
||||
switch(code[2])
|
||||
{
|
||||
case PT_ANY:
|
||||
@ -1228,7 +1225,7 @@ for (;;)
|
||||
break;
|
||||
|
||||
case PT_GC:
|
||||
OK = category == code[3];
|
||||
OK = _pcre_ucp_gentype[chartype] == code[3];
|
||||
break;
|
||||
|
||||
case PT_PC:
|
||||
@ -1236,7 +1233,7 @@ for (;;)
|
||||
break;
|
||||
|
||||
case PT_SC:
|
||||
OK = script == code[3];
|
||||
OK = UCD_SCRIPT(c) == code[3];
|
||||
break;
|
||||
|
||||
/* Should never occur, but keep compilers from grumbling. */
|
||||
@ -1274,7 +1271,7 @@ for (;;)
|
||||
QS2:
|
||||
|
||||
ADD_ACTIVE(state_offset + 2, 0);
|
||||
if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
|
||||
if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
|
||||
{
|
||||
const uschar *nptr = ptr + clen;
|
||||
int ncount = 0;
|
||||
@ -1289,7 +1286,7 @@ for (;;)
|
||||
int nd;
|
||||
int ndlen = 1;
|
||||
GETCHARLEN(nd, nptr, ndlen);
|
||||
if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
|
||||
if (UCD_CATEGORY(nd) != ucp_M) break;
|
||||
ncount++;
|
||||
nptr += ndlen;
|
||||
}
|
||||
@ -1463,7 +1460,7 @@ for (;;)
|
||||
if (clen > 0)
|
||||
{
|
||||
BOOL OK;
|
||||
int category = _pcre_ucp_findprop(c, &chartype, &script);
|
||||
int chartype = UCD_CHARTYPE(c);
|
||||
switch(code[4])
|
||||
{
|
||||
case PT_ANY:
|
||||
@ -1475,7 +1472,7 @@ for (;;)
|
||||
break;
|
||||
|
||||
case PT_GC:
|
||||
OK = category == code[5];
|
||||
OK = _pcre_ucp_gentype[chartype] == code[5];
|
||||
break;
|
||||
|
||||
case PT_PC:
|
||||
@ -1483,7 +1480,7 @@ for (;;)
|
||||
break;
|
||||
|
||||
case PT_SC:
|
||||
OK = script == code[5];
|
||||
OK = UCD_SCRIPT(c) == code[5];
|
||||
break;
|
||||
|
||||
/* Should never occur, but keep compilers from grumbling. */
|
||||
@ -1516,7 +1513,7 @@ for (;;)
|
||||
if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
|
||||
{ ADD_ACTIVE(state_offset + 4, 0); }
|
||||
count = current_state->count; /* Number already matched */
|
||||
if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
|
||||
if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
|
||||
{
|
||||
const uschar *nptr = ptr + clen;
|
||||
int ncount = 0;
|
||||
@ -1530,7 +1527,7 @@ for (;;)
|
||||
int nd;
|
||||
int ndlen = 1;
|
||||
GETCHARLEN(nd, nptr, ndlen);
|
||||
if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
|
||||
if (UCD_CATEGORY(nd) != ucp_M) break;
|
||||
ncount++;
|
||||
nptr += ndlen;
|
||||
}
|
||||
@ -1710,7 +1707,7 @@ for (;;)
|
||||
other case of the character. */
|
||||
|
||||
#ifdef SUPPORT_UCP
|
||||
othercase = _pcre_ucp_othercase(c);
|
||||
othercase = UCD_OTHERCASE(c);
|
||||
#else
|
||||
othercase = NOTACHAR;
|
||||
#endif
|
||||
@ -1735,7 +1732,7 @@ for (;;)
|
||||
to wait for them to pass before continuing. */
|
||||
|
||||
case OP_EXTUNI:
|
||||
if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
|
||||
if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
|
||||
{
|
||||
const uschar *nptr = ptr + clen;
|
||||
int ncount = 0;
|
||||
@ -1743,7 +1740,7 @@ for (;;)
|
||||
{
|
||||
int nclen = 1;
|
||||
GETCHARLEN(c, nptr, nclen);
|
||||
if (_pcre_ucp_findprop(c, &chartype, &script) != ucp_M) break;
|
||||
if (UCD_CATEGORY(c) != ucp_M) break;
|
||||
ncount++;
|
||||
nptr += nclen;
|
||||
}
|
||||
@ -1911,7 +1908,7 @@ for (;;)
|
||||
if (utf8 && d >= 128)
|
||||
{
|
||||
#ifdef SUPPORT_UCP
|
||||
otherd = _pcre_ucp_othercase(d);
|
||||
otherd = UCD_OTHERCASE(d);
|
||||
#endif /* SUPPORT_UCP */
|
||||
}
|
||||
else
|
||||
@ -1949,7 +1946,7 @@ for (;;)
|
||||
if (utf8 && d >= 128)
|
||||
{
|
||||
#ifdef SUPPORT_UCP
|
||||
otherd = _pcre_ucp_othercase(d);
|
||||
otherd = UCD_OTHERCASE(d);
|
||||
#endif /* SUPPORT_UCP */
|
||||
}
|
||||
else
|
||||
@ -1985,7 +1982,7 @@ for (;;)
|
||||
if (utf8 && d >= 128)
|
||||
{
|
||||
#ifdef SUPPORT_UCP
|
||||
otherd = _pcre_ucp_othercase(d);
|
||||
otherd = UCD_OTHERCASE(d);
|
||||
#endif /* SUPPORT_UCP */
|
||||
}
|
||||
else
|
||||
@ -2017,7 +2014,7 @@ for (;;)
|
||||
if (utf8 && d >= 128)
|
||||
{
|
||||
#ifdef SUPPORT_UCP
|
||||
otherd = _pcre_ucp_othercase(d);
|
||||
otherd = UCD_OTHERCASE(d);
|
||||
#endif /* SUPPORT_UCP */
|
||||
}
|
||||
else
|
||||
@ -2052,7 +2049,7 @@ for (;;)
|
||||
if (utf8 && d >= 128)
|
||||
{
|
||||
#ifdef SUPPORT_UCP
|
||||
otherd = _pcre_ucp_othercase(d);
|
||||
otherd = UCD_OTHERCASE(d);
|
||||
#endif /* SUPPORT_UCP */
|
||||
}
|
||||
else
|
||||
@ -2508,7 +2505,7 @@ Returns: > 0 => number of match offset pairs placed in offsets
|
||||
< -1 => some kind of unexpected problem
|
||||
*/
|
||||
|
||||
PCRE_EXP_DEFN int
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
|
||||
const char *subject, int length, int start_offset, int options, int *offsets,
|
||||
int offsetcount, int *workspace, int wscount)
|
||||
@ -2736,7 +2733,18 @@ for (;;)
|
||||
|
||||
if (firstline)
|
||||
{
|
||||
const uschar *t = current_subject;
|
||||
USPTR t = current_subject;
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8)
|
||||
{
|
||||
while (t < md->end_subject && !IS_NEWLINE(t))
|
||||
{
|
||||
t++;
|
||||
while (t < end_subject && (*t & 0xc0) == 0x80) t++;
|
||||
}
|
||||
}
|
||||
else
|
||||
#endif
|
||||
while (t < md->end_subject && !IS_NEWLINE(t)) t++;
|
||||
end_subject = t;
|
||||
}
|
||||
@ -2758,7 +2766,20 @@ for (;;)
|
||||
{
|
||||
if (current_subject > md->start_subject + start_offset)
|
||||
{
|
||||
while (current_subject <= end_subject && !WAS_NEWLINE(current_subject))
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8)
|
||||
{
|
||||
while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
|
||||
{
|
||||
current_subject++;
|
||||
while(current_subject < end_subject &&
|
||||
(*current_subject & 0xc0) == 0x80)
|
||||
current_subject++;
|
||||
}
|
||||
}
|
||||
else
|
||||
#endif
|
||||
while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
|
||||
current_subject++;
|
||||
|
||||
/* If we have just passed a CR and the newline option is ANY or
|
||||
|
@ -158,13 +158,39 @@ printf("\n");
|
||||
|
||||
if (length > md->end_subject - eptr) return FALSE;
|
||||
|
||||
/* Separate the caselesss case for speed */
|
||||
/* Separate the caseless case for speed. In UTF-8 mode we can only do this
|
||||
properly if Unicode properties are supported. Otherwise, we can check only
|
||||
ASCII characters. */
|
||||
|
||||
if ((ims & PCRE_CASELESS) != 0)
|
||||
{
|
||||
#ifdef SUPPORT_UTF8
|
||||
#ifdef SUPPORT_UCP
|
||||
if (md->utf8)
|
||||
{
|
||||
USPTR endptr = eptr + length;
|
||||
while (eptr < endptr)
|
||||
{
|
||||
int c, d;
|
||||
GETCHARINC(c, eptr);
|
||||
GETCHARINC(d, p);
|
||||
if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
|
||||
}
|
||||
}
|
||||
else
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* The same code works when not in UTF-8 mode and in UTF-8 mode when there
|
||||
is no UCP support. */
|
||||
|
||||
while (length-- > 0)
|
||||
if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;
|
||||
{ if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
|
||||
}
|
||||
|
||||
/* In the caseful case, we can just compare the bytes, whether or not we
|
||||
are in UTF-8 mode. */
|
||||
|
||||
else
|
||||
{ while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
|
||||
|
||||
@ -1653,9 +1679,7 @@ for (;;)
|
||||
if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
|
||||
GETCHARINCTEST(c, eptr);
|
||||
{
|
||||
int chartype, script;
|
||||
int category = _pcre_ucp_findprop(c, &chartype, &script);
|
||||
|
||||
int chartype = UCD_CHARTYPE(c);
|
||||
switch(ecode[1])
|
||||
{
|
||||
case PT_ANY:
|
||||
@ -1670,7 +1694,7 @@ for (;;)
|
||||
break;
|
||||
|
||||
case PT_GC:
|
||||
if ((ecode[2] != category) == (op == OP_PROP))
|
||||
if ((ecode[2] != _pcre_ucp_gentype[chartype]) == (op == OP_PROP))
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
break;
|
||||
|
||||
@ -1680,7 +1704,7 @@ for (;;)
|
||||
break;
|
||||
|
||||
case PT_SC:
|
||||
if ((ecode[2] != script) == (op == OP_PROP))
|
||||
if ((ecode[2] != UCD_SCRIPT(c)) == (op == OP_PROP))
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
break;
|
||||
|
||||
@ -1699,8 +1723,7 @@ for (;;)
|
||||
if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
|
||||
GETCHARINCTEST(c, eptr);
|
||||
{
|
||||
int chartype, script;
|
||||
int category = _pcre_ucp_findprop(c, &chartype, &script);
|
||||
int category = UCD_CATEGORY(c);
|
||||
if (category == ucp_M) RRETURN(MATCH_NOMATCH);
|
||||
while (eptr < md->end_subject)
|
||||
{
|
||||
@ -1709,7 +1732,7 @@ for (;;)
|
||||
{
|
||||
GETCHARLEN(c, eptr, len);
|
||||
}
|
||||
category = _pcre_ucp_findprop(c, &chartype, &script);
|
||||
category = UCD_CATEGORY(c);
|
||||
if (category != ucp_M) break;
|
||||
eptr += len;
|
||||
}
|
||||
@ -2174,7 +2197,7 @@ for (;;)
|
||||
if (fc != dc)
|
||||
{
|
||||
#ifdef SUPPORT_UCP
|
||||
if (dc != _pcre_ucp_othercase(fc))
|
||||
if (dc != UCD_OTHERCASE(fc))
|
||||
#endif
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
@ -2265,7 +2288,7 @@ for (;;)
|
||||
#ifdef SUPPORT_UCP
|
||||
unsigned int othercase;
|
||||
if ((ims & PCRE_CASELESS) != 0 &&
|
||||
(othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)
|
||||
(othercase = UCD_OTHERCASE(fc)) != fc)
|
||||
oclength = _pcre_ord2utf8(othercase, occhars);
|
||||
else oclength = 0;
|
||||
#endif /* SUPPORT_UCP */
|
||||
@ -2585,10 +2608,11 @@ for (;;)
|
||||
{
|
||||
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
|
||||
GETCHARINC(d, eptr);
|
||||
if (d < 256) d = md->lcc[d];
|
||||
if (fi >= max || eptr >= md->end_subject || fc == d)
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
if (fc == d) RRETURN(MATCH_NOMATCH);
|
||||
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -2694,9 +2718,9 @@ for (;;)
|
||||
{
|
||||
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
|
||||
GETCHARINC(d, eptr);
|
||||
if (fi >= max || eptr >= md->end_subject || fc == d)
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
if (fc == d) RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -2870,7 +2894,7 @@ for (;;)
|
||||
{
|
||||
if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
|
||||
GETCHARINCTEST(c, eptr);
|
||||
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
|
||||
prop_chartype = UCD_CHARTYPE(c);
|
||||
if ((prop_chartype == ucp_Lu ||
|
||||
prop_chartype == ucp_Ll ||
|
||||
prop_chartype == ucp_Lt) == prop_fail_result)
|
||||
@ -2883,7 +2907,7 @@ for (;;)
|
||||
{
|
||||
if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
|
||||
GETCHARINCTEST(c, eptr);
|
||||
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
|
||||
prop_category = UCD_CATEGORY(c);
|
||||
if ((prop_category == prop_value) == prop_fail_result)
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
@ -2894,7 +2918,7 @@ for (;;)
|
||||
{
|
||||
if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
|
||||
GETCHARINCTEST(c, eptr);
|
||||
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
|
||||
prop_chartype = UCD_CHARTYPE(c);
|
||||
if ((prop_chartype == prop_value) == prop_fail_result)
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
@ -2905,7 +2929,7 @@ for (;;)
|
||||
{
|
||||
if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
|
||||
GETCHARINCTEST(c, eptr);
|
||||
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
|
||||
prop_script = UCD_SCRIPT(c);
|
||||
if ((prop_script == prop_value) == prop_fail_result)
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
@ -2924,7 +2948,7 @@ for (;;)
|
||||
for (i = 1; i <= min; i++)
|
||||
{
|
||||
GETCHARINCTEST(c, eptr);
|
||||
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
|
||||
prop_category = UCD_CATEGORY(c);
|
||||
if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
|
||||
while (eptr < md->end_subject)
|
||||
{
|
||||
@ -2933,7 +2957,7 @@ for (;;)
|
||||
{
|
||||
GETCHARLEN(c, eptr, len);
|
||||
}
|
||||
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
|
||||
prop_category = UCD_CATEGORY(c);
|
||||
if (prop_category != ucp_M) break;
|
||||
eptr += len;
|
||||
}
|
||||
@ -3349,7 +3373,7 @@ for (;;)
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
|
||||
GETCHARINC(c, eptr);
|
||||
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
|
||||
prop_chartype = UCD_CHARTYPE(c);
|
||||
if ((prop_chartype == ucp_Lu ||
|
||||
prop_chartype == ucp_Ll ||
|
||||
prop_chartype == ucp_Lt) == prop_fail_result)
|
||||
@ -3364,7 +3388,7 @@ for (;;)
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
|
||||
GETCHARINC(c, eptr);
|
||||
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
|
||||
prop_category = UCD_CATEGORY(c);
|
||||
if ((prop_category == prop_value) == prop_fail_result)
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
@ -3377,7 +3401,7 @@ for (;;)
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
|
||||
GETCHARINC(c, eptr);
|
||||
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
|
||||
prop_chartype = UCD_CHARTYPE(c);
|
||||
if ((prop_chartype == prop_value) == prop_fail_result)
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
@ -3390,7 +3414,7 @@ for (;;)
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
|
||||
GETCHARINC(c, eptr);
|
||||
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
|
||||
prop_script = UCD_SCRIPT(c);
|
||||
if ((prop_script == prop_value) == prop_fail_result)
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
@ -3412,7 +3436,7 @@ for (;;)
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
|
||||
GETCHARINCTEST(c, eptr);
|
||||
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
|
||||
prop_category = UCD_CATEGORY(c);
|
||||
if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
|
||||
while (eptr < md->end_subject)
|
||||
{
|
||||
@ -3421,7 +3445,7 @@ for (;;)
|
||||
{
|
||||
GETCHARLEN(c, eptr, len);
|
||||
}
|
||||
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
|
||||
prop_category = UCD_CATEGORY(c);
|
||||
if (prop_category != ucp_M) break;
|
||||
eptr += len;
|
||||
}
|
||||
@ -3739,7 +3763,7 @@ for (;;)
|
||||
int len = 1;
|
||||
if (eptr >= md->end_subject) break;
|
||||
GETCHARLEN(c, eptr, len);
|
||||
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
|
||||
prop_chartype = UCD_CHARTYPE(c);
|
||||
if ((prop_chartype == ucp_Lu ||
|
||||
prop_chartype == ucp_Ll ||
|
||||
prop_chartype == ucp_Lt) == prop_fail_result)
|
||||
@ -3754,7 +3778,7 @@ for (;;)
|
||||
int len = 1;
|
||||
if (eptr >= md->end_subject) break;
|
||||
GETCHARLEN(c, eptr, len);
|
||||
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
|
||||
prop_category = UCD_CATEGORY(c);
|
||||
if ((prop_category == prop_value) == prop_fail_result)
|
||||
break;
|
||||
eptr+= len;
|
||||
@ -3767,7 +3791,7 @@ for (;;)
|
||||
int len = 1;
|
||||
if (eptr >= md->end_subject) break;
|
||||
GETCHARLEN(c, eptr, len);
|
||||
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
|
||||
prop_chartype = UCD_CHARTYPE(c);
|
||||
if ((prop_chartype == prop_value) == prop_fail_result)
|
||||
break;
|
||||
eptr+= len;
|
||||
@ -3780,7 +3804,7 @@ for (;;)
|
||||
int len = 1;
|
||||
if (eptr >= md->end_subject) break;
|
||||
GETCHARLEN(c, eptr, len);
|
||||
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
|
||||
prop_script = UCD_SCRIPT(c);
|
||||
if ((prop_script == prop_value) == prop_fail_result)
|
||||
break;
|
||||
eptr+= len;
|
||||
@ -3809,7 +3833,7 @@ for (;;)
|
||||
{
|
||||
if (eptr >= md->end_subject) break;
|
||||
GETCHARINCTEST(c, eptr);
|
||||
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
|
||||
prop_category = UCD_CATEGORY(c);
|
||||
if (prop_category == ucp_M) break;
|
||||
while (eptr < md->end_subject)
|
||||
{
|
||||
@ -3818,7 +3842,7 @@ for (;;)
|
||||
{
|
||||
GETCHARLEN(c, eptr, len);
|
||||
}
|
||||
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
|
||||
prop_category = UCD_CATEGORY(c);
|
||||
if (prop_category != ucp_M) break;
|
||||
eptr += len;
|
||||
}
|
||||
@ -3840,7 +3864,7 @@ for (;;)
|
||||
BACKCHAR(eptr);
|
||||
GETCHARLEN(c, eptr, len);
|
||||
}
|
||||
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
|
||||
prop_category = UCD_CATEGORY(c);
|
||||
if (prop_category != ucp_M) break;
|
||||
eptr--;
|
||||
}
|
||||
@ -4360,7 +4384,7 @@ Returns: > 0 => success; value is the number of elements filled in
|
||||
< -1 => some kind of unexpected problem
|
||||
*/
|
||||
|
||||
PCRE_EXP_DEFN int
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
|
||||
PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
|
||||
int offsetcount)
|
||||
@ -4672,31 +4696,53 @@ for(;;)
|
||||
if (firstline)
|
||||
{
|
||||
USPTR t = start_match;
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8)
|
||||
{
|
||||
while (t < md->end_subject && !IS_NEWLINE(t))
|
||||
{
|
||||
t++;
|
||||
while (t < end_subject && (*t & 0xc0) == 0x80) t++;
|
||||
}
|
||||
}
|
||||
else
|
||||
#endif
|
||||
while (t < md->end_subject && !IS_NEWLINE(t)) t++;
|
||||
end_subject = t;
|
||||
}
|
||||
|
||||
/* Now test for a unique first byte */
|
||||
/* Now advance to a unique first byte if there is one. */
|
||||
|
||||
if (first_byte >= 0)
|
||||
{
|
||||
if (first_byte_caseless)
|
||||
while (start_match < end_subject &&
|
||||
md->lcc[*start_match] != first_byte)
|
||||
{ NEXTCHAR(start_match); }
|
||||
while (start_match < end_subject && md->lcc[*start_match] != first_byte)
|
||||
start_match++;
|
||||
else
|
||||
while (start_match < end_subject && *start_match != first_byte)
|
||||
{ NEXTCHAR(start_match); }
|
||||
start_match++;
|
||||
}
|
||||
|
||||
/* Or to just after a linebreak for a multiline match if possible */
|
||||
/* Or to just after a linebreak for a multiline match */
|
||||
|
||||
else if (startline)
|
||||
{
|
||||
if (start_match > md->start_subject + start_offset)
|
||||
{
|
||||
while (start_match <= end_subject && !WAS_NEWLINE(start_match))
|
||||
{ NEXTCHAR(start_match); }
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8)
|
||||
{
|
||||
while (start_match < end_subject && !WAS_NEWLINE(start_match))
|
||||
{
|
||||
start_match++;
|
||||
while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
|
||||
start_match++;
|
||||
}
|
||||
}
|
||||
else
|
||||
#endif
|
||||
while (start_match < end_subject && !WAS_NEWLINE(start_match))
|
||||
start_match++;
|
||||
|
||||
/* If we have just passed a CR and the newline option is ANY or ANYCRLF,
|
||||
and we are now at a LF, advance the match position by one more character.
|
||||
@ -4710,16 +4756,15 @@ for(;;)
|
||||
}
|
||||
}
|
||||
|
||||
/* Or to a non-unique first char after study */
|
||||
/* Or to a non-unique first byte after study */
|
||||
|
||||
else if (start_bits != NULL)
|
||||
{
|
||||
while (start_match < end_subject)
|
||||
{
|
||||
register unsigned int c = *start_match;
|
||||
if ((start_bits[c/8] & (1 << (c&7))) == 0)
|
||||
{ NEXTCHAR(start_match); }
|
||||
else break;
|
||||
if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -65,7 +65,7 @@ Arguments:
|
||||
Returns: 0 if data returned, negative on error
|
||||
*/
|
||||
|
||||
PCRE_EXP_DEFN int
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre_fullinfo(const pcre *argument_re, const pcre_extra *extra_data, int what,
|
||||
void *where)
|
||||
{
|
||||
|
@ -65,7 +65,7 @@ Returns: the number of the named parentheses, or a negative number
|
||||
(PCRE_ERROR_NOSUBSTRING) if not found
|
||||
*/
|
||||
|
||||
int
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre_get_stringnumber(const pcre *code, const char *stringname)
|
||||
{
|
||||
int rc;
|
||||
@ -114,7 +114,7 @@ Returns: the length of each entry, or a negative number
|
||||
(PCRE_ERROR_NOSUBSTRING) if not found
|
||||
*/
|
||||
|
||||
int
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre_get_stringtable_entries(const pcre *code, const char *stringname,
|
||||
char **firstptr, char **lastptr)
|
||||
{
|
||||
@ -231,7 +231,7 @@ Returns: if successful:
|
||||
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
|
||||
*/
|
||||
|
||||
int
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre_copy_substring(const char *subject, int *ovector, int stringcount,
|
||||
int stringnumber, char *buffer, int size)
|
||||
{
|
||||
@ -276,7 +276,7 @@ Returns: if successful:
|
||||
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
|
||||
*/
|
||||
|
||||
int
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
|
||||
int stringcount, const char *stringname, char *buffer, int size)
|
||||
{
|
||||
@ -308,7 +308,7 @@ Returns: if successful: 0
|
||||
PCRE_ERROR_NOMEMORY (-6) failed to get store
|
||||
*/
|
||||
|
||||
int
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
|
||||
const char ***listptr)
|
||||
{
|
||||
@ -353,7 +353,7 @@ Argument: the result of a previous pcre_get_substring_list()
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
void
|
||||
PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
|
||||
pcre_free_substring_list(const char **pointer)
|
||||
{
|
||||
(pcre_free)((void *)pointer);
|
||||
@ -386,7 +386,7 @@ Returns: if successful:
|
||||
PCRE_ERROR_NOSUBSTRING (-7) substring not present
|
||||
*/
|
||||
|
||||
int
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre_get_substring(const char *subject, int *ovector, int stringcount,
|
||||
int stringnumber, const char **stringptr)
|
||||
{
|
||||
@ -433,7 +433,7 @@ Returns: if successful:
|
||||
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
|
||||
*/
|
||||
|
||||
int
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
|
||||
int stringcount, const char *stringname, const char **stringptr)
|
||||
{
|
||||
@ -456,7 +456,7 @@ Argument: the result of a previous pcre_get_substring()
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
void
|
||||
PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
|
||||
pcre_free_substring(const char *pointer)
|
||||
{
|
||||
(pcre_free)((void *)pointer);
|
||||
|
@ -52,8 +52,6 @@ differently, and global variables are not used (see pcre.in). */
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
#ifndef VPCOMPAT
|
||||
PCRE_EXP_DATA_DEFN int (*pcre_callout)(pcre_callout_block *) = NULL;
|
||||
#endif
|
||||
|
||||
/* End of pcre_globals.c */
|
||||
|
@ -72,7 +72,7 @@ Returns: number of capturing subpatterns
|
||||
or negative values on error
|
||||
*/
|
||||
|
||||
PCRE_EXP_DEFN int
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre_info(const pcre *argument_re, int *optptr, int *first_byte)
|
||||
{
|
||||
real_pcre internal_re;
|
||||
|
@ -132,6 +132,20 @@ PCRE_EXP_DATA_DEFN only if they are not already set. */
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* When compiling with the MSVC compiler, it is sometimes necessary to include
|
||||
a "calling convention" before exported function names. (This is secondhand
|
||||
information; I know nothing about MSVC myself). For example, something like
|
||||
|
||||
void __cdecl function(....)
|
||||
|
||||
might be needed. In order to make this easy, all the exported functions have
|
||||
PCRE_CALL_CONVENTION just before their names. It is rarely needed; if not
|
||||
set, we ensure here that it has no effect. */
|
||||
|
||||
#ifndef PCRE_CALL_CONVENTION
|
||||
#define PCRE_CALL_CONVENTION
|
||||
#endif
|
||||
|
||||
/* We need to have types that specify unsigned 16-bit and 32-bit integers. We
|
||||
cannot determine these outside the compilation (e.g. by running a program as
|
||||
part of "configure") because PCRE is often cross-compiled for use on other
|
||||
@ -140,16 +154,20 @@ preprocessor time in standard C environments. */
|
||||
|
||||
#if USHRT_MAX == 65535
|
||||
typedef unsigned short pcre_uint16;
|
||||
typedef short pcre_int16;
|
||||
#elif UINT_MAX == 65535
|
||||
typedef unsigned int pcre_uint16;
|
||||
typedef int pcre_int16;
|
||||
#else
|
||||
#error Cannot determine a type for 16-bit unsigned integers
|
||||
#endif
|
||||
|
||||
#if UINT_MAX == 4294967295
|
||||
typedef unsigned int pcre_uint32;
|
||||
typedef int pcre_int32;
|
||||
#elif ULONG_MAX == 4294967295
|
||||
typedef unsigned long int pcre_uint32;
|
||||
typedef long int pcre_int32;
|
||||
#else
|
||||
#error Cannot determine a type for 32-bit unsigned integers
|
||||
#endif
|
||||
@ -241,7 +259,6 @@ option on the command line. */
|
||||
#define strncmp(s1,s2,m) _strncmp(s1,s2,m)
|
||||
#define memcmp(s,c,n) _memcmp(s,c,n)
|
||||
#define memcpy(d,s,n) _memcpy(d,s,n)
|
||||
#define memmove(d,s,n) _memmove(d,s,n)
|
||||
#define memset(s,c,n) _memset(s,c,n)
|
||||
#else /* VPCOMPAT */
|
||||
|
||||
@ -363,7 +380,6 @@ never be called in byte mode. To make sure it can never even appear when UTF-8
|
||||
support is omitted, we don't even define it. */
|
||||
|
||||
#ifndef SUPPORT_UTF8
|
||||
#define NEXTCHAR(p) p++;
|
||||
#define GETCHAR(c, eptr) c = *eptr;
|
||||
#define GETCHARTEST(c, eptr) c = *eptr;
|
||||
#define GETCHARINC(c, eptr) c = *eptr++;
|
||||
@ -373,13 +389,6 @@ support is omitted, we don't even define it. */
|
||||
|
||||
#else /* SUPPORT_UTF8 */
|
||||
|
||||
/* Advance a character pointer one byte in non-UTF-8 mode and by one character
|
||||
in UTF-8 mode. */
|
||||
|
||||
#define NEXTCHAR(p) \
|
||||
p++; \
|
||||
if (utf8) { while((*p & 0xc0) == 0x80) p++; }
|
||||
|
||||
/* Get the next UTF-8 character, not advancing the pointer. This is called when
|
||||
we know we are in UTF-8 mode. */
|
||||
|
||||
@ -549,7 +558,8 @@ variable-length repeat, or a anything other than literal characters. */
|
||||
#define REQ_CASELESS 0x0100 /* indicates caselessness */
|
||||
#define REQ_VARY 0x0200 /* reqbyte followed non-literal item */
|
||||
|
||||
/* Miscellaneous definitions */
|
||||
/* Miscellaneous definitions. The #ifndef is to pacify compiler warnings in
|
||||
environments where these macros are defined elsewhere. */
|
||||
|
||||
typedef gboolean BOOL;
|
||||
|
||||
@ -1123,12 +1133,24 @@ extern BOOL _pcre_is_newline(const uschar *, int, const uschar *,
|
||||
extern int _pcre_ord2utf8(int, uschar *);
|
||||
extern real_pcre *_pcre_try_flipped(const real_pcre *, real_pcre *,
|
||||
const pcre_study_data *, pcre_study_data *);
|
||||
extern int _pcre_ucp_findprop(const unsigned int, int *, int *);
|
||||
extern unsigned int _pcre_ucp_othercase(const unsigned int);
|
||||
extern int _pcre_valid_utf8(const uschar *, int);
|
||||
extern BOOL _pcre_was_newline(const uschar *, int, const uschar *,
|
||||
int *, BOOL);
|
||||
extern BOOL _pcre_xclass(int, const uschar *);
|
||||
extern unsigned int _pcre_ucp_othercase(unsigned int);
|
||||
|
||||
|
||||
extern const int _pcre_ucp_gentype[];
|
||||
|
||||
|
||||
/* UCD access macros */
|
||||
|
||||
#include "../glib.h"
|
||||
|
||||
#define UCD_CHARTYPE(ch) g_unichar_type(ch)
|
||||
#define UCD_SCRIPT(ch) g_unichar_get_script(ch)
|
||||
#define UCD_CATEGORY(ch) _pcre_ucp_gentype[UCD_CHARTYPE(ch)]
|
||||
#define UCD_OTHERCASE(ch) _pcre_ucp_othercase(ch)
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -78,8 +78,10 @@ for (j = i; j > 0; j--)
|
||||
*buffer = _pcre_utf8_table2[i] | cvalue;
|
||||
return i + 1;
|
||||
#else
|
||||
return 0; /* Keep compiler happy; this function won't ever be */
|
||||
#endif /* called when SUPPORT_UTF8 is not defined. */
|
||||
(void)(cvalue); /* Keep compiler happy; this function won't ever be */
|
||||
(void)(buffer); /* called when SUPPORT_UTF8 is not defined. */
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* End of pcre_ord2utf8.c */
|
||||
|
@ -68,7 +68,7 @@ Returns: the (possibly updated) count value (a non-negative number), or
|
||||
a negative error number
|
||||
*/
|
||||
|
||||
PCRE_EXP_DEFN int
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre_refcount(pcre *argument_re, int adjust)
|
||||
{
|
||||
real_pcre *re = (real_pcre *)argument_re;
|
||||
|
@ -220,6 +220,7 @@ do
|
||||
/* SKIPZERO skips the bracket. */
|
||||
|
||||
case OP_SKIPZERO:
|
||||
tcode++;
|
||||
do tcode += GET(tcode,1); while (*tcode == OP_ALT);
|
||||
tcode += 1 + LINK_SIZE;
|
||||
break;
|
||||
@ -503,7 +504,7 @@ Returns: pointer to a pcre_extra block, with study_data filled in and the
|
||||
NULL on error or if no optimization possible
|
||||
*/
|
||||
|
||||
PCRE_EXP_DEFN pcre_extra *
|
||||
PCRE_EXP_DEFN pcre_extra * PCRE_CALL_CONVENTION
|
||||
pcre_study(const pcre *external_re, int options, const char **errorptr)
|
||||
{
|
||||
uschar start_bits[32];
|
||||
|
@ -87,6 +87,19 @@ const uschar _pcre_utf8_table4[] = {
|
||||
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
|
||||
3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
|
||||
|
||||
/* Table to translate from particular type value to the general value. */
|
||||
|
||||
const int _pcre_ucp_gentype[] = {
|
||||
ucp_C, ucp_C, ucp_C, ucp_C, ucp_C, /* Cc, Cf, Cn, Co, Cs */
|
||||
ucp_L, ucp_L, ucp_L, ucp_L, ucp_L, /* Ll, Lu, Lm, Lo, Lt */
|
||||
ucp_M, ucp_M, ucp_M, /* Mc, Me, Mn */
|
||||
ucp_N, ucp_N, ucp_N, /* Nd, Nl, No */
|
||||
ucp_P, ucp_P, ucp_P, ucp_P, ucp_P, /* Pc, Pd, Pe, Pf, Pi */
|
||||
ucp_P, ucp_P, /* Ps, Po */
|
||||
ucp_S, ucp_S, ucp_S, ucp_S, /* Sc, Sk, Sm, So */
|
||||
ucp_Z, ucp_Z, ucp_Z /* Zl, Zp, Zs */
|
||||
};
|
||||
|
||||
/* The pcre_utt[] table below translates Unicode property names into type and
|
||||
code values. It is searched by binary chop, so must be in collating sequence of
|
||||
name. Originally, the table contained pointers to the name strings in the first
|
||||
@ -94,7 +107,10 @@ field of each entry. However, that leads to a large number of relocations when
|
||||
a shared library is dynamically loaded. A significant reduction is made by
|
||||
putting all the names into a single, large string and then using offsets in the
|
||||
table itself. Maintenance is more error-prone, but frequent changes to this
|
||||
data is unlikely. */
|
||||
data are unlikely.
|
||||
|
||||
July 2008: There is now a script called maint/GenerateUtt.py which can be used
|
||||
to generate this data instead of maintaining it entirely by hand. */
|
||||
|
||||
const char _pcre_utt_names[] =
|
||||
"Any\0"
|
||||
@ -108,8 +124,10 @@ const char _pcre_utt_names[] =
|
||||
"Buhid\0"
|
||||
"C\0"
|
||||
"Canadian_Aboriginal\0"
|
||||
"Carian\0"
|
||||
"Cc\0"
|
||||
"Cf\0"
|
||||
"Cham\0"
|
||||
"Cherokee\0"
|
||||
"Cn\0"
|
||||
"Co\0"
|
||||
@ -136,12 +154,14 @@ const char _pcre_utt_names[] =
|
||||
"Inherited\0"
|
||||
"Kannada\0"
|
||||
"Katakana\0"
|
||||
"Kayah_Li\0"
|
||||
"Kharoshthi\0"
|
||||
"Khmer\0"
|
||||
"L\0"
|
||||
"L&\0"
|
||||
"Lao\0"
|
||||
"Latin\0"
|
||||
"Lepcha\0"
|
||||
"Limbu\0"
|
||||
"Linear_B\0"
|
||||
"Ll\0"
|
||||
@ -149,6 +169,8 @@ const char _pcre_utt_names[] =
|
||||
"Lo\0"
|
||||
"Lt\0"
|
||||
"Lu\0"
|
||||
"Lycian\0"
|
||||
"Lydian\0"
|
||||
"M\0"
|
||||
"Malayalam\0"
|
||||
"Mc\0"
|
||||
@ -163,6 +185,7 @@ const char _pcre_utt_names[] =
|
||||
"Nl\0"
|
||||
"No\0"
|
||||
"Ogham\0"
|
||||
"Ol_Chiki\0"
|
||||
"Old_Italic\0"
|
||||
"Old_Persian\0"
|
||||
"Oriya\0"
|
||||
@ -177,14 +200,17 @@ const char _pcre_utt_names[] =
|
||||
"Pi\0"
|
||||
"Po\0"
|
||||
"Ps\0"
|
||||
"Rejang\0"
|
||||
"Runic\0"
|
||||
"S\0"
|
||||
"Saurashtra\0"
|
||||
"Sc\0"
|
||||
"Shavian\0"
|
||||
"Sinhala\0"
|
||||
"Sk\0"
|
||||
"Sm\0"
|
||||
"So\0"
|
||||
"Sundanese\0"
|
||||
"Syloti_Nagri\0"
|
||||
"Syriac\0"
|
||||
"Tagalog\0"
|
||||
@ -197,6 +223,7 @@ const char _pcre_utt_names[] =
|
||||
"Tibetan\0"
|
||||
"Tifinagh\0"
|
||||
"Ugaritic\0"
|
||||
"Vai\0"
|
||||
"Yi\0"
|
||||
"Z\0"
|
||||
"Zl\0"
|
||||
@ -204,111 +231,122 @@ const char _pcre_utt_names[] =
|
||||
"Zs\0";
|
||||
|
||||
const ucp_type_table _pcre_utt[] = {
|
||||
{ 0, PT_ANY, 0 },
|
||||
{ 4, PT_SC, ucp_Arabic },
|
||||
{ 11, PT_SC, ucp_Armenian },
|
||||
{ 20, PT_SC, ucp_Balinese },
|
||||
{ 29, PT_SC, ucp_Bengali },
|
||||
{ 37, PT_SC, ucp_Bopomofo },
|
||||
{ 46, PT_SC, ucp_Braille },
|
||||
{ 54, PT_SC, ucp_Buginese },
|
||||
{ 63, PT_SC, ucp_Buhid },
|
||||
{ 69, PT_GC, ucp_C },
|
||||
{ 71, PT_SC, ucp_Canadian_Aboriginal },
|
||||
{ 91, PT_PC, ucp_Cc },
|
||||
{ 94, PT_PC, ucp_Cf },
|
||||
{ 97, PT_SC, ucp_Cherokee },
|
||||
{ 106, PT_PC, ucp_Cn },
|
||||
{ 109, PT_PC, ucp_Co },
|
||||
{ 112, PT_SC, ucp_Common },
|
||||
{ 119, PT_SC, ucp_Coptic },
|
||||
{ 126, PT_PC, ucp_Cs },
|
||||
{ 129, PT_SC, ucp_Cuneiform },
|
||||
{ 139, PT_SC, ucp_Cypriot },
|
||||
{ 147, PT_SC, ucp_Cyrillic },
|
||||
{ 156, PT_SC, ucp_Deseret },
|
||||
{ 164, PT_SC, ucp_Devanagari },
|
||||
{ 175, PT_SC, ucp_Ethiopic },
|
||||
{ 184, PT_SC, ucp_Georgian },
|
||||
{ 193, PT_SC, ucp_Glagolitic },
|
||||
{ 204, PT_SC, ucp_Gothic },
|
||||
{ 211, PT_SC, ucp_Greek },
|
||||
{ 217, PT_SC, ucp_Gujarati },
|
||||
{ 226, PT_SC, ucp_Gurmukhi },
|
||||
{ 235, PT_SC, ucp_Han },
|
||||
{ 239, PT_SC, ucp_Hangul },
|
||||
{ 246, PT_SC, ucp_Hanunoo },
|
||||
{ 254, PT_SC, ucp_Hebrew },
|
||||
{ 261, PT_SC, ucp_Hiragana },
|
||||
{ 270, PT_SC, ucp_Inherited },
|
||||
{ 280, PT_SC, ucp_Kannada },
|
||||
{ 288, PT_SC, ucp_Katakana },
|
||||
{ 297, PT_SC, ucp_Kharoshthi },
|
||||
{ 308, PT_SC, ucp_Khmer },
|
||||
{ 314, PT_GC, ucp_L },
|
||||
{ 316, PT_LAMP, 0 },
|
||||
{ 319, PT_SC, ucp_Lao },
|
||||
{ 323, PT_SC, ucp_Latin },
|
||||
{ 329, PT_SC, ucp_Limbu },
|
||||
{ 335, PT_SC, ucp_Linear_B },
|
||||
{ 344, PT_PC, ucp_Ll },
|
||||
{ 347, PT_PC, ucp_Lm },
|
||||
{ 350, PT_PC, ucp_Lo },
|
||||
{ 353, PT_PC, ucp_Lt },
|
||||
{ 356, PT_PC, ucp_Lu },
|
||||
{ 359, PT_GC, ucp_M },
|
||||
{ 361, PT_SC, ucp_Malayalam },
|
||||
{ 371, PT_PC, ucp_Mc },
|
||||
{ 374, PT_PC, ucp_Me },
|
||||
{ 377, PT_PC, ucp_Mn },
|
||||
{ 380, PT_SC, ucp_Mongolian },
|
||||
{ 390, PT_SC, ucp_Myanmar },
|
||||
{ 398, PT_GC, ucp_N },
|
||||
{ 400, PT_PC, ucp_Nd },
|
||||
{ 403, PT_SC, ucp_New_Tai_Lue },
|
||||
{ 415, PT_SC, ucp_Nko },
|
||||
{ 419, PT_PC, ucp_Nl },
|
||||
{ 422, PT_PC, ucp_No },
|
||||
{ 425, PT_SC, ucp_Ogham },
|
||||
{ 431, PT_SC, ucp_Old_Italic },
|
||||
{ 442, PT_SC, ucp_Old_Persian },
|
||||
{ 454, PT_SC, ucp_Oriya },
|
||||
{ 460, PT_SC, ucp_Osmanya },
|
||||
{ 468, PT_GC, ucp_P },
|
||||
{ 470, PT_PC, ucp_Pc },
|
||||
{ 473, PT_PC, ucp_Pd },
|
||||
{ 476, PT_PC, ucp_Pe },
|
||||
{ 479, PT_PC, ucp_Pf },
|
||||
{ 482, PT_SC, ucp_Phags_Pa },
|
||||
{ 491, PT_SC, ucp_Phoenician },
|
||||
{ 502, PT_PC, ucp_Pi },
|
||||
{ 505, PT_PC, ucp_Po },
|
||||
{ 508, PT_PC, ucp_Ps },
|
||||
{ 511, PT_SC, ucp_Runic },
|
||||
{ 517, PT_GC, ucp_S },
|
||||
{ 519, PT_PC, ucp_Sc },
|
||||
{ 522, PT_SC, ucp_Shavian },
|
||||
{ 530, PT_SC, ucp_Sinhala },
|
||||
{ 538, PT_PC, ucp_Sk },
|
||||
{ 541, PT_PC, ucp_Sm },
|
||||
{ 544, PT_PC, ucp_So },
|
||||
{ 547, PT_SC, ucp_Syloti_Nagri },
|
||||
{ 560, PT_SC, ucp_Syriac },
|
||||
{ 567, PT_SC, ucp_Tagalog },
|
||||
{ 575, PT_SC, ucp_Tagbanwa },
|
||||
{ 584, PT_SC, ucp_Tai_Le },
|
||||
{ 591, PT_SC, ucp_Tamil },
|
||||
{ 597, PT_SC, ucp_Telugu },
|
||||
{ 604, PT_SC, ucp_Thaana },
|
||||
{ 611, PT_SC, ucp_Thai },
|
||||
{ 616, PT_SC, ucp_Tibetan },
|
||||
{ 624, PT_SC, ucp_Tifinagh },
|
||||
{ 633, PT_SC, ucp_Ugaritic },
|
||||
{ 642, PT_SC, ucp_Yi },
|
||||
{ 645, PT_GC, ucp_Z },
|
||||
{ 647, PT_PC, ucp_Zl },
|
||||
{ 650, PT_PC, ucp_Zp },
|
||||
{ 653, PT_PC, ucp_Zs }
|
||||
{ 0, PT_ANY, 0 },
|
||||
{ 4, PT_SC, ucp_Arabic },
|
||||
{ 11, PT_SC, ucp_Armenian },
|
||||
{ 20, PT_SC, ucp_Balinese },
|
||||
{ 29, PT_SC, ucp_Bengali },
|
||||
{ 37, PT_SC, ucp_Bopomofo },
|
||||
{ 46, PT_SC, ucp_Braille },
|
||||
{ 54, PT_SC, ucp_Buginese },
|
||||
{ 63, PT_SC, ucp_Buhid },
|
||||
{ 69, PT_GC, ucp_C },
|
||||
{ 71, PT_SC, ucp_Canadian_Aboriginal },
|
||||
{ 91, PT_SC, ucp_Carian },
|
||||
{ 98, PT_PC, ucp_Cc },
|
||||
{ 101, PT_PC, ucp_Cf },
|
||||
{ 104, PT_SC, ucp_Cham },
|
||||
{ 109, PT_SC, ucp_Cherokee },
|
||||
{ 118, PT_PC, ucp_Cn },
|
||||
{ 121, PT_PC, ucp_Co },
|
||||
{ 124, PT_SC, ucp_Common },
|
||||
{ 131, PT_SC, ucp_Coptic },
|
||||
{ 138, PT_PC, ucp_Cs },
|
||||
{ 141, PT_SC, ucp_Cuneiform },
|
||||
{ 151, PT_SC, ucp_Cypriot },
|
||||
{ 159, PT_SC, ucp_Cyrillic },
|
||||
{ 168, PT_SC, ucp_Deseret },
|
||||
{ 176, PT_SC, ucp_Devanagari },
|
||||
{ 187, PT_SC, ucp_Ethiopic },
|
||||
{ 196, PT_SC, ucp_Georgian },
|
||||
{ 205, PT_SC, ucp_Glagolitic },
|
||||
{ 216, PT_SC, ucp_Gothic },
|
||||
{ 223, PT_SC, ucp_Greek },
|
||||
{ 229, PT_SC, ucp_Gujarati },
|
||||
{ 238, PT_SC, ucp_Gurmukhi },
|
||||
{ 247, PT_SC, ucp_Han },
|
||||
{ 251, PT_SC, ucp_Hangul },
|
||||
{ 258, PT_SC, ucp_Hanunoo },
|
||||
{ 266, PT_SC, ucp_Hebrew },
|
||||
{ 273, PT_SC, ucp_Hiragana },
|
||||
{ 282, PT_SC, ucp_Inherited },
|
||||
{ 292, PT_SC, ucp_Kannada },
|
||||
{ 300, PT_SC, ucp_Katakana },
|
||||
{ 309, PT_SC, ucp_Kayah_Li },
|
||||
{ 318, PT_SC, ucp_Kharoshthi },
|
||||
{ 329, PT_SC, ucp_Khmer },
|
||||
{ 335, PT_GC, ucp_L },
|
||||
{ 337, PT_LAMP, 0 },
|
||||
{ 340, PT_SC, ucp_Lao },
|
||||
{ 344, PT_SC, ucp_Latin },
|
||||
{ 350, PT_SC, ucp_Lepcha },
|
||||
{ 357, PT_SC, ucp_Limbu },
|
||||
{ 363, PT_SC, ucp_Linear_B },
|
||||
{ 372, PT_PC, ucp_Ll },
|
||||
{ 375, PT_PC, ucp_Lm },
|
||||
{ 378, PT_PC, ucp_Lo },
|
||||
{ 381, PT_PC, ucp_Lt },
|
||||
{ 384, PT_PC, ucp_Lu },
|
||||
{ 387, PT_SC, ucp_Lycian },
|
||||
{ 394, PT_SC, ucp_Lydian },
|
||||
{ 401, PT_GC, ucp_M },
|
||||
{ 403, PT_SC, ucp_Malayalam },
|
||||
{ 413, PT_PC, ucp_Mc },
|
||||
{ 416, PT_PC, ucp_Me },
|
||||
{ 419, PT_PC, ucp_Mn },
|
||||
{ 422, PT_SC, ucp_Mongolian },
|
||||
{ 432, PT_SC, ucp_Myanmar },
|
||||
{ 440, PT_GC, ucp_N },
|
||||
{ 442, PT_PC, ucp_Nd },
|
||||
{ 445, PT_SC, ucp_New_Tai_Lue },
|
||||
{ 457, PT_SC, ucp_Nko },
|
||||
{ 461, PT_PC, ucp_Nl },
|
||||
{ 464, PT_PC, ucp_No },
|
||||
{ 467, PT_SC, ucp_Ogham },
|
||||
{ 473, PT_SC, ucp_Ol_Chiki },
|
||||
{ 482, PT_SC, ucp_Old_Italic },
|
||||
{ 493, PT_SC, ucp_Old_Persian },
|
||||
{ 505, PT_SC, ucp_Oriya },
|
||||
{ 511, PT_SC, ucp_Osmanya },
|
||||
{ 519, PT_GC, ucp_P },
|
||||
{ 521, PT_PC, ucp_Pc },
|
||||
{ 524, PT_PC, ucp_Pd },
|
||||
{ 527, PT_PC, ucp_Pe },
|
||||
{ 530, PT_PC, ucp_Pf },
|
||||
{ 533, PT_SC, ucp_Phags_Pa },
|
||||
{ 542, PT_SC, ucp_Phoenician },
|
||||
{ 553, PT_PC, ucp_Pi },
|
||||
{ 556, PT_PC, ucp_Po },
|
||||
{ 559, PT_PC, ucp_Ps },
|
||||
{ 562, PT_SC, ucp_Rejang },
|
||||
{ 569, PT_SC, ucp_Runic },
|
||||
{ 575, PT_GC, ucp_S },
|
||||
{ 577, PT_SC, ucp_Saurashtra },
|
||||
{ 588, PT_PC, ucp_Sc },
|
||||
{ 591, PT_SC, ucp_Shavian },
|
||||
{ 599, PT_SC, ucp_Sinhala },
|
||||
{ 607, PT_PC, ucp_Sk },
|
||||
{ 610, PT_PC, ucp_Sm },
|
||||
{ 613, PT_PC, ucp_So },
|
||||
{ 616, PT_SC, ucp_Sundanese },
|
||||
{ 626, PT_SC, ucp_Syloti_Nagri },
|
||||
{ 639, PT_SC, ucp_Syriac },
|
||||
{ 646, PT_SC, ucp_Tagalog },
|
||||
{ 654, PT_SC, ucp_Tagbanwa },
|
||||
{ 663, PT_SC, ucp_Tai_Le },
|
||||
{ 670, PT_SC, ucp_Tamil },
|
||||
{ 676, PT_SC, ucp_Telugu },
|
||||
{ 683, PT_SC, ucp_Thaana },
|
||||
{ 690, PT_SC, ucp_Thai },
|
||||
{ 695, PT_SC, ucp_Tibetan },
|
||||
{ 703, PT_SC, ucp_Tifinagh },
|
||||
{ 712, PT_SC, ucp_Ugaritic },
|
||||
{ 721, PT_SC, ucp_Vai },
|
||||
{ 725, PT_SC, ucp_Yi },
|
||||
{ 728, PT_GC, ucp_Z },
|
||||
{ 730, PT_PC, ucp_Zl },
|
||||
{ 733, PT_PC, ucp_Zp },
|
||||
{ 736, PT_PC, ucp_Zs }
|
||||
};
|
||||
|
||||
const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table);
|
||||
|
@ -43,58 +43,9 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
/* This module contains code for searching the table of Unicode character
|
||||
properties. */
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
#include "ucp.h" /* Category definitions */
|
||||
#include "ucpinternal.h" /* Internal table details */
|
||||
|
||||
|
||||
/* Table to translate from particular type value to the general value. */
|
||||
|
||||
static int ucp_gentype[] = {
|
||||
ucp_C, ucp_C, ucp_C, ucp_C, ucp_C, /* Cc, Cf, Cn, Co, Cs */
|
||||
ucp_L, ucp_L, ucp_L, ucp_L, ucp_L, /* Ll, Lu, Lm, Lo, Lt */
|
||||
ucp_M, ucp_M, ucp_M, /* Mc, Me, Mn */
|
||||
ucp_N, ucp_N, ucp_N, /* Nd, Nl, No */
|
||||
ucp_P, ucp_P, ucp_P, ucp_P, ucp_P, /* Pc, Pd, Pe, Pf, Pi */
|
||||
ucp_P, ucp_P, /* Ps, Po */
|
||||
ucp_S, ucp_S, ucp_S, ucp_S, /* Sc, Sk, Sm, So */
|
||||
ucp_Z, ucp_Z, ucp_Z /* Zl, Zp, Zs */
|
||||
};
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Search table and return type *
|
||||
*************************************************/
|
||||
|
||||
/* Three values are returned: the category is ucp_C, ucp_L, etc. The detailed
|
||||
character type is ucp_Lu, ucp_Nd, etc. The script is ucp_Latin, etc.
|
||||
|
||||
Arguments:
|
||||
c the character value
|
||||
type_ptr the detailed character type is returned here
|
||||
script_ptr the script is returned here
|
||||
|
||||
Returns: the character type category
|
||||
*/
|
||||
|
||||
int
|
||||
_pcre_ucp_findprop(const unsigned int c, int *type_ptr, int *script_ptr)
|
||||
{
|
||||
/* Note that the Unicode types have the same values in glib and in
|
||||
* PCRE, so ucp_Ll == G_UNICODE_LOWERCASE_LETTER,
|
||||
* ucp_Zs == G_UNICODE_SPACE_SEPARATOR, and so on. */
|
||||
*type_ptr = g_unichar_type(c);
|
||||
*script_ptr = g_unichar_get_script(c);
|
||||
return ucp_gentype[*type_ptr];
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
@ -113,7 +64,7 @@ Returns: the other case or NOTACHAR if none
|
||||
unsigned int
|
||||
_pcre_ucp_othercase(const unsigned int c)
|
||||
{
|
||||
int other_case = NOTACHAR;
|
||||
unsigned int other_case = NOTACHAR;
|
||||
|
||||
if (g_unichar_islower(c))
|
||||
other_case = g_unichar_toupper(c);
|
||||
|
@ -1,4 +1,3 @@
|
||||
#include "config.h"
|
||||
#include "pcre_internal.h"
|
||||
|
||||
/*
|
||||
|
@ -79,7 +79,7 @@ I could find no way of detecting that a macro is defined as an empty string at
|
||||
pre-processor time. This hack uses a standard trick for avoiding calling
|
||||
the STRING macro with an empty argument when doing the test. */
|
||||
|
||||
PCRE_EXP_DEFN const char *
|
||||
PCRE_EXP_DEFN const char * PCRE_CALL_CONVENTION
|
||||
pcre_version(void)
|
||||
{
|
||||
return (XSTRING(Z PCRE_PRERELEASE)[1] == 0)?
|
||||
|
@ -104,9 +104,7 @@ while ((t = *data++) != XCL_END)
|
||||
#ifdef SUPPORT_UCP
|
||||
else /* XCL_PROP & XCL_NOTPROP */
|
||||
{
|
||||
int chartype, script;
|
||||
int category = _pcre_ucp_findprop(c, &chartype, &script);
|
||||
|
||||
int chartype = UCD_CHARTYPE(c);
|
||||
switch(*data)
|
||||
{
|
||||
case PT_ANY:
|
||||
@ -119,7 +117,7 @@ while ((t = *data++) != XCL_END)
|
||||
break;
|
||||
|
||||
case PT_GC:
|
||||
if ((data[1] == category) == (t == XCL_PROP)) return !negated;
|
||||
if ((data[1] == _pcre_ucp_gentype[chartype]) == (t == XCL_PROP)) return !negated;
|
||||
break;
|
||||
|
||||
case PT_PC:
|
||||
@ -127,7 +125,7 @@ while ((t = *data++) != XCL_END)
|
||||
break;
|
||||
|
||||
case PT_SC:
|
||||
if ((data[1] == script) == (t == XCL_PROP)) return !negated;
|
||||
if ((data[1] == UCD_SCRIPT(c)) == (t == XCL_PROP)) return !negated;
|
||||
break;
|
||||
|
||||
/* This should never occur, but compilers may mutter if there is no
|
||||
|
@ -125,7 +125,18 @@ enum {
|
||||
ucp_Cuneiform = G_UNICODE_SCRIPT_CUNEIFORM, /* New for Unicode 5.0.0 */
|
||||
ucp_Nko = G_UNICODE_SCRIPT_NKO, /* New for Unicode 5.0.0 */
|
||||
ucp_Phags_Pa = G_UNICODE_SCRIPT_PHAGS_PA, /* New for Unicode 5.0.0 */
|
||||
ucp_Phoenician = G_UNICODE_SCRIPT_PHOENICIAN /* New for Unicode 5.0.0 */
|
||||
ucp_Phoenician = G_UNICODE_SCRIPT_PHOENICIAN, /* New for Unicode 5.0.0 */
|
||||
ucp_Carian = G_UNICODE_SCRIPT_CARIAN, /* New for Unicode 5.1 */
|
||||
ucp_Cham = G_UNICODE_SCRIPT_CHAM, /* New for Unicode 5.1 */
|
||||
ucp_Kayah_Li = G_UNICODE_SCRIPT_KAYAH_LI, /* New for Unicode 5.1 */
|
||||
ucp_Lepcha = G_UNICODE_SCRIPT_LEPCHA, /* New for Unicode 5.1 */
|
||||
ucp_Lycian = G_UNICODE_SCRIPT_LYCIAN, /* New for Unicode 5.1 */
|
||||
ucp_Lydian = G_UNICODE_SCRIPT_LYDIAN, /* New for Unicode 5.1 */
|
||||
ucp_Ol_Chiki = G_UNICODE_SCRIPT_OL_CHIKI, /* New for Unicode 5.1 */
|
||||
ucp_Rejang = G_UNICODE_SCRIPT_REJANG, /* New for Unicode 5.1 */
|
||||
ucp_Saurashtra = G_UNICODE_SCRIPT_SAURASHTRA, /* New for Unicode 5.1 */
|
||||
ucp_Sundanese = G_UNICODE_SCRIPT_SUNDANESE, /* New for Unicode 5.1 */
|
||||
ucp_Vai = G_UNICODE_SCRIPT_VAI /* New for Unicode 5.1 */
|
||||
};
|
||||
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user