regex: Use glib for unicode data

Use g_unichar_type() and g_unichar_get_script() instead of pcre tables.
This commit is contained in:
Christian Persch 2012-02-12 21:20:33 +01:00 committed by Matthias Clasen
parent 75dffb99b3
commit 706b72db21
9 changed files with 958 additions and 239 deletions

View File

@ -51,7 +51,6 @@ libpcre_la_SOURCES = \
pcre_string_utils.c \ pcre_string_utils.c \
pcre_study.c \ pcre_study.c \
pcre_tables.c \ pcre_tables.c \
pcre_ucd.c \
pcre_valid_utf8.c \ pcre_valid_utf8.c \
pcre_version.c \ pcre_version.c \
pcre_xclass.c \ pcre_xclass.c \

View File

@ -2890,43 +2890,43 @@ Returns: TRUE if auto-possessifying is OK
static BOOL static BOOL
check_char_prop(int c, int ptype, int pdata, BOOL negated) check_char_prop(int c, int ptype, int pdata, BOOL negated)
{ {
const ucd_record *prop = GET_UCD(c); const pcre_uint8 chartype = UCD_CHARTYPE(c);
switch(ptype) switch(ptype)
{ {
case PT_LAMP: case PT_LAMP:
return (prop->chartype == ucp_Lu || return (chartype == ucp_Lu ||
prop->chartype == ucp_Ll || chartype == ucp_Ll ||
prop->chartype == ucp_Lt) == negated; chartype == ucp_Lt) == negated;
case PT_GC: case PT_GC:
return (pdata == PRIV(ucp_gentype)[prop->chartype]) == negated; return (pdata == PRIV(ucp_gentype)[chartype]) == negated;
case PT_PC: case PT_PC:
return (pdata == prop->chartype) == negated; return (pdata == chartype) == negated;
case PT_SC: case PT_SC:
return (pdata == prop->script) == negated; return (pdata == UCD_SCRIPT(c)) == negated;
/* These are specials */ /* These are specials */
case PT_ALNUM: case PT_ALNUM:
return (PRIV(ucp_gentype)[prop->chartype] == ucp_L || return (PRIV(ucp_gentype)[chartype] == ucp_L ||
PRIV(ucp_gentype)[prop->chartype] == ucp_N) == negated; PRIV(ucp_gentype)[chartype] == ucp_N) == negated;
case PT_SPACE: /* Perl space */ case PT_SPACE: /* Perl space */
return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z || return (PRIV(ucp_gentype)[chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR) c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
== negated; == negated;
case PT_PXSPACE: /* POSIX space */ case PT_PXSPACE: /* POSIX space */
return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z || return (PRIV(ucp_gentype)[chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
c == CHAR_FF || c == CHAR_CR) c == CHAR_FF || c == CHAR_CR)
== negated; == negated;
case PT_WORD: case PT_WORD:
return (PRIV(ucp_gentype)[prop->chartype] == ucp_L || return (PRIV(ucp_gentype)[chartype] == ucp_L ||
PRIV(ucp_gentype)[prop->chartype] == ucp_N || PRIV(ucp_gentype)[chartype] == ucp_N ||
c == CHAR_UNDERSCORE) == negated; c == CHAR_UNDERSCORE) == negated;
} }
return FALSE; return FALSE;

View File

@ -1015,7 +1015,7 @@ for (;;)
if (clen > 0) if (clen > 0)
{ {
BOOL OK; BOOL OK;
const ucd_record * prop = GET_UCD(c); const pcre_uint8 chartype = UCD_CHARTYPE(c);
switch(code[1]) switch(code[1])
{ {
case PT_ANY: case PT_ANY:
@ -1023,43 +1023,43 @@ for (;;)
break; break;
case PT_LAMP: case PT_LAMP:
OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || OK = chartype == ucp_Lu || chartype == ucp_Ll ||
prop->chartype == ucp_Lt; chartype == ucp_Lt;
break; break;
case PT_GC: case PT_GC:
OK = PRIV(ucp_gentype)[prop->chartype] == code[2]; OK = PRIV(ucp_gentype)[chartype] == code[2];
break; break;
case PT_PC: case PT_PC:
OK = prop->chartype == code[2]; OK = chartype == code[2];
break; break;
case PT_SC: case PT_SC:
OK = prop->script == code[2]; OK = UCD_SCRIPT(c) == code[2];
break; break;
/* These are specials for combination cases. */ /* These are specials for combination cases. */
case PT_ALNUM: case PT_ALNUM:
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
PRIV(ucp_gentype)[prop->chartype] == ucp_N; PRIV(ucp_gentype)[chartype] == ucp_N;
break; break;
case PT_SPACE: /* Perl space */ case PT_SPACE: /* Perl space */
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR; c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
break; break;
case PT_PXSPACE: /* POSIX space */ case PT_PXSPACE: /* POSIX space */
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
c == CHAR_FF || c == CHAR_CR; c == CHAR_FF || c == CHAR_CR;
break; break;
case PT_WORD: case PT_WORD:
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
PRIV(ucp_gentype)[prop->chartype] == ucp_N || PRIV(ucp_gentype)[chartype] == ucp_N ||
c == CHAR_UNDERSCORE; c == CHAR_UNDERSCORE;
break; break;
@ -1209,7 +1209,7 @@ for (;;)
if (clen > 0) if (clen > 0)
{ {
BOOL OK; BOOL OK;
const ucd_record * prop = GET_UCD(c); const pcre_uint8 chartype = UCD_CHARTYPE(c);
switch(code[2]) switch(code[2])
{ {
case PT_ANY: case PT_ANY:
@ -1217,43 +1217,43 @@ for (;;)
break; break;
case PT_LAMP: case PT_LAMP:
OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || OK = chartype == ucp_Lu || chartype == ucp_Ll ||
prop->chartype == ucp_Lt; chartype == ucp_Lt;
break; break;
case PT_GC: case PT_GC:
OK = PRIV(ucp_gentype)[prop->chartype] == code[3]; OK = PRIV(ucp_gentype)[chartype] == code[3];
break; break;
case PT_PC: case PT_PC:
OK = prop->chartype == code[3]; OK = chartype == code[3];
break; break;
case PT_SC: case PT_SC:
OK = prop->script == code[3]; OK = UCD_SCRIPT(c) == code[3];
break; break;
/* These are specials for combination cases. */ /* These are specials for combination cases. */
case PT_ALNUM: case PT_ALNUM:
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
PRIV(ucp_gentype)[prop->chartype] == ucp_N; PRIV(ucp_gentype)[chartype] == ucp_N;
break; break;
case PT_SPACE: /* Perl space */ case PT_SPACE: /* Perl space */
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR; c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
break; break;
case PT_PXSPACE: /* POSIX space */ case PT_PXSPACE: /* POSIX space */
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
c == CHAR_FF || c == CHAR_CR; c == CHAR_FF || c == CHAR_CR;
break; break;
case PT_WORD: case PT_WORD:
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
PRIV(ucp_gentype)[prop->chartype] == ucp_N || PRIV(ucp_gentype)[chartype] == ucp_N ||
c == CHAR_UNDERSCORE; c == CHAR_UNDERSCORE;
break; break;
@ -1456,7 +1456,7 @@ for (;;)
if (clen > 0) if (clen > 0)
{ {
BOOL OK; BOOL OK;
const ucd_record * prop = GET_UCD(c); const pcre_uint8 chartype = UCD_CHARTYPE(c);
switch(code[2]) switch(code[2])
{ {
case PT_ANY: case PT_ANY:
@ -1464,43 +1464,43 @@ for (;;)
break; break;
case PT_LAMP: case PT_LAMP:
OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || OK = chartype == ucp_Lu || chartype == ucp_Ll ||
prop->chartype == ucp_Lt; chartype == ucp_Lt;
break; break;
case PT_GC: case PT_GC:
OK = PRIV(ucp_gentype)[prop->chartype] == code[3]; OK = PRIV(ucp_gentype)[chartype] == code[3];
break; break;
case PT_PC: case PT_PC:
OK = prop->chartype == code[3]; OK = chartype == code[3];
break; break;
case PT_SC: case PT_SC:
OK = prop->script == code[3]; OK = UCD_SCRIPT(c) == code[3];
break; break;
/* These are specials for combination cases. */ /* These are specials for combination cases. */
case PT_ALNUM: case PT_ALNUM:
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
PRIV(ucp_gentype)[prop->chartype] == ucp_N; PRIV(ucp_gentype)[chartype] == ucp_N;
break; break;
case PT_SPACE: /* Perl space */ case PT_SPACE: /* Perl space */
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR; c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
break; break;
case PT_PXSPACE: /* POSIX space */ case PT_PXSPACE: /* POSIX space */
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
c == CHAR_FF || c == CHAR_CR; c == CHAR_FF || c == CHAR_CR;
break; break;
case PT_WORD: case PT_WORD:
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
PRIV(ucp_gentype)[prop->chartype] == ucp_N || PRIV(ucp_gentype)[chartype] == ucp_N ||
c == CHAR_UNDERSCORE; c == CHAR_UNDERSCORE;
break; break;
@ -1728,7 +1728,7 @@ for (;;)
if (clen > 0) if (clen > 0)
{ {
BOOL OK; BOOL OK;
const ucd_record * prop = GET_UCD(c); const pcre_uint8 chartype = UCD_CHARTYPE(c);
switch(code[1 + IMM2_SIZE + 1]) switch(code[1 + IMM2_SIZE + 1])
{ {
case PT_ANY: case PT_ANY:
@ -1736,43 +1736,43 @@ for (;;)
break; break;
case PT_LAMP: case PT_LAMP:
OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || OK = chartype == ucp_Lu || chartype == ucp_Ll ||
prop->chartype == ucp_Lt; chartype == ucp_Lt;
break; break;
case PT_GC: case PT_GC:
OK = PRIV(ucp_gentype)[prop->chartype] == code[1 + IMM2_SIZE + 2]; OK = PRIV(ucp_gentype)[chartype] == code[1 + IMM2_SIZE + 2];
break; break;
case PT_PC: case PT_PC:
OK = prop->chartype == code[1 + IMM2_SIZE + 2]; OK = chartype == code[1 + IMM2_SIZE + 2];
break; break;
case PT_SC: case PT_SC:
OK = prop->script == code[1 + IMM2_SIZE + 2]; OK = UCD_SCRIPT(c) == code[1 + IMM2_SIZE + 2];
break; break;
/* These are specials for combination cases. */ /* These are specials for combination cases. */
case PT_ALNUM: case PT_ALNUM:
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
PRIV(ucp_gentype)[prop->chartype] == ucp_N; PRIV(ucp_gentype)[chartype] == ucp_N;
break; break;
case PT_SPACE: /* Perl space */ case PT_SPACE: /* Perl space */
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR; c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
break; break;
case PT_PXSPACE: /* POSIX space */ case PT_PXSPACE: /* POSIX space */
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
c == CHAR_FF || c == CHAR_CR; c == CHAR_FF || c == CHAR_CR;
break; break;
case PT_WORD: case PT_WORD:
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
PRIV(ucp_gentype)[prop->chartype] == ucp_N || PRIV(ucp_gentype)[chartype] == ucp_N ||
c == CHAR_UNDERSCORE; c == CHAR_UNDERSCORE;
break; break;

View File

@ -2507,7 +2507,7 @@ for (;;)
} }
GETCHARINCTEST(c, eptr); GETCHARINCTEST(c, eptr);
{ {
const ucd_record *prop = GET_UCD(c); const pcre_uint8 chartype = UCD_CHARTYPE(c);
switch(ecode[1]) switch(ecode[1])
{ {
@ -2516,44 +2516,44 @@ for (;;)
break; break;
case PT_LAMP: case PT_LAMP:
if ((prop->chartype == ucp_Lu || if ((chartype == ucp_Lu ||
prop->chartype == ucp_Ll || chartype == ucp_Ll ||
prop->chartype == ucp_Lt) == (op == OP_NOTPROP)) chartype == ucp_Lt) == (op == OP_NOTPROP))
RRETURN(MATCH_NOMATCH); RRETURN(MATCH_NOMATCH);
break; break;
case PT_GC: case PT_GC:
if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP)) if ((ecode[2] != PRIV(ucp_gentype)[chartype]) == (op == OP_PROP))
RRETURN(MATCH_NOMATCH); RRETURN(MATCH_NOMATCH);
break; break;
case PT_PC: case PT_PC:
if ((ecode[2] != prop->chartype) == (op == OP_PROP)) if ((ecode[2] != chartype) == (op == OP_PROP))
RRETURN(MATCH_NOMATCH); RRETURN(MATCH_NOMATCH);
break; break;
case PT_SC: case PT_SC:
if ((ecode[2] != prop->script) == (op == OP_PROP)) if ((ecode[2] != UCD_SCRIPT(c)) == (op == OP_PROP))
RRETURN(MATCH_NOMATCH); RRETURN(MATCH_NOMATCH);
break; break;
/* These are specials */ /* These are specials */
case PT_ALNUM: case PT_ALNUM:
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L || if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP)) PRIV(ucp_gentype)[chartype] == ucp_N) == (op == OP_NOTPROP))
RRETURN(MATCH_NOMATCH); RRETURN(MATCH_NOMATCH);
break; break;
case PT_SPACE: /* Perl space */ case PT_SPACE: /* Perl space */
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z || if ((PRIV(ucp_gentype)[chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR) c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
== (op == OP_NOTPROP)) == (op == OP_NOTPROP))
RRETURN(MATCH_NOMATCH); RRETURN(MATCH_NOMATCH);
break; break;
case PT_PXSPACE: /* POSIX space */ case PT_PXSPACE: /* POSIX space */
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z || if ((PRIV(ucp_gentype)[chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
c == CHAR_FF || c == CHAR_CR) c == CHAR_FF || c == CHAR_CR)
== (op == OP_NOTPROP)) == (op == OP_NOTPROP))
@ -2561,8 +2561,8 @@ for (;;)
break; break;
case PT_WORD: case PT_WORD:
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L || if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
PRIV(ucp_gentype)[prop->chartype] == ucp_N || PRIV(ucp_gentype)[chartype] == ucp_N ||
c == CHAR_UNDERSCORE) == (op == OP_NOTPROP)) c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
RRETURN(MATCH_NOMATCH); RRETURN(MATCH_NOMATCH);
break; break;

View File

@ -2315,15 +2315,12 @@ extern const int PRIV(ucp_typerange)[];
#ifdef SUPPORT_UCP #ifdef SUPPORT_UCP
/* UCD access macros */ /* UCD access macros */
#define UCD_BLOCK_SIZE 128 unsigned int _pcre_ucp_othercase(const unsigned int c);
#define GET_UCD(ch) (PRIV(ucd_records) + \
PRIV(ucd_stage2)[PRIV(ucd_stage1)[(ch) / UCD_BLOCK_SIZE] * \
UCD_BLOCK_SIZE + (ch) % UCD_BLOCK_SIZE])
#define UCD_CHARTYPE(ch) GET_UCD(ch)->chartype #define UCD_CHARTYPE(ch) (pcre_uint8)g_unichar_type((gunichar)(ch))
#define UCD_SCRIPT(ch) GET_UCD(ch)->script #define UCD_SCRIPT(ch) (pcre_uint8)g_unichar_get_script((gunichar)(ch))
#define UCD_CATEGORY(ch) PRIV(ucp_gentype)[UCD_CHARTYPE(ch)] #define UCD_CATEGORY(ch) PRIV(ucp_gentype)[UCD_CHARTYPE(ch)]
#define UCD_OTHERCASE(ch) (ch + GET_UCD(ch)->other_case) #define UCD_OTHERCASE(ch) (_pcre_ucp_othercase(ch))
#endif /* SUPPORT_UCP */ #endif /* SUPPORT_UCP */

View File

@ -563,6 +563,22 @@ const ucp_type_table PRIV(utt)[] = {
const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table); const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
unsigned int
_pcre_ucp_othercase(const unsigned int c)
{
int other_case = NOTACHAR;
if (g_unichar_islower(c))
other_case = g_unichar_toupper(c);
else if (g_unichar_isupper(c))
other_case = g_unichar_tolower(c);
if (other_case == c)
other_case = NOTACHAR;
return other_case;
}
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UTF */
/* End of pcre_tables.c */ /* End of pcre_tables.c */

View File

@ -127,7 +127,7 @@ while ((t = *data++) != XCL_END)
#ifdef SUPPORT_UCP #ifdef SUPPORT_UCP
else /* XCL_PROP & XCL_NOTPROP */ else /* XCL_PROP & XCL_NOTPROP */
{ {
const ucd_record *prop = GET_UCD(c); const pcre_uint8 chartype = UCD_CHARTYPE(c);
switch(*data) switch(*data)
{ {
@ -136,46 +136,46 @@ while ((t = *data++) != XCL_END)
break; break;
case PT_LAMP: case PT_LAMP:
if ((prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || if ((chartype == ucp_Lu || chartype == ucp_Ll ||
prop->chartype == ucp_Lt) == (t == XCL_PROP)) return !negated; chartype == ucp_Lt) == (t == XCL_PROP)) return !negated;
break; break;
case PT_GC: case PT_GC:
if ((data[1] == PRIV(ucp_gentype)[prop->chartype]) == (t == XCL_PROP)) if ((data[1] == PRIV(ucp_gentype)[chartype]) == (t == XCL_PROP))
return !negated; return !negated;
break; break;
case PT_PC: case PT_PC:
if ((data[1] == prop->chartype) == (t == XCL_PROP)) return !negated; if ((data[1] == chartype) == (t == XCL_PROP)) return !negated;
break; break;
case PT_SC: case PT_SC:
if ((data[1] == prop->script) == (t == XCL_PROP)) return !negated; if ((data[1] == UCD_SCRIPT(c)) == (t == XCL_PROP)) return !negated;
break; break;
case PT_ALNUM: case PT_ALNUM:
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L || if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (t == XCL_PROP)) PRIV(ucp_gentype)[chartype] == ucp_N) == (t == XCL_PROP))
return !negated; return !negated;
break; break;
case PT_SPACE: /* Perl space */ case PT_SPACE: /* Perl space */
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z || if ((PRIV(ucp_gentype)[chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR) c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
== (t == XCL_PROP)) == (t == XCL_PROP))
return !negated; return !negated;
break; break;
case PT_PXSPACE: /* POSIX space */ case PT_PXSPACE: /* POSIX space */
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z || if ((PRIV(ucp_gentype)[chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
c == CHAR_FF || c == CHAR_CR) == (t == XCL_PROP)) c == CHAR_FF || c == CHAR_CR) == (t == XCL_PROP))
return !negated; return !negated;
break; break;
case PT_WORD: case PT_WORD:
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L || if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
PRIV(ucp_gentype)[prop->chartype] == ucp_N || c == CHAR_UNDERSCORE) PRIV(ucp_gentype)[chartype] == ucp_N || c == CHAR_UNDERSCORE)
== (t == XCL_PROP)) == (t == XCL_PROP))
return !negated; return !negated;
break; break;

View File

@ -10,6 +10,7 @@ the UCD access macros. New values that are added for new releases of Unicode
should always be at the end of each enum, for backwards compatibility. */ should always be at the end of each enum, for backwards compatibility. */
/* These are the general character categories. */ /* These are the general character categories. */
#include "gunicode.h"
enum { enum {
ucp_C, /* Other */ ucp_C, /* Other */
@ -24,148 +25,148 @@ enum {
/* These are the particular character types. */ /* These are the particular character types. */
enum { enum {
ucp_Cc, /* Control */ ucp_Cc = G_UNICODE_CONTROL, /* Control */
ucp_Cf, /* Format */ ucp_Cf = G_UNICODE_FORMAT, /* Format */
ucp_Cn, /* Unassigned */ ucp_Cn = G_UNICODE_UNASSIGNED, /* Unassigned */
ucp_Co, /* Private use */ ucp_Co = G_UNICODE_PRIVATE_USE, /* Private use */
ucp_Cs, /* Surrogate */ ucp_Cs = G_UNICODE_SURROGATE, /* Surrogate */
ucp_Ll, /* Lower case letter */ ucp_Ll = G_UNICODE_LOWERCASE_LETTER, /* Lower case letter */
ucp_Lm, /* Modifier letter */ ucp_Lm = G_UNICODE_MODIFIER_LETTER, /* Modifier letter */
ucp_Lo, /* Other letter */ ucp_Lo = G_UNICODE_OTHER_LETTER, /* Other letter */
ucp_Lt, /* Title case letter */ ucp_Lt = G_UNICODE_TITLECASE_LETTER, /* Title case letter */
ucp_Lu, /* Upper case letter */ ucp_Lu = G_UNICODE_UPPERCASE_LETTER, /* Upper case letter */
ucp_Mc, /* Spacing mark */ ucp_Mc = G_UNICODE_SPACING_MARK, /* Spacing mark */
ucp_Me, /* Enclosing mark */ ucp_Me = G_UNICODE_ENCLOSING_MARK, /* Enclosing mark */
ucp_Mn, /* Non-spacing mark */ ucp_Mn = G_UNICODE_NON_SPACING_MARK, /* Non-spacing mark */
ucp_Nd, /* Decimal number */ ucp_Nd = G_UNICODE_DECIMAL_NUMBER, /* Decimal number */
ucp_Nl, /* Letter number */ ucp_Nl = G_UNICODE_LETTER_NUMBER, /* Letter number */
ucp_No, /* Other number */ ucp_No = G_UNICODE_OTHER_NUMBER, /* Other number */
ucp_Pc, /* Connector punctuation */ ucp_Pc = G_UNICODE_CONNECT_PUNCTUATION, /* Connector punctuation */
ucp_Pd, /* Dash punctuation */ ucp_Pd = G_UNICODE_DASH_PUNCTUATION, /* Dash punctuation */
ucp_Pe, /* Close punctuation */ ucp_Pe = G_UNICODE_CLOSE_PUNCTUATION, /* Close punctuation */
ucp_Pf, /* Final punctuation */ ucp_Pf = G_UNICODE_FINAL_PUNCTUATION, /* Final punctuation */
ucp_Pi, /* Initial punctuation */ ucp_Pi = G_UNICODE_INITIAL_PUNCTUATION, /* Initial punctuation */
ucp_Po, /* Other punctuation */ ucp_Po = G_UNICODE_OTHER_PUNCTUATION, /* Other punctuation */
ucp_Ps, /* Open punctuation */ ucp_Ps = G_UNICODE_OPEN_PUNCTUATION, /* Open punctuation */
ucp_Sc, /* Currency symbol */ ucp_Sc = G_UNICODE_CURRENCY_SYMBOL, /* Currency symbol */
ucp_Sk, /* Modifier symbol */ ucp_Sk = G_UNICODE_MODIFIER_SYMBOL, /* Modifier symbol */
ucp_Sm, /* Mathematical symbol */ ucp_Sm = G_UNICODE_MATH_SYMBOL, /* Mathematical symbol */
ucp_So, /* Other symbol */ ucp_So = G_UNICODE_OTHER_SYMBOL, /* Other symbol */
ucp_Zl, /* Line separator */ ucp_Zl = G_UNICODE_LINE_SEPARATOR, /* Line separator */
ucp_Zp, /* Paragraph separator */ ucp_Zp = G_UNICODE_PARAGRAPH_SEPARATOR, /* Paragraph separator */
ucp_Zs /* Space separator */ ucp_Zs = G_UNICODE_SPACE_SEPARATOR /* Space separator */
}; };
/* These are the script identifications. */ /* These are the script identifications. */
enum { enum {
ucp_Arabic, ucp_Arabic = G_UNICODE_SCRIPT_ARABIC,
ucp_Armenian, ucp_Armenian = G_UNICODE_SCRIPT_ARMENIAN,
ucp_Bengali, ucp_Bengali = G_UNICODE_SCRIPT_BENGALI,
ucp_Bopomofo, ucp_Bopomofo = G_UNICODE_SCRIPT_BOPOMOFO,
ucp_Braille, ucp_Braille = G_UNICODE_SCRIPT_BRAILLE,
ucp_Buginese, ucp_Buginese = G_UNICODE_SCRIPT_BUGINESE,
ucp_Buhid, ucp_Buhid = G_UNICODE_SCRIPT_BUHID,
ucp_Canadian_Aboriginal, ucp_Canadian_Aboriginal = G_UNICODE_SCRIPT_CANADIAN_ABORIGINAL,
ucp_Cherokee, ucp_Cherokee = G_UNICODE_SCRIPT_CHEROKEE,
ucp_Common, ucp_Common = G_UNICODE_SCRIPT_COMMON,
ucp_Coptic, ucp_Coptic = G_UNICODE_SCRIPT_COPTIC,
ucp_Cypriot, ucp_Cypriot = G_UNICODE_SCRIPT_CYPRIOT,
ucp_Cyrillic, ucp_Cyrillic = G_UNICODE_SCRIPT_CYRILLIC,
ucp_Deseret, ucp_Deseret = G_UNICODE_SCRIPT_DESERET,
ucp_Devanagari, ucp_Devanagari = G_UNICODE_SCRIPT_DEVANAGARI,
ucp_Ethiopic, ucp_Ethiopic = G_UNICODE_SCRIPT_ETHIOPIC,
ucp_Georgian, ucp_Georgian = G_UNICODE_SCRIPT_GEORGIAN,
ucp_Glagolitic, ucp_Glagolitic = G_UNICODE_SCRIPT_GLAGOLITIC,
ucp_Gothic, ucp_Gothic = G_UNICODE_SCRIPT_GOTHIC,
ucp_Greek, ucp_Greek = G_UNICODE_SCRIPT_GREEK,
ucp_Gujarati, ucp_Gujarati = G_UNICODE_SCRIPT_GUJARATI,
ucp_Gurmukhi, ucp_Gurmukhi = G_UNICODE_SCRIPT_GURMUKHI,
ucp_Han, ucp_Han = G_UNICODE_SCRIPT_HAN,
ucp_Hangul, ucp_Hangul = G_UNICODE_SCRIPT_HANGUL,
ucp_Hanunoo, ucp_Hanunoo = G_UNICODE_SCRIPT_HANUNOO,
ucp_Hebrew, ucp_Hebrew = G_UNICODE_SCRIPT_HEBREW,
ucp_Hiragana, ucp_Hiragana = G_UNICODE_SCRIPT_HIRAGANA,
ucp_Inherited, ucp_Inherited = G_UNICODE_SCRIPT_INHERITED,
ucp_Kannada, ucp_Kannada = G_UNICODE_SCRIPT_KANNADA,
ucp_Katakana, ucp_Katakana = G_UNICODE_SCRIPT_KATAKANA,
ucp_Kharoshthi, ucp_Kharoshthi = G_UNICODE_SCRIPT_KHAROSHTHI,
ucp_Khmer, ucp_Khmer = G_UNICODE_SCRIPT_KHMER,
ucp_Lao, ucp_Lao = G_UNICODE_SCRIPT_LAO,
ucp_Latin, ucp_Latin = G_UNICODE_SCRIPT_LATIN,
ucp_Limbu, ucp_Limbu = G_UNICODE_SCRIPT_LIMBU,
ucp_Linear_B, ucp_Linear_B = G_UNICODE_SCRIPT_LINEAR_B,
ucp_Malayalam, ucp_Malayalam = G_UNICODE_SCRIPT_MALAYALAM,
ucp_Mongolian, ucp_Mongolian = G_UNICODE_SCRIPT_MONGOLIAN,
ucp_Myanmar, ucp_Myanmar = G_UNICODE_SCRIPT_MYANMAR,
ucp_New_Tai_Lue, ucp_New_Tai_Lue = G_UNICODE_SCRIPT_NEW_TAI_LUE,
ucp_Ogham, ucp_Ogham = G_UNICODE_SCRIPT_OGHAM,
ucp_Old_Italic, ucp_Old_Italic = G_UNICODE_SCRIPT_OLD_ITALIC,
ucp_Old_Persian, ucp_Old_Persian = G_UNICODE_SCRIPT_OLD_PERSIAN,
ucp_Oriya, ucp_Oriya = G_UNICODE_SCRIPT_ORIYA,
ucp_Osmanya, ucp_Osmanya = G_UNICODE_SCRIPT_OSMANYA,
ucp_Runic, ucp_Runic = G_UNICODE_SCRIPT_RUNIC,
ucp_Shavian, ucp_Shavian = G_UNICODE_SCRIPT_SHAVIAN,
ucp_Sinhala, ucp_Sinhala = G_UNICODE_SCRIPT_SINHALA,
ucp_Syloti_Nagri, ucp_Syloti_Nagri = G_UNICODE_SCRIPT_SYLOTI_NAGRI,
ucp_Syriac, ucp_Syriac = G_UNICODE_SCRIPT_SYRIAC,
ucp_Tagalog, ucp_Tagalog = G_UNICODE_SCRIPT_TAGALOG,
ucp_Tagbanwa, ucp_Tagbanwa = G_UNICODE_SCRIPT_TAGBANWA,
ucp_Tai_Le, ucp_Tai_Le = G_UNICODE_SCRIPT_TAI_LE,
ucp_Tamil, ucp_Tamil = G_UNICODE_SCRIPT_TAMIL,
ucp_Telugu, ucp_Telugu = G_UNICODE_SCRIPT_TELUGU,
ucp_Thaana, ucp_Thaana = G_UNICODE_SCRIPT_THAANA,
ucp_Thai, ucp_Thai = G_UNICODE_SCRIPT_THAI,
ucp_Tibetan, ucp_Tibetan = G_UNICODE_SCRIPT_TIBETAN,
ucp_Tifinagh, ucp_Tifinagh = G_UNICODE_SCRIPT_TIFINAGH,
ucp_Ugaritic, ucp_Ugaritic = G_UNICODE_SCRIPT_UGARITIC,
ucp_Yi, ucp_Yi = G_UNICODE_SCRIPT_YI,
/* New for Unicode 5.0: */ /* New for Unicode 5.0: */
ucp_Balinese, ucp_Balinese = G_UNICODE_SCRIPT_BALINESE,
ucp_Cuneiform, ucp_Cuneiform = G_UNICODE_SCRIPT_CUNEIFORM,
ucp_Nko, ucp_Nko = G_UNICODE_SCRIPT_NKO,
ucp_Phags_Pa, ucp_Phags_Pa = G_UNICODE_SCRIPT_PHAGS_PA,
ucp_Phoenician, ucp_Phoenician = G_UNICODE_SCRIPT_PHOENICIAN,
/* New for Unicode 5.1: */ /* New for Unicode 5.1: */
ucp_Carian, ucp_Carian = G_UNICODE_SCRIPT_CARIAN,
ucp_Cham, ucp_Cham = G_UNICODE_SCRIPT_CHAM,
ucp_Kayah_Li, ucp_Kayah_Li = G_UNICODE_SCRIPT_KAYAH_LI,
ucp_Lepcha, ucp_Lepcha = G_UNICODE_SCRIPT_LEPCHA,
ucp_Lycian, ucp_Lycian = G_UNICODE_SCRIPT_LYCIAN,
ucp_Lydian, ucp_Lydian = G_UNICODE_SCRIPT_LYDIAN,
ucp_Ol_Chiki, ucp_Ol_Chiki = G_UNICODE_SCRIPT_OL_CHIKI,
ucp_Rejang, ucp_Rejang = G_UNICODE_SCRIPT_REJANG,
ucp_Saurashtra, ucp_Saurashtra = G_UNICODE_SCRIPT_SAURASHTRA,
ucp_Sundanese, ucp_Sundanese = G_UNICODE_SCRIPT_SUNDANESE,
ucp_Vai, ucp_Vai = G_UNICODE_SCRIPT_VAI,
/* New for Unicode 5.2: */ /* New for Unicode 5.2: */
ucp_Avestan, ucp_Avestan = G_UNICODE_SCRIPT_AVESTAN,
ucp_Bamum, ucp_Bamum = G_UNICODE_SCRIPT_BAMUM,
ucp_Egyptian_Hieroglyphs, ucp_Egyptian_Hieroglyphs = G_UNICODE_SCRIPT_EGYPTIAN_HIEROGLYPHS,
ucp_Imperial_Aramaic, ucp_Imperial_Aramaic = G_UNICODE_SCRIPT_IMPERIAL_ARAMAIC,
ucp_Inscriptional_Pahlavi, ucp_Inscriptional_Pahlavi = G_UNICODE_SCRIPT_INSCRIPTIONAL_PAHLAVI,
ucp_Inscriptional_Parthian, ucp_Inscriptional_Parthian = G_UNICODE_SCRIPT_INSCRIPTIONAL_PARTHIAN,
ucp_Javanese, ucp_Javanese = G_UNICODE_SCRIPT_JAVANESE,
ucp_Kaithi, ucp_Kaithi = G_UNICODE_SCRIPT_KAITHI,
ucp_Lisu, ucp_Lisu = G_UNICODE_SCRIPT_LISU,
ucp_Meetei_Mayek, ucp_Meetei_Mayek = G_UNICODE_SCRIPT_MEETEI_MAYEK,
ucp_Old_South_Arabian, ucp_Old_South_Arabian = G_UNICODE_SCRIPT_OLD_SOUTH_ARABIAN,
ucp_Old_Turkic, ucp_Old_Turkic = G_UNICODE_SCRIPT_OLD_TURKIC,
ucp_Samaritan, ucp_Samaritan = G_UNICODE_SCRIPT_SAMARITAN,
ucp_Tai_Tham, ucp_Tai_Tham = G_UNICODE_SCRIPT_TAI_THAM,
ucp_Tai_Viet, ucp_Tai_Viet = G_UNICODE_SCRIPT_TAI_VIET,
/* New for Unicode 6.0.0: */ /* New for Unicode 6.0.0: */
ucp_Batak, ucp_Batak = G_UNICODE_SCRIPT_BATAK,
ucp_Brahmi, ucp_Brahmi = G_UNICODE_SCRIPT_BRAHMI,
ucp_Mandaic, ucp_Mandaic = G_UNICODE_SCRIPT_MANDAIC,
/* New for Unicode 6.1.0: */ /* New for Unicode 6.1.0: */
ucp_Chakma, ucp_Chakma = G_UNICODE_SCRIPT_CHAKMA,
ucp_Meroitic_Cursive, ucp_Meroitic_Cursive = G_UNICODE_SCRIPT_MEROITIC_CURSIVE,
ucp_Meroitic_Hieroglyphs, ucp_Meroitic_Hieroglyphs = G_UNICODE_SCRIPT_MEROITIC_HIEROGLYPHS,
ucp_Miao, ucp_Miao = G_UNICODE_SCRIPT_MIAO,
ucp_Sharada, ucp_Sharada = G_UNICODE_SCRIPT_SHARADA,
ucp_Sora_Sompeng, ucp_Sora_Sompeng = G_UNICODE_SCRIPT_SORA_SOMPENG,
ucp_Takri ucp_Takri = G_UNICODE_SCRIPT_TAKRI,
}; };
#endif #endif

View File

@ -1,6 +1,632 @@
--- pcre/ucp.h 2006-07-05 13:28:01.000000000 +0200 From 384879be07418fc6224b6603a2e8ca6f11e178fc Mon Sep 17 00:00:00 2001
+++ pcre/ucp.h 2006-10-09 16:27:19.000000000 +0200 From: Christian Persch <chpe@gnome.org>
@@ -60,72 +60,72 @@ enum { Date: Sun, 12 Feb 2012 21:20:33 +0100
Subject: [PATCH] regex: Use glib for unicode data
Use g_unichar_type() and g_unichar_get_script() instead of pcre tables.
---
glib/pcre/Makefile.am | 1 -
glib/pcre/pcre_compile.c | 26 +++---
glib/pcre/pcre_dfa_exec.c | 96 ++++++++--------
glib/pcre/pcre_exec.c | 26 +++---
glib/pcre/pcre_internal.h | 11 +--
glib/pcre/pcre_tables.c | 16 +++
glib/pcre/pcre_xclass.c | 24 ++--
glib/pcre/ucp.h | 265 +++++++++++++++++++++++----------------------
8 files changed, 239 insertions(+), 226 deletions(-)
diff --git a/glib/pcre/Makefile.am b/glib/pcre/Makefile.am
index 21da5c5..1981953 100644
--- a/glib/pcre/Makefile.am
+++ b/glib/pcre/Makefile.am
@@ -51,7 +51,6 @@ libpcre_la_SOURCES = \
pcre_string_utils.c \
pcre_study.c \
pcre_tables.c \
- pcre_ucd.c \
pcre_valid_utf8.c \
pcre_version.c \
pcre_xclass.c \
diff --git a/glib/pcre/pcre_compile.c b/glib/pcre/pcre_compile.c
index eb985df..b44055a 100644
--- a/glib/pcre/pcre_compile.c
+++ b/glib/pcre/pcre_compile.c
@@ -2890,43 +2890,43 @@ Returns: TRUE if auto-possessifying is OK
static BOOL
check_char_prop(int c, int ptype, int pdata, BOOL negated)
{
-const ucd_record *prop = GET_UCD(c);
+const pcre_uint8 chartype = UCD_CHARTYPE(c);
switch(ptype)
{
case PT_LAMP:
- return (prop->chartype == ucp_Lu ||
- prop->chartype == ucp_Ll ||
- prop->chartype == ucp_Lt) == negated;
+ return (chartype == ucp_Lu ||
+ chartype == ucp_Ll ||
+ chartype == ucp_Lt) == negated;
case PT_GC:
- return (pdata == PRIV(ucp_gentype)[prop->chartype]) == negated;
+ return (pdata == PRIV(ucp_gentype)[chartype]) == negated;
case PT_PC:
- return (pdata == prop->chartype) == negated;
+ return (pdata == chartype) == negated;
case PT_SC:
- return (pdata == prop->script) == negated;
+ return (pdata == UCD_SCRIPT(c)) == negated;
/* These are specials */
case PT_ALNUM:
- return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N) == negated;
+ return (PRIV(ucp_gentype)[chartype] == ucp_L ||
+ PRIV(ucp_gentype)[chartype] == ucp_N) == negated;
case PT_SPACE: /* Perl space */
- return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
+ return (PRIV(ucp_gentype)[chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
== negated;
case PT_PXSPACE: /* POSIX space */
- return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
+ return (PRIV(ucp_gentype)[chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
c == CHAR_FF || c == CHAR_CR)
== negated;
case PT_WORD:
- return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
+ return (PRIV(ucp_gentype)[chartype] == ucp_L ||
+ PRIV(ucp_gentype)[chartype] == ucp_N ||
c == CHAR_UNDERSCORE) == negated;
}
return FALSE;
diff --git a/glib/pcre/pcre_dfa_exec.c b/glib/pcre/pcre_dfa_exec.c
index 21d7be6..41ff65b 100644
--- a/glib/pcre/pcre_dfa_exec.c
+++ b/glib/pcre/pcre_dfa_exec.c
@@ -1015,7 +1015,7 @@ for (;;)
if (clen > 0)
{
BOOL OK;
- const ucd_record * prop = GET_UCD(c);
+ const pcre_uint8 chartype = UCD_CHARTYPE(c);
switch(code[1])
{
case PT_ANY:
@@ -1023,43 +1023,43 @@ for (;;)
break;
case PT_LAMP:
- OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
- prop->chartype == ucp_Lt;
+ OK = chartype == ucp_Lu || chartype == ucp_Ll ||
+ chartype == ucp_Lt;
break;
case PT_GC:
- OK = PRIV(ucp_gentype)[prop->chartype] == code[2];
+ OK = PRIV(ucp_gentype)[chartype] == code[2];
break;
case PT_PC:
- OK = prop->chartype == code[2];
+ OK = chartype == code[2];
break;
case PT_SC:
- OK = prop->script == code[2];
+ OK = UCD_SCRIPT(c) == code[2];
break;
/* These are specials for combination cases. */
case PT_ALNUM:
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N;
+ OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
+ PRIV(ucp_gentype)[chartype] == ucp_N;
break;
case PT_SPACE: /* Perl space */
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
break;
case PT_PXSPACE: /* POSIX space */
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
c == CHAR_FF || c == CHAR_CR;
break;
case PT_WORD:
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
+ PRIV(ucp_gentype)[chartype] == ucp_N ||
c == CHAR_UNDERSCORE;
break;
@@ -1209,7 +1209,7 @@ for (;;)
if (clen > 0)
{
BOOL OK;
- const ucd_record * prop = GET_UCD(c);
+ const pcre_uint8 chartype = UCD_CHARTYPE(c);
switch(code[2])
{
case PT_ANY:
@@ -1217,43 +1217,43 @@ for (;;)
break;
case PT_LAMP:
- OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
- prop->chartype == ucp_Lt;
+ OK = chartype == ucp_Lu || chartype == ucp_Ll ||
+ chartype == ucp_Lt;
break;
case PT_GC:
- OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
+ OK = PRIV(ucp_gentype)[chartype] == code[3];
break;
case PT_PC:
- OK = prop->chartype == code[3];
+ OK = chartype == code[3];
break;
case PT_SC:
- OK = prop->script == code[3];
+ OK = UCD_SCRIPT(c) == code[3];
break;
/* These are specials for combination cases. */
case PT_ALNUM:
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N;
+ OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
+ PRIV(ucp_gentype)[chartype] == ucp_N;
break;
case PT_SPACE: /* Perl space */
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
break;
case PT_PXSPACE: /* POSIX space */
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
c == CHAR_FF || c == CHAR_CR;
break;
case PT_WORD:
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
+ PRIV(ucp_gentype)[chartype] == ucp_N ||
c == CHAR_UNDERSCORE;
break;
@@ -1456,7 +1456,7 @@ for (;;)
if (clen > 0)
{
BOOL OK;
- const ucd_record * prop = GET_UCD(c);
+ const pcre_uint8 chartype = UCD_CHARTYPE(c);
switch(code[2])
{
case PT_ANY:
@@ -1464,43 +1464,43 @@ for (;;)
break;
case PT_LAMP:
- OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
- prop->chartype == ucp_Lt;
+ OK = chartype == ucp_Lu || chartype == ucp_Ll ||
+ chartype == ucp_Lt;
break;
case PT_GC:
- OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
+ OK = PRIV(ucp_gentype)[chartype] == code[3];
break;
case PT_PC:
- OK = prop->chartype == code[3];
+ OK = chartype == code[3];
break;
case PT_SC:
- OK = prop->script == code[3];
+ OK = UCD_SCRIPT(c) == code[3];
break;
/* These are specials for combination cases. */
case PT_ALNUM:
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N;
+ OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
+ PRIV(ucp_gentype)[chartype] == ucp_N;
break;
case PT_SPACE: /* Perl space */
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
break;
case PT_PXSPACE: /* POSIX space */
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
c == CHAR_FF || c == CHAR_CR;
break;
case PT_WORD:
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
+ PRIV(ucp_gentype)[chartype] == ucp_N ||
c == CHAR_UNDERSCORE;
break;
@@ -1728,7 +1728,7 @@ for (;;)
if (clen > 0)
{
BOOL OK;
- const ucd_record * prop = GET_UCD(c);
+ const pcre_uint8 chartype = UCD_CHARTYPE(c);
switch(code[1 + IMM2_SIZE + 1])
{
case PT_ANY:
@@ -1736,43 +1736,43 @@ for (;;)
break;
case PT_LAMP:
- OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
- prop->chartype == ucp_Lt;
+ OK = chartype == ucp_Lu || chartype == ucp_Ll ||
+ chartype == ucp_Lt;
break;
case PT_GC:
- OK = PRIV(ucp_gentype)[prop->chartype] == code[1 + IMM2_SIZE + 2];
+ OK = PRIV(ucp_gentype)[chartype] == code[1 + IMM2_SIZE + 2];
break;
case PT_PC:
- OK = prop->chartype == code[1 + IMM2_SIZE + 2];
+ OK = chartype == code[1 + IMM2_SIZE + 2];
break;
case PT_SC:
- OK = prop->script == code[1 + IMM2_SIZE + 2];
+ OK = UCD_SCRIPT(c) == code[1 + IMM2_SIZE + 2];
break;
/* These are specials for combination cases. */
case PT_ALNUM:
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N;
+ OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
+ PRIV(ucp_gentype)[chartype] == ucp_N;
break;
case PT_SPACE: /* Perl space */
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
break;
case PT_PXSPACE: /* POSIX space */
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
c == CHAR_FF || c == CHAR_CR;
break;
case PT_WORD:
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
+ PRIV(ucp_gentype)[chartype] == ucp_N ||
c == CHAR_UNDERSCORE;
break;
diff --git a/glib/pcre/pcre_exec.c b/glib/pcre/pcre_exec.c
index b715353..8eb3162 100644
--- a/glib/pcre/pcre_exec.c
+++ b/glib/pcre/pcre_exec.c
@@ -2507,7 +2507,7 @@ for (;;)
}
GETCHARINCTEST(c, eptr);
{
- const ucd_record *prop = GET_UCD(c);
+ const pcre_uint8 chartype = UCD_CHARTYPE(c);
switch(ecode[1])
{
@@ -2516,44 +2516,44 @@ for (;;)
break;
case PT_LAMP:
- if ((prop->chartype == ucp_Lu ||
- prop->chartype == ucp_Ll ||
- prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
+ if ((chartype == ucp_Lu ||
+ chartype == ucp_Ll ||
+ chartype == ucp_Lt) == (op == OP_NOTPROP))
RRETURN(MATCH_NOMATCH);
break;
case PT_GC:
- if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP))
+ if ((ecode[2] != PRIV(ucp_gentype)[chartype]) == (op == OP_PROP))
RRETURN(MATCH_NOMATCH);
break;
case PT_PC:
- if ((ecode[2] != prop->chartype) == (op == OP_PROP))
+ if ((ecode[2] != chartype) == (op == OP_PROP))
RRETURN(MATCH_NOMATCH);
break;
case PT_SC:
- if ((ecode[2] != prop->script) == (op == OP_PROP))
+ if ((ecode[2] != UCD_SCRIPT(c)) == (op == OP_PROP))
RRETURN(MATCH_NOMATCH);
break;
/* These are specials */
case PT_ALNUM:
- if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
+ if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
+ PRIV(ucp_gentype)[chartype] == ucp_N) == (op == OP_NOTPROP))
RRETURN(MATCH_NOMATCH);
break;
case PT_SPACE: /* Perl space */
- if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
+ if ((PRIV(ucp_gentype)[chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
== (op == OP_NOTPROP))
RRETURN(MATCH_NOMATCH);
break;
case PT_PXSPACE: /* POSIX space */
- if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
+ if ((PRIV(ucp_gentype)[chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
c == CHAR_FF || c == CHAR_CR)
== (op == OP_NOTPROP))
@@ -2561,8 +2561,8 @@ for (;;)
break;
case PT_WORD:
- if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
+ if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
+ PRIV(ucp_gentype)[chartype] == ucp_N ||
c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
RRETURN(MATCH_NOMATCH);
break;
diff --git a/glib/pcre/pcre_internal.h b/glib/pcre/pcre_internal.h
index e5a4b6a..41c7ee3 100644
--- a/glib/pcre/pcre_internal.h
+++ b/glib/pcre/pcre_internal.h
@@ -2315,15 +2315,12 @@ extern const int PRIV(ucp_typerange)[];
#ifdef SUPPORT_UCP
/* UCD access macros */
-#define UCD_BLOCK_SIZE 128
-#define GET_UCD(ch) (PRIV(ucd_records) + \
- PRIV(ucd_stage2)[PRIV(ucd_stage1)[(ch) / UCD_BLOCK_SIZE] * \
- UCD_BLOCK_SIZE + (ch) % UCD_BLOCK_SIZE])
+unsigned int _pcre_ucp_othercase(const unsigned int c);
-#define UCD_CHARTYPE(ch) GET_UCD(ch)->chartype
-#define UCD_SCRIPT(ch) GET_UCD(ch)->script
+#define UCD_CHARTYPE(ch) (pcre_uint8)g_unichar_type((gunichar)(ch))
+#define UCD_SCRIPT(ch) (pcre_uint8)g_unichar_get_script((gunichar)(ch))
#define UCD_CATEGORY(ch) PRIV(ucp_gentype)[UCD_CHARTYPE(ch)]
-#define UCD_OTHERCASE(ch) (ch + GET_UCD(ch)->other_case)
+#define UCD_OTHERCASE(ch) (_pcre_ucp_othercase(ch))
#endif /* SUPPORT_UCP */
diff --git a/glib/pcre/pcre_tables.c b/glib/pcre/pcre_tables.c
index c8134ec..47becc7 100644
--- a/glib/pcre/pcre_tables.c
+++ b/glib/pcre/pcre_tables.c
@@ -563,6 +563,22 @@ const ucp_type_table PRIV(utt)[] = {
const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
+unsigned int
+_pcre_ucp_othercase(const unsigned int c)
+{
+ int other_case = NOTACHAR;
+
+ if (g_unichar_islower(c))
+ other_case = g_unichar_toupper(c);
+ else if (g_unichar_isupper(c))
+ other_case = g_unichar_tolower(c);
+
+ if (other_case == c)
+ other_case = NOTACHAR;
+
+ return other_case;
+}
+
#endif /* SUPPORT_UTF */
/* End of pcre_tables.c */
diff --git a/glib/pcre/pcre_xclass.c b/glib/pcre/pcre_xclass.c
index dca7a39..e5a55d7 100644
--- a/glib/pcre/pcre_xclass.c
+++ b/glib/pcre/pcre_xclass.c
@@ -127,7 +127,7 @@ while ((t = *data++) != XCL_END)
#ifdef SUPPORT_UCP
else /* XCL_PROP & XCL_NOTPROP */
{
- const ucd_record *prop = GET_UCD(c);
+ const pcre_uint8 chartype = UCD_CHARTYPE(c);
switch(*data)
{
@@ -136,46 +136,46 @@ while ((t = *data++) != XCL_END)
break;
case PT_LAMP:
- if ((prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
- prop->chartype == ucp_Lt) == (t == XCL_PROP)) return !negated;
+ if ((chartype == ucp_Lu || chartype == ucp_Ll ||
+ chartype == ucp_Lt) == (t == XCL_PROP)) return !negated;
break;
case PT_GC:
- if ((data[1] == PRIV(ucp_gentype)[prop->chartype]) == (t == XCL_PROP))
+ if ((data[1] == PRIV(ucp_gentype)[chartype]) == (t == XCL_PROP))
return !negated;
break;
case PT_PC:
- if ((data[1] == prop->chartype) == (t == XCL_PROP)) return !negated;
+ if ((data[1] == chartype) == (t == XCL_PROP)) return !negated;
break;
case PT_SC:
- if ((data[1] == prop->script) == (t == XCL_PROP)) return !negated;
+ if ((data[1] == UCD_SCRIPT(c)) == (t == XCL_PROP)) return !negated;
break;
case PT_ALNUM:
- if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (t == XCL_PROP))
+ if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
+ PRIV(ucp_gentype)[chartype] == ucp_N) == (t == XCL_PROP))
return !negated;
break;
case PT_SPACE: /* Perl space */
- if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
+ if ((PRIV(ucp_gentype)[chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
== (t == XCL_PROP))
return !negated;
break;
case PT_PXSPACE: /* POSIX space */
- if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
+ if ((PRIV(ucp_gentype)[chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
c == CHAR_FF || c == CHAR_CR) == (t == XCL_PROP))
return !negated;
break;
case PT_WORD:
- if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N || c == CHAR_UNDERSCORE)
+ if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
+ PRIV(ucp_gentype)[chartype] == ucp_N || c == CHAR_UNDERSCORE)
== (t == XCL_PROP))
return !negated;
break;
diff --git a/glib/pcre/ucp.h b/glib/pcre/ucp.h
index 59c3bec..53a48c9 100644
--- a/glib/pcre/ucp.h
+++ b/glib/pcre/ucp.h
@@ -10,6 +10,7 @@ the UCD access macros. New values that are added for new releases of Unicode
should always be at the end of each enum, for backwards compatibility. */
/* These are the general character categories. */
+#include "gunicode.h"
enum {
ucp_C, /* Other */
@@ -24,148 +25,148 @@ enum {
/* These are the particular character types. */
enum {
- ucp_Cc, /* Control */
- ucp_Cf, /* Format */
- ucp_Cn, /* Unassigned */
- ucp_Co, /* Private use */
- ucp_Cs, /* Surrogate */
- ucp_Ll, /* Lower case letter */
- ucp_Lm, /* Modifier letter */
- ucp_Lo, /* Other letter */
- ucp_Lt, /* Title case letter */
- ucp_Lu, /* Upper case letter */
- ucp_Mc, /* Spacing mark */
- ucp_Me, /* Enclosing mark */
- ucp_Mn, /* Non-spacing mark */
- ucp_Nd, /* Decimal number */
- ucp_Nl, /* Letter number */
- ucp_No, /* Other number */
- ucp_Pc, /* Connector punctuation */
- ucp_Pd, /* Dash punctuation */
- ucp_Pe, /* Close punctuation */
- ucp_Pf, /* Final punctuation */
- ucp_Pi, /* Initial punctuation */
- ucp_Po, /* Other punctuation */
- ucp_Ps, /* Open punctuation */
- ucp_Sc, /* Currency symbol */
- ucp_Sk, /* Modifier symbol */
- ucp_Sm, /* Mathematical symbol */
- ucp_So, /* Other symbol */
- ucp_Zl, /* Line separator */
- ucp_Zp, /* Paragraph separator */
- ucp_Zs /* Space separator */
+ ucp_Cc = G_UNICODE_CONTROL, /* Control */
+ ucp_Cf = G_UNICODE_FORMAT, /* Format */
+ ucp_Cn = G_UNICODE_UNASSIGNED, /* Unassigned */
+ ucp_Co = G_UNICODE_PRIVATE_USE, /* Private use */
+ ucp_Cs = G_UNICODE_SURROGATE, /* Surrogate */
+ ucp_Ll = G_UNICODE_LOWERCASE_LETTER, /* Lower case letter */
+ ucp_Lm = G_UNICODE_MODIFIER_LETTER, /* Modifier letter */
+ ucp_Lo = G_UNICODE_OTHER_LETTER, /* Other letter */
+ ucp_Lt = G_UNICODE_TITLECASE_LETTER, /* Title case letter */
+ ucp_Lu = G_UNICODE_UPPERCASE_LETTER, /* Upper case letter */
+ ucp_Mc = G_UNICODE_SPACING_MARK, /* Spacing mark */
+ ucp_Me = G_UNICODE_ENCLOSING_MARK, /* Enclosing mark */
+ ucp_Mn = G_UNICODE_NON_SPACING_MARK, /* Non-spacing mark */
+ ucp_Nd = G_UNICODE_DECIMAL_NUMBER, /* Decimal number */
+ ucp_Nl = G_UNICODE_LETTER_NUMBER, /* Letter number */
+ ucp_No = G_UNICODE_OTHER_NUMBER, /* Other number */
+ ucp_Pc = G_UNICODE_CONNECT_PUNCTUATION, /* Connector punctuation */
+ ucp_Pd = G_UNICODE_DASH_PUNCTUATION, /* Dash punctuation */
+ ucp_Pe = G_UNICODE_CLOSE_PUNCTUATION, /* Close punctuation */
+ ucp_Pf = G_UNICODE_FINAL_PUNCTUATION, /* Final punctuation */
+ ucp_Pi = G_UNICODE_INITIAL_PUNCTUATION, /* Initial punctuation */
+ ucp_Po = G_UNICODE_OTHER_PUNCTUATION, /* Other punctuation */
+ ucp_Ps = G_UNICODE_OPEN_PUNCTUATION, /* Open punctuation */
+ ucp_Sc = G_UNICODE_CURRENCY_SYMBOL, /* Currency symbol */
+ ucp_Sk = G_UNICODE_MODIFIER_SYMBOL, /* Modifier symbol */
+ ucp_Sm = G_UNICODE_MATH_SYMBOL, /* Mathematical symbol */
+ ucp_So = G_UNICODE_OTHER_SYMBOL, /* Other symbol */
+ ucp_Zl = G_UNICODE_LINE_SEPARATOR, /* Line separator */
+ ucp_Zp = G_UNICODE_PARAGRAPH_SEPARATOR, /* Paragraph separator */
+ ucp_Zs = G_UNICODE_SPACE_SEPARATOR /* Space separator */
};
/* These are the script identifications. */ /* These are the script identifications. */
enum { enum {
@ -65,11 +691,6 @@
- ucp_Tifinagh, - ucp_Tifinagh,
- ucp_Ugaritic, - ucp_Ugaritic,
- ucp_Yi, - ucp_Yi,
- ucp_Balinese, /* New for Unicode 5.0.0 */
- ucp_Cuneiform, /* New for Unicode 5.0.0 */
- ucp_Nko, /* New for Unicode 5.0.0 */
- ucp_Phags_Pa, /* New for Unicode 5.0.0 */
- ucp_Phoenician /* New for Unicode 5.0.0 */
+ ucp_Arabic = G_UNICODE_SCRIPT_ARABIC, + ucp_Arabic = G_UNICODE_SCRIPT_ARABIC,
+ ucp_Armenian = G_UNICODE_SCRIPT_ARMENIAN, + ucp_Armenian = G_UNICODE_SCRIPT_ARMENIAN,
+ ucp_Bengali = G_UNICODE_SCRIPT_BENGALI, + ucp_Bengali = G_UNICODE_SCRIPT_BENGALI,
@ -131,11 +752,96 @@
+ ucp_Tifinagh = G_UNICODE_SCRIPT_TIFINAGH, + ucp_Tifinagh = G_UNICODE_SCRIPT_TIFINAGH,
+ ucp_Ugaritic = G_UNICODE_SCRIPT_UGARITIC, + ucp_Ugaritic = G_UNICODE_SCRIPT_UGARITIC,
+ ucp_Yi = G_UNICODE_SCRIPT_YI, + ucp_Yi = G_UNICODE_SCRIPT_YI,
+ ucp_Balinese = G_UNICODE_SCRIPT_BALINESE, /* New for Unicode 5.0.0 */ /* New for Unicode 5.0: */
+ ucp_Cuneiform = G_UNICODE_SCRIPT_CUNEIFORM, /* New for Unicode 5.0.0 */ - ucp_Balinese,
+ ucp_Nko = G_UNICODE_SCRIPT_NKO, /* New for Unicode 5.0.0 */ - ucp_Cuneiform,
+ ucp_Phags_Pa = G_UNICODE_SCRIPT_PHAGS_PA, /* New for Unicode 5.0.0 */ - ucp_Nko,
+ ucp_Phoenician = G_UNICODE_SCRIPT_PHOENICIAN /* New for Unicode 5.0.0 */ - ucp_Phags_Pa,
- ucp_Phoenician,
+ ucp_Balinese = G_UNICODE_SCRIPT_BALINESE,
+ ucp_Cuneiform = G_UNICODE_SCRIPT_CUNEIFORM,
+ ucp_Nko = G_UNICODE_SCRIPT_NKO,
+ ucp_Phags_Pa = G_UNICODE_SCRIPT_PHAGS_PA,
+ ucp_Phoenician = G_UNICODE_SCRIPT_PHOENICIAN,
/* New for Unicode 5.1: */
- ucp_Carian,
- ucp_Cham,
- ucp_Kayah_Li,
- ucp_Lepcha,
- ucp_Lycian,
- ucp_Lydian,
- ucp_Ol_Chiki,
- ucp_Rejang,
- ucp_Saurashtra,
- ucp_Sundanese,
- ucp_Vai,
+ ucp_Carian = G_UNICODE_SCRIPT_CARIAN,
+ ucp_Cham = G_UNICODE_SCRIPT_CHAM,
+ ucp_Kayah_Li = G_UNICODE_SCRIPT_KAYAH_LI,
+ ucp_Lepcha = G_UNICODE_SCRIPT_LEPCHA,
+ ucp_Lycian = G_UNICODE_SCRIPT_LYCIAN,
+ ucp_Lydian = G_UNICODE_SCRIPT_LYDIAN,
+ ucp_Ol_Chiki = G_UNICODE_SCRIPT_OL_CHIKI,
+ ucp_Rejang = G_UNICODE_SCRIPT_REJANG,
+ ucp_Saurashtra = G_UNICODE_SCRIPT_SAURASHTRA,
+ ucp_Sundanese = G_UNICODE_SCRIPT_SUNDANESE,
+ ucp_Vai = G_UNICODE_SCRIPT_VAI,
/* New for Unicode 5.2: */
- ucp_Avestan,
- ucp_Bamum,
- ucp_Egyptian_Hieroglyphs,
- ucp_Imperial_Aramaic,
- ucp_Inscriptional_Pahlavi,
- ucp_Inscriptional_Parthian,
- ucp_Javanese,
- ucp_Kaithi,
- ucp_Lisu,
- ucp_Meetei_Mayek,
- ucp_Old_South_Arabian,
- ucp_Old_Turkic,
- ucp_Samaritan,
- ucp_Tai_Tham,
- ucp_Tai_Viet,
+ ucp_Avestan = G_UNICODE_SCRIPT_AVESTAN,
+ ucp_Bamum = G_UNICODE_SCRIPT_BAMUM,
+ ucp_Egyptian_Hieroglyphs = G_UNICODE_SCRIPT_EGYPTIAN_HIEROGLYPHS,
+ ucp_Imperial_Aramaic = G_UNICODE_SCRIPT_IMPERIAL_ARAMAIC,
+ ucp_Inscriptional_Pahlavi = G_UNICODE_SCRIPT_INSCRIPTIONAL_PAHLAVI,
+ ucp_Inscriptional_Parthian = G_UNICODE_SCRIPT_INSCRIPTIONAL_PARTHIAN,
+ ucp_Javanese = G_UNICODE_SCRIPT_JAVANESE,
+ ucp_Kaithi = G_UNICODE_SCRIPT_KAITHI,
+ ucp_Lisu = G_UNICODE_SCRIPT_LISU,
+ ucp_Meetei_Mayek = G_UNICODE_SCRIPT_MEETEI_MAYEK,
+ ucp_Old_South_Arabian = G_UNICODE_SCRIPT_OLD_SOUTH_ARABIAN,
+ ucp_Old_Turkic = G_UNICODE_SCRIPT_OLD_TURKIC,
+ ucp_Samaritan = G_UNICODE_SCRIPT_SAMARITAN,
+ ucp_Tai_Tham = G_UNICODE_SCRIPT_TAI_THAM,
+ ucp_Tai_Viet = G_UNICODE_SCRIPT_TAI_VIET,
/* New for Unicode 6.0.0: */
- ucp_Batak,
- ucp_Brahmi,
- ucp_Mandaic,
+ ucp_Batak = G_UNICODE_SCRIPT_BATAK,
+ ucp_Brahmi = G_UNICODE_SCRIPT_BRAHMI,
+ ucp_Mandaic = G_UNICODE_SCRIPT_MANDAIC,
/* New for Unicode 6.1.0: */
- ucp_Chakma,
- ucp_Meroitic_Cursive,
- ucp_Meroitic_Hieroglyphs,
- ucp_Miao,
- ucp_Sharada,
- ucp_Sora_Sompeng,
- ucp_Takri
+ ucp_Chakma = G_UNICODE_SCRIPT_CHAKMA,
+ ucp_Meroitic_Cursive = G_UNICODE_SCRIPT_MEROITIC_CURSIVE,
+ ucp_Meroitic_Hieroglyphs = G_UNICODE_SCRIPT_MEROITIC_HIEROGLYPHS,
+ ucp_Miao = G_UNICODE_SCRIPT_MIAO,
+ ucp_Sharada = G_UNICODE_SCRIPT_SHARADA,
+ ucp_Sora_Sompeng = G_UNICODE_SCRIPT_SORA_SOMPENG,
+ ucp_Takri = G_UNICODE_SCRIPT_TAKRI,
}; };
#endif #endif
--
1.7.5.1.217.g4e3aa.dirty