regex: Use g_ascii_is[x]digit

This commit is contained in:
Christian Persch 2012-02-12 19:29:42 +01:00 committed by Matthias Clasen
parent 8aadf6b3c9
commit d02f6393df
2 changed files with 101 additions and 138 deletions

View File

@ -52,6 +52,7 @@ supporting internal functions that are not used by other modules. */
#include "pcre_internal.h"
#include "gstrfuncs.h"
/* When PCRE_DEBUG is defined, we need the pcre(16)_printint() function, which
is also used by pcretest. PCRE_DEBUG is not defined when building a production
@ -513,6 +514,7 @@ into a subtraction and unsigned comparison). */
#define IS_DIGIT(x) ((x) >= CHAR_0 && (x) <= CHAR_9)
#if 0
#ifndef EBCDIC
/* This is the "normal" case, for ASCII systems, and EBCDIC systems running in
@ -626,7 +628,7 @@ static const pcre_uint8 ebcdic_chartab[] = { /* chartable partial dup */
0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x00};/* 8 -255 */
#endif
#endif /* 0 */
/* Definition to allow mutual recursion */
@ -812,10 +814,10 @@ else
{
/* In JavaScript, \u must be followed by four hexadecimal numbers.
Otherwise it is a lowercase u letter. */
if (MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0
&& MAX_255(ptr[2]) && (digitab[ptr[2]] & ctype_xdigit) != 0
&& MAX_255(ptr[3]) && (digitab[ptr[3]] & ctype_xdigit) != 0
&& MAX_255(ptr[4]) && (digitab[ptr[4]] & ctype_xdigit) != 0)
if (MAX_255(ptr[1]) && g_ascii_isxdigit(ptr[1]) != 0
&& MAX_255(ptr[2]) && g_ascii_isxdigit(ptr[2]) != 0
&& MAX_255(ptr[3]) && g_ascii_isxdigit(ptr[3]) != 0
&& MAX_255(ptr[4]) && g_ascii_isxdigit(ptr[4]) != 0)
{
c = 0;
for (i = 0; i < 4; ++i)
@ -1012,8 +1014,8 @@ else
{
/* In JavaScript, \x must be followed by two hexadecimal numbers.
Otherwise it is a lowercase x letter. */
if (MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0
&& MAX_255(ptr[2]) && (digitab[ptr[2]] & ctype_xdigit) != 0)
if (MAX_255(ptr[1]) && g_ascii_isxdigit(ptr[1]) != 0
&& MAX_255(ptr[2]) && g_ascii_isxdigit(ptr[2]) != 0)
{
c = 0;
for (i = 0; i < 2; ++i)
@ -1036,7 +1038,7 @@ else
const pcre_uchar *pt = ptr + 2;
c = 0;
while (MAX_255(*pt) && (digitab[*pt] & ctype_xdigit) != 0)
while (MAX_255(*pt) && g_ascii_isxdigit(*pt) != 0)
{
register int cc = *pt++;
if (c == 0 && cc == CHAR_0) continue; /* Leading zeroes */
@ -1060,7 +1062,7 @@ else
if (c < 0)
{
while (MAX_255(*pt) && (digitab[*pt] & ctype_xdigit) != 0) pt++;
while (MAX_255(*pt) && g_ascii_isxdigit(*pt) != 0) pt++;
*errorcodeptr = ERR34;
}
@ -1078,7 +1080,7 @@ else
/* Read just a single-byte hex-defined char */
c = 0;
while (i++ < 2 && MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0)
while (i++ < 2 && MAX_255(ptr[1]) && g_ascii_isxdigit(ptr[1]) != 0)
{
int cc; /* Some compilers don't like */
cc = *(++ptr); /* ++ in initializers */

View File

@ -1,133 +1,94 @@
--- pcre_compile.c 2006-10-10 12:00:00.000000000 +0200
+++ pcre_compile.c 2006-10-10 12:00:00.000000000 +0200
@@ -246,130 +246,6 @@ static const char *error_texts[] = {
};
From 5238ab10c5f3082a4be38410bd01a47ab176dfde Mon Sep 17 00:00:00 2001
From: Christian Persch <chpe@gnome.org>
Date: Sun, 12 Feb 2012 19:29:42 +0100
Subject: [PATCH] regex: Use g_ascii_is[x]digit
---
glib/pcre/pcre_compile.c | 22 ++++++++++++----------
1 files changed, 12 insertions(+), 10 deletions(-)
diff --git a/glib/pcre/pcre_compile.c b/glib/pcre/pcre_compile.c
index 8070f51..eb985df 100644
--- a/glib/pcre/pcre_compile.c
+++ b/glib/pcre/pcre_compile.c
@@ -52,6 +52,7 @@ supporting internal functions that are not used by other modules. */
#include "pcre_internal.h"
-/* Table to identify digits and hex digits. This is used when compiling
-patterns. Note that the tables in chartables are dependent on the locale, and
-may mark arbitrary characters as digits - but the PCRE compiling code expects
-to handle only 0-9, a-z, and A-Z as digits when compiling. That is why we have
-a private table here. It costs 256 bytes, but it is a lot faster than doing
-character value tests (at least in some simple cases I timed), and in some
-applications one wants PCRE to compile efficiently as well as match
-efficiently.
-
-For convenience, we use the same bit definitions as in chartables:
-
- 0x04 decimal digit
- 0x08 hexadecimal digit
-
-Then we can use ctype_digit and ctype_xdigit in the code. */
-
-#if !EBCDIC /* This is the "normal" case, for ASCII systems */
-static const unsigned char digitab[] =
- {
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 8- 15 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - ' */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* ( - / */
- 0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c, /* 0 - 7 */
- 0x0c,0x0c,0x00,0x00,0x00,0x00,0x00,0x00, /* 8 - ? */
- 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* @ - G */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* H - O */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* P - W */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* X - _ */
- 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* ` - g */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* h - o */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* p - w */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* x -127 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
-
-#else /* This is the "abnormal" case, for EBCDIC systems */
-static const unsigned char digitab[] =
- {
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 0 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 8- 15 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 10 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 32- 39 20 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 40- 47 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 48- 55 30 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 56- 63 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - 71 40 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 72- | */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* & - 87 50 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 88- ¬ */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - -103 60 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 104- ? */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 112-119 70 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 120- " */
- 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* 128- g 80 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* h -143 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144- p 90 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* q -159 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160- x A0 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* y -175 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* ^ -183 B0 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
- 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* { - G C0 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* H -207 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* } - P D0 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* Q -223 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* \ - X E0 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* Y -239 */
- 0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c, /* 0 - 7 F0 */
- 0x0c,0x0c,0x00,0x00,0x00,0x00,0x00,0x00};/* 8 -255 */
-
-static const unsigned char ebcdic_chartab[] = { /* chartable partial dup */
- 0x80,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /* 0- 7 */
- 0x00,0x00,0x00,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
- 0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /* 16- 23 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
- 0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /* 32- 39 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 40- 47 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 48- 55 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 56- 63 */
- 0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - 71 */
- 0x00,0x00,0x00,0x80,0x00,0x80,0x80,0x80, /* 72- | */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* & - 87 */
- 0x00,0x00,0x00,0x80,0x80,0x80,0x00,0x00, /* 88- ¬ */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - -103 */
- 0x00,0x00,0x00,0x00,0x00,0x10,0x00,0x80, /* 104- ? */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 112-119 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 120- " */
- 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* 128- g */
- 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* h -143 */
- 0x00,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* 144- p */
- 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* q -159 */
- 0x00,0x00,0x12,0x12,0x12,0x12,0x12,0x12, /* 160- x */
- 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* y -175 */
- 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* ^ -183 */
- 0x00,0x00,0x80,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
- 0x80,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* { - G */
- 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* H -207 */
- 0x00,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* } - P */
- 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* Q -223 */
- 0x00,0x00,0x12,0x12,0x12,0x12,0x12,0x12, /* \ - X */
- 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* Y -239 */
- 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
- 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x00};/* 8 -255 */
-#endif
-
+#include "gstrfuncs.h"
/* When PCRE_DEBUG is defined, we need the pcre(16)_printint() function, which
is also used by pcretest. PCRE_DEBUG is not defined when building a production
@@ -513,6 +514,7 @@ into a subtraction and unsigned comparison). */
#define IS_DIGIT(x) ((x) >= CHAR_0 && (x) <= CHAR_9)
+#if 0
#ifndef EBCDIC
/* This is the "normal" case, for ASCII systems, and EBCDIC systems running in
@@ -626,7 +628,7 @@ static const pcre_uint8 ebcdic_chartab[] = { /* chartable partial dup */
0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x00};/* 8 -255 */
#endif
-
+#endif /* 0 */
/* Definition to allow mutual recursion */
static BOOL
@@ -812,10 +814,10 @@ else
{
/* In JavaScript, \u must be followed by four hexadecimal numbers.
Otherwise it is a lowercase u letter. */
- if (MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0
- && MAX_255(ptr[2]) && (digitab[ptr[2]] & ctype_xdigit) != 0
- && MAX_255(ptr[3]) && (digitab[ptr[3]] & ctype_xdigit) != 0
- && MAX_255(ptr[4]) && (digitab[ptr[4]] & ctype_xdigit) != 0)
+ if (MAX_255(ptr[1]) && g_ascii_isxdigit(ptr[1]) != 0
+ && MAX_255(ptr[2]) && g_ascii_isxdigit(ptr[2]) != 0
+ && MAX_255(ptr[3]) && g_ascii_isxdigit(ptr[3]) != 0
+ && MAX_255(ptr[4]) && g_ascii_isxdigit(ptr[4]) != 0)
{
c = 0;
for (i = 0; i < 4; ++i)
@@ -1012,8 +1014,8 @@ else
{
/* In JavaScript, \x must be followed by two hexadecimal numbers.
Otherwise it is a lowercase x letter. */
- if (MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0
- && MAX_255(ptr[2]) && (digitab[ptr[2]] & ctype_xdigit) != 0)
+ if (MAX_255(ptr[1]) && g_ascii_isxdigit(ptr[1]) != 0
+ && MAX_255(ptr[2]) && g_ascii_isxdigit(ptr[2]) != 0)
{
c = 0;
for (i = 0; i < 2; ++i)
@@ -1036,7 +1038,7 @@ else
const pcre_uchar *pt = ptr + 2;
c = 0;
- while (MAX_255(*pt) && (digitab[*pt] & ctype_xdigit) != 0)
+ while (MAX_255(*pt) && g_ascii_isxdigit(*pt) != 0)
{
register int cc = *pt++;
if (c == 0 && cc == CHAR_0) continue; /* Leading zeroes */
@@ -1060,7 +1062,7 @@ else
if (c < 0)
{
- while (MAX_255(*pt) && (digitab[*pt] & ctype_xdigit) != 0) pt++;
+ while (MAX_255(*pt) && g_ascii_isxdigit(*pt) != 0) pt++;
*errorcodeptr = ERR34;
}
@@ -1078,7 +1080,7 @@ else
/* Read just a single-byte hex-defined char */
c = 0;
- while (i++ < 2 && MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0)
+ while (i++ < 2 && MAX_255(ptr[1]) && g_ascii_isxdigit(ptr[1]) != 0)
{
int cc; /* Some compilers don't like */
cc = *(++ptr); /* ++ in initializers */
--
1.7.5.1.217.g4e3aa.dirty