mirror of
https://gitlab.gnome.org/GNOME/glib.git
synced 2025-11-02 09:22:16 +01:00
regex: Remove internal copy of libpcre
The internal copy of libpcre is at version 8.31, which is outdated, buggy and vulnerable. Updating to a more recent version is difficult because glib doesn't contain all the necessary unicode data tables, see [https://bugzilla.gnome.org/show_bug.cgi?id=689791] and [https://bugzilla.gnome.org/show_bug.cgi?id=684222]. The reason for copying libpcre into glib was that it is patched to use the glib unicode data tables instead of its own. However, it turns out that libgio-2.0 links to libselinux, which in turn links to libpcre. This means that all gtk+/gnome applications actually have two copies of libpcre, more than negating any savings from the shared unicode tables. Bump the required pcre version to the latest, which is 8.36. Keep the --with-pcre configure switch which will be repurposed to select between pcre and pcre2 soon.
This commit is contained in:
29
configure.ac
29
configure.ac
@@ -2461,16 +2461,15 @@ dnl *********************
|
||||
dnl *** GRegex checks ***
|
||||
dnl *********************
|
||||
|
||||
PCRE_REQUIRED_VERSION=8.13
|
||||
PCRE_REQUIRED_VERSION=8.36
|
||||
|
||||
# Check if we should use the internal or the system-supplied pcre
|
||||
AC_ARG_WITH(pcre,
|
||||
[AS_HELP_STRING([--with-pcre=@<:@internal/system@:>@],
|
||||
[whether to use system PCRE [default=internal]])])
|
||||
# Check which pcre we should use
|
||||
AC_ARG_WITH([pcre],
|
||||
[AS_HELP_STRING([--with-pcre=@<:@pcre@:>@],
|
||||
[which pcre library to use (default: pcre)])],
|
||||
[],[with_pcre=pcre])
|
||||
|
||||
AM_CONDITIONAL(USE_SYSTEM_PCRE, [test "x$with_pcre" = xsystem])
|
||||
|
||||
AS_IF([ test "x$with_pcre" = xsystem], [
|
||||
AS_IF([ test "x$with_pcre" != xno], [
|
||||
PKG_CHECK_MODULES(PCRE,
|
||||
libpcre >= $PCRE_REQUIRED_VERSION)
|
||||
AC_CACHE_CHECK([for Unicode support in PCRE],glib_cv_pcre_has_unicode,[
|
||||
@@ -2499,21 +2498,11 @@ AS_IF([ test "x$with_pcre" = xsystem], [
|
||||
fi
|
||||
AC_SUBST(PCRE_CFLAGS)
|
||||
AC_SUBST(PCRE_LIBS)
|
||||
AC_DEFINE(USE_SYSTEM_PCRE, [], [using the system-supplied PCRE library])
|
||||
PCRE_REQUIRES=libpcre
|
||||
AC_SUBST(PCRE_REQUIRES)
|
||||
], [
|
||||
# If using gcc 4 pass -Wno-pointer-sign when compiling the internal PCRE
|
||||
AS_IF([ test x"$GCC" = xyes], [
|
||||
AC_MSG_CHECKING([whether compiler understands -Wno-pointer-sign])
|
||||
save_CFLAGS="$CFLAGS"
|
||||
CFLAGS="$CFLAGS -Wno-pointer-sign"
|
||||
AC_TRY_COMPILE([],[],[PCRE_WARN_CFLAGS="$PCRE_WARN_CFLAGS -Wno-pointer-sign"
|
||||
AC_MSG_RESULT(yes)],[AC_MSG_RESULT(no)])
|
||||
CFLAGS="$save_CFLAGS"
|
||||
])
|
||||
AC_MSG_ERROR([building without pcre is not supported])
|
||||
])
|
||||
AC_SUBST(PCRE_WARN_CFLAGS)
|
||||
|
||||
dnl **********************
|
||||
dnl *** Win32 API libs ***
|
||||
@@ -3599,8 +3588,6 @@ glib/Makefile
|
||||
glib/glib.stp
|
||||
glib/libcharset/Makefile
|
||||
glib/gnulib/Makefile
|
||||
glib/pcre/Makefile
|
||||
glib/update-pcre/Makefile
|
||||
glib/tests/Makefile
|
||||
gmodule/Makefile
|
||||
gmodule/gmoduleconf.h
|
||||
|
||||
@@ -26,21 +26,14 @@ PRINTF_SUBDIR = gnulib
|
||||
printf_la = gnulib/libgnulib.la
|
||||
endif
|
||||
|
||||
if USE_SYSTEM_PCRE
|
||||
else
|
||||
MAYBE_PCRE = pcre
|
||||
endif
|
||||
|
||||
SUBDIRS = libcharset $(PRINTF_SUBDIR) $(MAYBE_PCRE) update-pcre . tests
|
||||
DIST_SUBDIRS = libcharset gnulib pcre update-pcre tests
|
||||
SUBDIRS = libcharset $(PRINTF_SUBDIR) . tests
|
||||
DIST_SUBDIRS = libcharset gnulib tests
|
||||
|
||||
AM_CPPFLAGS = \
|
||||
$(glib_INCLUDES) \
|
||||
$(pcre_inc) \
|
||||
-DG_LOG_DOMAIN=\"GLib\" \
|
||||
$(GLIB_DEBUG_FLAGS) \
|
||||
-DGLIB_COMPILATION \
|
||||
-DPCRE_STATIC
|
||||
-DGLIB_COMPILATION
|
||||
|
||||
AM_CFLAGS = $(GLIB_WARN_CFLAGS)
|
||||
|
||||
@@ -339,16 +332,8 @@ glib_win32_res = glib-win32-res.o
|
||||
glib_win32_res_ldflag = -Wl,$(glib_win32_res)
|
||||
endif
|
||||
|
||||
if USE_SYSTEM_PCRE
|
||||
pcre_lib = $(PCRE_LIBS)
|
||||
pcre_inc = $(PCRE_CFLAGS)
|
||||
else
|
||||
pcre_lib = pcre/libpcre.la
|
||||
pcre_inc =
|
||||
endif
|
||||
|
||||
libglib_2_0_la_CFLAGS = $(AM_CFLAGS) $(GLIB_HIDDEN_VISIBILITY_CFLAGS)
|
||||
libglib_2_0_la_LIBADD = libcharset/libcharset.la $(printf_la) @GIO@ @GSPAWN@ @PLATFORMDEP@ @ICONV_LIBS@ @G_LIBS_EXTRA@ $(pcre_lib) $(G_THREAD_LIBS_EXTRA) $(G_THREAD_LIBS_FOR_GTHREAD)
|
||||
libglib_2_0_la_CFLAGS = $(AM_CFLAGS) $(GLIB_HIDDEN_VISIBILITY_CFLAGS) $(PCRE_CFLAGS)
|
||||
libglib_2_0_la_LIBADD = libcharset/libcharset.la $(printf_la) @GIO@ @GSPAWN@ @PLATFORMDEP@ @ICONV_LIBS@ @G_LIBS_EXTRA@ $(PCRE_LIBS) $(G_THREAD_LIBS_EXTRA) $(G_THREAD_LIBS_FOR_GTHREAD)
|
||||
libglib_2_0_la_DEPENDENCIES = libcharset/libcharset.la $(printf_la) @GIO@ @GSPAWN@ @PLATFORMDEP@ $(glib_win32_res) $(glib_def)
|
||||
|
||||
libglib_2_0_la_LDFLAGS = $(GLIB_LINK_FLAGS) \
|
||||
|
||||
@@ -23,11 +23,7 @@
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#ifdef USE_SYSTEM_PCRE
|
||||
#include <pcre.h>
|
||||
#else
|
||||
#include "pcre/pcre.h"
|
||||
#endif
|
||||
|
||||
#include "gtypes.h"
|
||||
#include "gregex.h"
|
||||
|
||||
@@ -1,5 +0,0 @@
|
||||
PCRE LICENCE
|
||||
|
||||
Please see the file LICENCE in the PCRE distribution for licensing details.
|
||||
|
||||
End
|
||||
@@ -1,64 +0,0 @@
|
||||
include $(top_srcdir)/glib.mk
|
||||
|
||||
noinst_LTLIBRARIES += libpcre.la
|
||||
|
||||
libpcre_la_CPPFLAGS = \
|
||||
-DG_LOG_DOMAIN=\"GLib-GRegex\" \
|
||||
-DHAVE_CONFIG_H \
|
||||
-DHAVE_MEMMOVE \
|
||||
-DSUPPORT_UCP \
|
||||
-DSUPPORT_UTF \
|
||||
-DSUPPORT_UTF8 \
|
||||
-DNEWLINE=-1 \
|
||||
-DMATCH_LIMIT=10000000 \
|
||||
-DMATCH_LIMIT_RECURSION=8192 \
|
||||
-DMAX_NAME_SIZE=32 \
|
||||
-DMAX_NAME_COUNT=10000 \
|
||||
-DMAX_DUPLENGTH=30000 \
|
||||
-DLINK_SIZE=2 \
|
||||
-DPOSIX_MALLOC_THRESHOLD=10 \
|
||||
-DPCRE_STATIC \
|
||||
-UBSR_ANYCRLF \
|
||||
-UEBCDIC \
|
||||
$(glib_INCLUDES) \
|
||||
@GLIB_DEBUG_FLAGS@ \
|
||||
-DGLIB_COMPILATION \
|
||||
$(AM_CPPFLAGS)
|
||||
|
||||
libpcre_la_CFLAGS = \
|
||||
$(GLIB_HIDDEN_VISIBILITY_CFLAGS) \
|
||||
$(PCRE_WARN_CFLAGS) \
|
||||
$(DEP_CFLAGS) \
|
||||
$(AM_CFLAGS)
|
||||
|
||||
libpcre_la_LDFLAGS = \
|
||||
-no-undefined \
|
||||
$(AM_LDFLAGS)
|
||||
|
||||
libpcre_la_SOURCES = \
|
||||
pcre_byte_order.c \
|
||||
pcre_chartables.c \
|
||||
pcre_compile.c \
|
||||
pcre_config.c \
|
||||
pcre_dfa_exec.c \
|
||||
pcre_exec.c \
|
||||
pcre_fullinfo.c \
|
||||
pcre_get.c \
|
||||
pcre_globals.c \
|
||||
pcre_jit_compile.c \
|
||||
pcre_newline.c \
|
||||
pcre_ord2utf8.c \
|
||||
pcre_string_utils.c \
|
||||
pcre_study.c \
|
||||
pcre_tables.c \
|
||||
pcre_valid_utf8.c \
|
||||
pcre_xclass.c \
|
||||
pcre.h \
|
||||
pcre_internal.h \
|
||||
ucp.h
|
||||
|
||||
libpcre_la_LIBADD = $(DEP_LIBS)
|
||||
|
||||
EXTRA_DIST += \
|
||||
COPYING \
|
||||
makefile.msc
|
||||
@@ -1,30 +0,0 @@
|
||||
TOP = ..\..\..
|
||||
!INCLUDE ..\..\build\win32\make.msc
|
||||
|
||||
INCLUDES = \
|
||||
-I ..\.. \
|
||||
-I ..
|
||||
|
||||
DEFINES = \
|
||||
-DPCRE_STATIC \
|
||||
-DHAVE_CONFIG_H \
|
||||
-DHAVE_LONG_LONG_FORMAT \
|
||||
-DSUPPORT_UCP \
|
||||
-DSUPPORT_UTF8 \
|
||||
-DNEWLINE=-1 \
|
||||
-DMATCH_LIMIT=10000000 \
|
||||
-DMATCH_LIMIT_RECURSION=10000000 \
|
||||
-DMAX_NAME_SIZE=32 \
|
||||
-DMAX_NAME_COUNT=10000 \
|
||||
-DMAX_DUPLENGTH=30000 \
|
||||
-DLINK_SIZE=2 \
|
||||
-DEBCDIC=0 \
|
||||
-DPOSIX_MALLOC_THRESHOLD=10
|
||||
|
||||
OBJECTS = \
|
||||
|
||||
|
||||
all : pcre.lib
|
||||
|
||||
pcre.lib : $(OBJECTS)
|
||||
lib -out:pcre.lib $(OBJECTS)
|
||||
507
glib/pcre/pcre.h
507
glib/pcre/pcre.h
@@ -1,507 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* This is the public header file for the PCRE library, to be #included by
|
||||
applications that call the PCRE functions.
|
||||
|
||||
Copyright (c) 1997-2012 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef _PCRE_H
|
||||
#define _PCRE_H
|
||||
|
||||
/* The current PCRE version information. */
|
||||
|
||||
#define PCRE_MAJOR 8
|
||||
#define PCRE_MINOR 31
|
||||
#define PCRE_PRERELEASE
|
||||
#define PCRE_DATE 2012-07-06
|
||||
|
||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||
imported have to be identified as such. When building PCRE, the appropriate
|
||||
export setting is defined in pcre_internal.h, which includes this file. So we
|
||||
don't change existing definitions of PCRE_EXP_DECL and PCRECPP_EXP_DECL. */
|
||||
|
||||
#if defined(_WIN32) && !defined(PCRE_STATIC)
|
||||
# ifndef PCRE_EXP_DECL
|
||||
# define PCRE_EXP_DECL extern __declspec(dllimport)
|
||||
# endif
|
||||
# ifdef __cplusplus
|
||||
# ifndef PCRECPP_EXP_DECL
|
||||
# define PCRECPP_EXP_DECL extern __declspec(dllimport)
|
||||
# endif
|
||||
# ifndef PCRECPP_EXP_DEFN
|
||||
# define PCRECPP_EXP_DEFN __declspec(dllimport)
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* By default, we use the standard "extern" declarations. */
|
||||
|
||||
#ifndef PCRE_EXP_DECL
|
||||
# ifdef __cplusplus
|
||||
# define PCRE_EXP_DECL extern "C"
|
||||
# else
|
||||
# define PCRE_EXP_DECL extern
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
# ifndef PCRECPP_EXP_DECL
|
||||
# define PCRECPP_EXP_DECL extern
|
||||
# endif
|
||||
# ifndef PCRECPP_EXP_DEFN
|
||||
# define PCRECPP_EXP_DEFN
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* Have to include stdlib.h in order to ensure that size_t is defined;
|
||||
it is needed here for malloc. */
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
/* Allow for C++ users */
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Options. Some are compile-time only, some are run-time only, and some are
|
||||
both, so we keep them all distinct. However, almost all the bits in the options
|
||||
word are now used. In the long run, we may have to re-use some of the
|
||||
compile-time only bits for runtime options, or vice versa. In the comments
|
||||
below, "compile", "exec", and "DFA exec" mean that the option is permitted to
|
||||
be set for those functions; "used in" means that an option may be set only for
|
||||
compile, but is subsequently referenced in exec and/or DFA exec. Any of the
|
||||
compile-time options may be inspected during studying (and therefore JIT
|
||||
compiling). */
|
||||
|
||||
#define PCRE_CASELESS 0x00000001 /* Compile */
|
||||
#define PCRE_MULTILINE 0x00000002 /* Compile */
|
||||
#define PCRE_DOTALL 0x00000004 /* Compile */
|
||||
#define PCRE_EXTENDED 0x00000008 /* Compile */
|
||||
#define PCRE_ANCHORED 0x00000010 /* Compile, exec, DFA exec */
|
||||
#define PCRE_DOLLAR_ENDONLY 0x00000020 /* Compile, used in exec, DFA exec */
|
||||
#define PCRE_EXTRA 0x00000040 /* Compile */
|
||||
#define PCRE_NOTBOL 0x00000080 /* Exec, DFA exec */
|
||||
#define PCRE_NOTEOL 0x00000100 /* Exec, DFA exec */
|
||||
#define PCRE_UNGREEDY 0x00000200 /* Compile */
|
||||
#define PCRE_NOTEMPTY 0x00000400 /* Exec, DFA exec */
|
||||
/* The next two are also used in exec and DFA exec */
|
||||
#define PCRE_UTF8 0x00000800 /* Compile (same as PCRE_UTF16) */
|
||||
#define PCRE_UTF16 0x00000800 /* Compile (same as PCRE_UTF8) */
|
||||
#define PCRE_NO_AUTO_CAPTURE 0x00001000 /* Compile */
|
||||
/* The next two are also used in exec and DFA exec */
|
||||
#define PCRE_NO_UTF8_CHECK 0x00002000 /* Compile (same as PCRE_NO_UTF16_CHECK) */
|
||||
#define PCRE_NO_UTF16_CHECK 0x00002000 /* Compile (same as PCRE_NO_UTF8_CHECK) */
|
||||
#define PCRE_AUTO_CALLOUT 0x00004000 /* Compile */
|
||||
#define PCRE_PARTIAL_SOFT 0x00008000 /* Exec, DFA exec */
|
||||
#define PCRE_PARTIAL 0x00008000 /* Backwards compatible synonym */
|
||||
#define PCRE_DFA_SHORTEST 0x00010000 /* DFA exec */
|
||||
#define PCRE_DFA_RESTART 0x00020000 /* DFA exec */
|
||||
#define PCRE_FIRSTLINE 0x00040000 /* Compile, used in exec, DFA exec */
|
||||
#define PCRE_DUPNAMES 0x00080000 /* Compile */
|
||||
#define PCRE_NEWLINE_CR 0x00100000 /* Compile, exec, DFA exec */
|
||||
#define PCRE_NEWLINE_LF 0x00200000 /* Compile, exec, DFA exec */
|
||||
#define PCRE_NEWLINE_CRLF 0x00300000 /* Compile, exec, DFA exec */
|
||||
#define PCRE_NEWLINE_ANY 0x00400000 /* Compile, exec, DFA exec */
|
||||
#define PCRE_NEWLINE_ANYCRLF 0x00500000 /* Compile, exec, DFA exec */
|
||||
#define PCRE_BSR_ANYCRLF 0x00800000 /* Compile, exec, DFA exec */
|
||||
#define PCRE_BSR_UNICODE 0x01000000 /* Compile, exec, DFA exec */
|
||||
#define PCRE_JAVASCRIPT_COMPAT 0x02000000 /* Compile, used in exec */
|
||||
#define PCRE_NO_START_OPTIMIZE 0x04000000 /* Compile, exec, DFA exec */
|
||||
#define PCRE_NO_START_OPTIMISE 0x04000000 /* Synonym */
|
||||
#define PCRE_PARTIAL_HARD 0x08000000 /* Exec, DFA exec */
|
||||
#define PCRE_NOTEMPTY_ATSTART 0x10000000 /* Exec, DFA exec */
|
||||
#define PCRE_UCP 0x20000000 /* Compile, used in exec, DFA exec */
|
||||
|
||||
/* Exec-time and get/set-time error codes */
|
||||
|
||||
#define PCRE_ERROR_NOMATCH (-1)
|
||||
#define PCRE_ERROR_NULL (-2)
|
||||
#define PCRE_ERROR_BADOPTION (-3)
|
||||
#define PCRE_ERROR_BADMAGIC (-4)
|
||||
#define PCRE_ERROR_UNKNOWN_OPCODE (-5)
|
||||
#define PCRE_ERROR_UNKNOWN_NODE (-5) /* For backward compatibility */
|
||||
#define PCRE_ERROR_NOMEMORY (-6)
|
||||
#define PCRE_ERROR_NOSUBSTRING (-7)
|
||||
#define PCRE_ERROR_MATCHLIMIT (-8)
|
||||
#define PCRE_ERROR_CALLOUT (-9) /* Never used by PCRE itself */
|
||||
#define PCRE_ERROR_BADUTF8 (-10) /* Same for 8/16 */
|
||||
#define PCRE_ERROR_BADUTF16 (-10) /* Same for 8/16 */
|
||||
#define PCRE_ERROR_BADUTF8_OFFSET (-11) /* Same for 8/16 */
|
||||
#define PCRE_ERROR_BADUTF16_OFFSET (-11) /* Same for 8/16 */
|
||||
#define PCRE_ERROR_PARTIAL (-12)
|
||||
#define PCRE_ERROR_BADPARTIAL (-13)
|
||||
#define PCRE_ERROR_INTERNAL (-14)
|
||||
#define PCRE_ERROR_BADCOUNT (-15)
|
||||
#define PCRE_ERROR_DFA_UITEM (-16)
|
||||
#define PCRE_ERROR_DFA_UCOND (-17)
|
||||
#define PCRE_ERROR_DFA_UMLIMIT (-18)
|
||||
#define PCRE_ERROR_DFA_WSSIZE (-19)
|
||||
#define PCRE_ERROR_DFA_RECURSE (-20)
|
||||
#define PCRE_ERROR_RECURSIONLIMIT (-21)
|
||||
#define PCRE_ERROR_NULLWSLIMIT (-22) /* No longer actually used */
|
||||
#define PCRE_ERROR_BADNEWLINE (-23)
|
||||
#define PCRE_ERROR_BADOFFSET (-24)
|
||||
#define PCRE_ERROR_SHORTUTF8 (-25)
|
||||
#define PCRE_ERROR_SHORTUTF16 (-25) /* Same for 8/16 */
|
||||
#define PCRE_ERROR_RECURSELOOP (-26)
|
||||
#define PCRE_ERROR_JIT_STACKLIMIT (-27)
|
||||
#define PCRE_ERROR_BADMODE (-28)
|
||||
#define PCRE_ERROR_BADENDIANNESS (-29)
|
||||
#define PCRE_ERROR_DFA_BADRESTART (-30)
|
||||
|
||||
/* Specific error codes for UTF-8 validity checks */
|
||||
|
||||
#define PCRE_UTF8_ERR0 0
|
||||
#define PCRE_UTF8_ERR1 1
|
||||
#define PCRE_UTF8_ERR2 2
|
||||
#define PCRE_UTF8_ERR3 3
|
||||
#define PCRE_UTF8_ERR4 4
|
||||
#define PCRE_UTF8_ERR5 5
|
||||
#define PCRE_UTF8_ERR6 6
|
||||
#define PCRE_UTF8_ERR7 7
|
||||
#define PCRE_UTF8_ERR8 8
|
||||
#define PCRE_UTF8_ERR9 9
|
||||
#define PCRE_UTF8_ERR10 10
|
||||
#define PCRE_UTF8_ERR11 11
|
||||
#define PCRE_UTF8_ERR12 12
|
||||
#define PCRE_UTF8_ERR13 13
|
||||
#define PCRE_UTF8_ERR14 14
|
||||
#define PCRE_UTF8_ERR15 15
|
||||
#define PCRE_UTF8_ERR16 16
|
||||
#define PCRE_UTF8_ERR17 17
|
||||
#define PCRE_UTF8_ERR18 18
|
||||
#define PCRE_UTF8_ERR19 19
|
||||
#define PCRE_UTF8_ERR20 20
|
||||
#define PCRE_UTF8_ERR21 21
|
||||
|
||||
/* Specific error codes for UTF-16 validity checks */
|
||||
|
||||
#define PCRE_UTF16_ERR0 0
|
||||
#define PCRE_UTF16_ERR1 1
|
||||
#define PCRE_UTF16_ERR2 2
|
||||
#define PCRE_UTF16_ERR3 3
|
||||
#define PCRE_UTF16_ERR4 4
|
||||
|
||||
/* Request types for pcre_fullinfo() */
|
||||
|
||||
#define PCRE_INFO_OPTIONS 0
|
||||
#define PCRE_INFO_SIZE 1
|
||||
#define PCRE_INFO_CAPTURECOUNT 2
|
||||
#define PCRE_INFO_BACKREFMAX 3
|
||||
#define PCRE_INFO_FIRSTBYTE 4
|
||||
#define PCRE_INFO_FIRSTCHAR 4 /* For backwards compatibility */
|
||||
#define PCRE_INFO_FIRSTTABLE 5
|
||||
#define PCRE_INFO_LASTLITERAL 6
|
||||
#define PCRE_INFO_NAMEENTRYSIZE 7
|
||||
#define PCRE_INFO_NAMECOUNT 8
|
||||
#define PCRE_INFO_NAMETABLE 9
|
||||
#define PCRE_INFO_STUDYSIZE 10
|
||||
#define PCRE_INFO_DEFAULT_TABLES 11
|
||||
#define PCRE_INFO_OKPARTIAL 12
|
||||
#define PCRE_INFO_JCHANGED 13
|
||||
#define PCRE_INFO_HASCRORLF 14
|
||||
#define PCRE_INFO_MINLENGTH 15
|
||||
#define PCRE_INFO_JIT 16
|
||||
#define PCRE_INFO_JITSIZE 17
|
||||
#define PCRE_INFO_MAXLOOKBEHIND 18
|
||||
|
||||
/* Request types for pcre_config(). Do not re-arrange, in order to remain
|
||||
compatible. */
|
||||
|
||||
#define PCRE_CONFIG_UTF8 0
|
||||
#define PCRE_CONFIG_NEWLINE 1
|
||||
#define PCRE_CONFIG_LINK_SIZE 2
|
||||
#define PCRE_CONFIG_POSIX_MALLOC_THRESHOLD 3
|
||||
#define PCRE_CONFIG_MATCH_LIMIT 4
|
||||
#define PCRE_CONFIG_STACKRECURSE 5
|
||||
#define PCRE_CONFIG_UNICODE_PROPERTIES 6
|
||||
#define PCRE_CONFIG_MATCH_LIMIT_RECURSION 7
|
||||
#define PCRE_CONFIG_BSR 8
|
||||
#define PCRE_CONFIG_JIT 9
|
||||
#define PCRE_CONFIG_UTF16 10
|
||||
#define PCRE_CONFIG_JITTARGET 11
|
||||
|
||||
/* Request types for pcre_study(). Do not re-arrange, in order to remain
|
||||
compatible. */
|
||||
|
||||
#define PCRE_STUDY_JIT_COMPILE 0x0001
|
||||
#define PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE 0x0002
|
||||
#define PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE 0x0004
|
||||
|
||||
/* Bit flags for the pcre[16]_extra structure. Do not re-arrange or redefine
|
||||
these bits, just add new ones on the end, in order to remain compatible. */
|
||||
|
||||
#define PCRE_EXTRA_STUDY_DATA 0x0001
|
||||
#define PCRE_EXTRA_MATCH_LIMIT 0x0002
|
||||
#define PCRE_EXTRA_CALLOUT_DATA 0x0004
|
||||
#define PCRE_EXTRA_TABLES 0x0008
|
||||
#define PCRE_EXTRA_MATCH_LIMIT_RECURSION 0x0010
|
||||
#define PCRE_EXTRA_MARK 0x0020
|
||||
#define PCRE_EXTRA_EXECUTABLE_JIT 0x0040
|
||||
|
||||
/* Types */
|
||||
|
||||
struct real_pcre; /* declaration; the definition is private */
|
||||
typedef struct real_pcre pcre;
|
||||
|
||||
struct real_pcre16; /* declaration; the definition is private */
|
||||
typedef struct real_pcre16 pcre16;
|
||||
|
||||
struct real_pcre_jit_stack; /* declaration; the definition is private */
|
||||
typedef struct real_pcre_jit_stack pcre_jit_stack;
|
||||
|
||||
struct real_pcre16_jit_stack; /* declaration; the definition is private */
|
||||
typedef struct real_pcre16_jit_stack pcre16_jit_stack;
|
||||
|
||||
/* If PCRE is compiled with 16 bit character support, PCRE_UCHAR16 must contain
|
||||
a 16 bit wide unsigned data type. Otherwise it can be a dummy data type since
|
||||
pcre16 functions are not implemented. There is a check for this in pcre_internal.h. */
|
||||
#ifndef PCRE_UCHAR16
|
||||
#define PCRE_UCHAR16 unsigned short
|
||||
#endif
|
||||
|
||||
#ifndef PCRE_SPTR16
|
||||
#define PCRE_SPTR16 const PCRE_UCHAR16 *
|
||||
#endif
|
||||
|
||||
/* When PCRE is compiled as a C++ library, the subject pointer type can be
|
||||
replaced with a custom type. For conventional use, the public interface is a
|
||||
const char *. */
|
||||
|
||||
#ifndef PCRE_SPTR
|
||||
#define PCRE_SPTR const char *
|
||||
#endif
|
||||
|
||||
/* The structure for passing additional data to pcre_exec(). This is defined in
|
||||
such a way as to be extensible. Always add new fields at the end, in order to
|
||||
remain compatible. */
|
||||
|
||||
typedef struct pcre_extra {
|
||||
unsigned long int flags; /* Bits for which fields are set */
|
||||
void *study_data; /* Opaque data from pcre_study() */
|
||||
unsigned long int match_limit; /* Maximum number of calls to match() */
|
||||
void *callout_data; /* Data passed back in callouts */
|
||||
const unsigned char *tables; /* Pointer to character tables */
|
||||
unsigned long int match_limit_recursion; /* Max recursive calls to match() */
|
||||
unsigned char **mark; /* For passing back a mark pointer */
|
||||
void *executable_jit; /* Contains a pointer to a compiled jit code */
|
||||
} pcre_extra;
|
||||
|
||||
/* Same structure as above, but with 16 bit char pointers. */
|
||||
|
||||
typedef struct pcre16_extra {
|
||||
unsigned long int flags; /* Bits for which fields are set */
|
||||
void *study_data; /* Opaque data from pcre_study() */
|
||||
unsigned long int match_limit; /* Maximum number of calls to match() */
|
||||
void *callout_data; /* Data passed back in callouts */
|
||||
const unsigned char *tables; /* Pointer to character tables */
|
||||
unsigned long int match_limit_recursion; /* Max recursive calls to match() */
|
||||
PCRE_UCHAR16 **mark; /* For passing back a mark pointer */
|
||||
void *executable_jit; /* Contains a pointer to a compiled jit code */
|
||||
} pcre16_extra;
|
||||
|
||||
/* The structure for passing out data via the pcre_callout_function. We use a
|
||||
structure so that new fields can be added on the end in future versions,
|
||||
without changing the API of the function, thereby allowing old clients to work
|
||||
without modification. */
|
||||
|
||||
typedef struct pcre_callout_block {
|
||||
int version; /* Identifies version of block */
|
||||
/* ------------------------ Version 0 ------------------------------- */
|
||||
int callout_number; /* Number compiled into pattern */
|
||||
int *offset_vector; /* The offset vector */
|
||||
PCRE_SPTR subject; /* The subject being matched */
|
||||
int subject_length; /* The length of the subject */
|
||||
int start_match; /* Offset to start of this match attempt */
|
||||
int current_position; /* Where we currently are in the subject */
|
||||
int capture_top; /* Max current capture */
|
||||
int capture_last; /* Most recently closed capture */
|
||||
void *callout_data; /* Data passed in with the call */
|
||||
/* ------------------- Added for Version 1 -------------------------- */
|
||||
int pattern_position; /* Offset to next item in the pattern */
|
||||
int next_item_length; /* Length of next item in the pattern */
|
||||
/* ------------------- Added for Version 2 -------------------------- */
|
||||
const unsigned char *mark; /* Pointer to current mark or NULL */
|
||||
/* ------------------------------------------------------------------ */
|
||||
} pcre_callout_block;
|
||||
|
||||
/* Same structure as above, but with 16 bit char pointers. */
|
||||
|
||||
typedef struct pcre16_callout_block {
|
||||
int version; /* Identifies version of block */
|
||||
/* ------------------------ Version 0 ------------------------------- */
|
||||
int callout_number; /* Number compiled into pattern */
|
||||
int *offset_vector; /* The offset vector */
|
||||
PCRE_SPTR16 subject; /* The subject being matched */
|
||||
int subject_length; /* The length of the subject */
|
||||
int start_match; /* Offset to start of this match attempt */
|
||||
int current_position; /* Where we currently are in the subject */
|
||||
int capture_top; /* Max current capture */
|
||||
int capture_last; /* Most recently closed capture */
|
||||
void *callout_data; /* Data passed in with the call */
|
||||
/* ------------------- Added for Version 1 -------------------------- */
|
||||
int pattern_position; /* Offset to next item in the pattern */
|
||||
int next_item_length; /* Length of next item in the pattern */
|
||||
/* ------------------- Added for Version 2 -------------------------- */
|
||||
const PCRE_UCHAR16 *mark; /* Pointer to current mark or NULL */
|
||||
/* ------------------------------------------------------------------ */
|
||||
} pcre16_callout_block;
|
||||
|
||||
/* Indirection for store get and free functions. These can be set to
|
||||
alternative malloc/free functions if required. Special ones are used in the
|
||||
non-recursive case for "frames". There is also an optional callout function
|
||||
that is triggered by the (?) regex item. For Virtual Pascal, these definitions
|
||||
have to take another form. */
|
||||
|
||||
#ifndef VPCOMPAT
|
||||
PCRE_EXP_DECL void *(*pcre_malloc)(size_t);
|
||||
PCRE_EXP_DECL void (*pcre_free)(void *);
|
||||
PCRE_EXP_DECL void *(*pcre_stack_malloc)(size_t);
|
||||
PCRE_EXP_DECL void (*pcre_stack_free)(void *);
|
||||
PCRE_EXP_DECL int (*pcre_callout)(pcre_callout_block *);
|
||||
|
||||
PCRE_EXP_DECL void *(*pcre16_malloc)(size_t);
|
||||
PCRE_EXP_DECL void (*pcre16_free)(void *);
|
||||
PCRE_EXP_DECL void *(*pcre16_stack_malloc)(size_t);
|
||||
PCRE_EXP_DECL void (*pcre16_stack_free)(void *);
|
||||
PCRE_EXP_DECL int (*pcre16_callout)(pcre16_callout_block *);
|
||||
#else /* VPCOMPAT */
|
||||
PCRE_EXP_DECL void *pcre_malloc(size_t);
|
||||
PCRE_EXP_DECL void pcre_free(void *);
|
||||
PCRE_EXP_DECL void *pcre_stack_malloc(size_t);
|
||||
PCRE_EXP_DECL void pcre_stack_free(void *);
|
||||
PCRE_EXP_DECL int pcre_callout(pcre_callout_block *);
|
||||
|
||||
PCRE_EXP_DECL void *pcre16_malloc(size_t);
|
||||
PCRE_EXP_DECL void pcre16_free(void *);
|
||||
PCRE_EXP_DECL void *pcre16_stack_malloc(size_t);
|
||||
PCRE_EXP_DECL void pcre16_stack_free(void *);
|
||||
PCRE_EXP_DECL int pcre16_callout(pcre16_callout_block *);
|
||||
#endif /* VPCOMPAT */
|
||||
|
||||
/* User defined callback which provides a stack just before the match starts. */
|
||||
|
||||
typedef pcre_jit_stack *(*pcre_jit_callback)(void *);
|
||||
typedef pcre16_jit_stack *(*pcre16_jit_callback)(void *);
|
||||
|
||||
/* Exported PCRE functions */
|
||||
|
||||
PCRE_EXP_DECL pcre *pcre_compile(const char *, int, const char **, int *,
|
||||
const unsigned char *);
|
||||
PCRE_EXP_DECL pcre16 *pcre16_compile(PCRE_SPTR16, int, const char **, int *,
|
||||
const unsigned char *);
|
||||
PCRE_EXP_DECL pcre *pcre_compile2(const char *, int, int *, const char **,
|
||||
int *, const unsigned char *);
|
||||
PCRE_EXP_DECL pcre16 *pcre16_compile2(PCRE_SPTR16, int, int *, const char **,
|
||||
int *, const unsigned char *);
|
||||
PCRE_EXP_DECL int pcre_config(int, void *);
|
||||
PCRE_EXP_DECL int pcre16_config(int, void *);
|
||||
PCRE_EXP_DECL int pcre_copy_named_substring(const pcre *, const char *,
|
||||
int *, int, const char *, char *, int);
|
||||
PCRE_EXP_DECL int pcre16_copy_named_substring(const pcre16 *, PCRE_SPTR16,
|
||||
int *, int, PCRE_SPTR16, PCRE_UCHAR16 *, int);
|
||||
PCRE_EXP_DECL int pcre_copy_substring(const char *, int *, int, int,
|
||||
char *, int);
|
||||
PCRE_EXP_DECL int pcre16_copy_substring(PCRE_SPTR16, int *, int, int,
|
||||
PCRE_UCHAR16 *, int);
|
||||
PCRE_EXP_DECL int pcre_dfa_exec(const pcre *, const pcre_extra *,
|
||||
const char *, int, int, int, int *, int , int *, int);
|
||||
PCRE_EXP_DECL int pcre16_dfa_exec(const pcre16 *, const pcre16_extra *,
|
||||
PCRE_SPTR16, int, int, int, int *, int , int *, int);
|
||||
PCRE_EXP_DECL int pcre_exec(const pcre *, const pcre_extra *, PCRE_SPTR,
|
||||
int, int, int, int *, int);
|
||||
PCRE_EXP_DECL int pcre16_exec(const pcre16 *, const pcre16_extra *,
|
||||
PCRE_SPTR16, int, int, int, int *, int);
|
||||
PCRE_EXP_DECL void pcre_free_substring(const char *);
|
||||
PCRE_EXP_DECL void pcre16_free_substring(PCRE_SPTR16);
|
||||
PCRE_EXP_DECL void pcre_free_substring_list(const char **);
|
||||
PCRE_EXP_DECL void pcre16_free_substring_list(PCRE_SPTR16 *);
|
||||
PCRE_EXP_DECL int pcre_fullinfo(const pcre *, const pcre_extra *, int,
|
||||
void *);
|
||||
PCRE_EXP_DECL int pcre16_fullinfo(const pcre16 *, const pcre16_extra *, int,
|
||||
void *);
|
||||
PCRE_EXP_DECL int pcre_get_named_substring(const pcre *, const char *,
|
||||
int *, int, const char *, const char **);
|
||||
PCRE_EXP_DECL int pcre16_get_named_substring(const pcre16 *, PCRE_SPTR16,
|
||||
int *, int, PCRE_SPTR16, PCRE_SPTR16 *);
|
||||
PCRE_EXP_DECL int pcre_get_stringnumber(const pcre *, const char *);
|
||||
PCRE_EXP_DECL int pcre16_get_stringnumber(const pcre16 *, PCRE_SPTR16);
|
||||
PCRE_EXP_DECL int pcre_get_stringtable_entries(const pcre *, const char *,
|
||||
char **, char **);
|
||||
PCRE_EXP_DECL int pcre16_get_stringtable_entries(const pcre16 *, PCRE_SPTR16,
|
||||
PCRE_UCHAR16 **, PCRE_UCHAR16 **);
|
||||
PCRE_EXP_DECL int pcre_get_substring(const char *, int *, int, int,
|
||||
const char **);
|
||||
PCRE_EXP_DECL int pcre16_get_substring(PCRE_SPTR16, int *, int, int,
|
||||
PCRE_SPTR16 *);
|
||||
PCRE_EXP_DECL int pcre_get_substring_list(const char *, int *, int,
|
||||
const char ***);
|
||||
PCRE_EXP_DECL int pcre16_get_substring_list(PCRE_SPTR16, int *, int,
|
||||
PCRE_SPTR16 **);
|
||||
PCRE_EXP_DECL const unsigned char *pcre_maketables(void);
|
||||
PCRE_EXP_DECL const unsigned char *pcre16_maketables(void);
|
||||
PCRE_EXP_DECL int pcre_refcount(pcre *, int);
|
||||
PCRE_EXP_DECL int pcre16_refcount(pcre16 *, int);
|
||||
PCRE_EXP_DECL pcre_extra *pcre_study(const pcre *, int, const char **);
|
||||
PCRE_EXP_DECL pcre16_extra *pcre16_study(const pcre16 *, int, const char **);
|
||||
PCRE_EXP_DECL void pcre_free_study(pcre_extra *);
|
||||
PCRE_EXP_DECL void pcre16_free_study(pcre16_extra *);
|
||||
PCRE_EXP_DECL const char *pcre_version(void);
|
||||
PCRE_EXP_DECL const char *pcre16_version(void);
|
||||
|
||||
/* Utility functions for byte order swaps. */
|
||||
PCRE_EXP_DECL int pcre_pattern_to_host_byte_order(pcre *, pcre_extra *,
|
||||
const unsigned char *);
|
||||
PCRE_EXP_DECL int pcre16_pattern_to_host_byte_order(pcre16 *, pcre16_extra *,
|
||||
const unsigned char *);
|
||||
PCRE_EXP_DECL int pcre16_utf16_to_host_byte_order(PCRE_UCHAR16 *,
|
||||
PCRE_SPTR16, int, int *, int);
|
||||
|
||||
/* JIT compiler related functions. */
|
||||
|
||||
PCRE_EXP_DECL pcre_jit_stack *pcre_jit_stack_alloc(int, int);
|
||||
PCRE_EXP_DECL pcre16_jit_stack *pcre16_jit_stack_alloc(int, int);
|
||||
PCRE_EXP_DECL void pcre_jit_stack_free(pcre_jit_stack *);
|
||||
PCRE_EXP_DECL void pcre16_jit_stack_free(pcre16_jit_stack *);
|
||||
PCRE_EXP_DECL void pcre_assign_jit_stack(pcre_extra *,
|
||||
pcre_jit_callback, void *);
|
||||
PCRE_EXP_DECL void pcre16_assign_jit_stack(pcre16_extra *,
|
||||
pcre16_jit_callback, void *);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* End of pcre.h */
|
||||
@@ -1,288 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2012 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains an internal function that tests a compiled pattern to
|
||||
see if it was compiled with the opposite endianness. If so, it uses an
|
||||
auxiliary local function to flip the appropriate bytes. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Swap byte functions *
|
||||
*************************************************/
|
||||
|
||||
/* The following functions swap the bytes of a pcre_uint16
|
||||
and pcre_uint32 value.
|
||||
|
||||
Arguments:
|
||||
value any number
|
||||
|
||||
Returns: the byte swapped value
|
||||
*/
|
||||
|
||||
static pcre_uint32
|
||||
swap_uint32(pcre_uint32 value)
|
||||
{
|
||||
return ((value & 0x000000ff) << 24) |
|
||||
((value & 0x0000ff00) << 8) |
|
||||
((value & 0x00ff0000) >> 8) |
|
||||
(value >> 24);
|
||||
}
|
||||
|
||||
static pcre_uint16
|
||||
swap_uint16(pcre_uint16 value)
|
||||
{
|
||||
return (value >> 8) | (value << 8);
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Test for a byte-flipped compiled regex *
|
||||
*************************************************/
|
||||
|
||||
/* This function swaps the bytes of a compiled pattern usually
|
||||
loaded from the disk. It also sets the tables pointer, which
|
||||
is likely an invalid pointer after reload.
|
||||
|
||||
Arguments:
|
||||
argument_re points to the compiled expression
|
||||
extra_data points to extra data or is NULL
|
||||
tables points to the character tables or NULL
|
||||
|
||||
Returns: 0 if the swap is successful, negative on error
|
||||
*/
|
||||
|
||||
#ifdef COMPILE_PCRE8
|
||||
PCRE_EXP_DECL int pcre_pattern_to_host_byte_order(pcre *argument_re,
|
||||
pcre_extra *extra_data, const unsigned char *tables)
|
||||
#else
|
||||
PCRE_EXP_DECL int pcre16_pattern_to_host_byte_order(pcre16 *argument_re,
|
||||
pcre16_extra *extra_data, const unsigned char *tables)
|
||||
#endif
|
||||
{
|
||||
REAL_PCRE *re = (REAL_PCRE *)argument_re;
|
||||
pcre_study_data *study;
|
||||
#ifndef COMPILE_PCRE8
|
||||
pcre_uchar *ptr;
|
||||
int length;
|
||||
#ifdef SUPPORT_UTF
|
||||
BOOL utf;
|
||||
BOOL utf16_char;
|
||||
#endif /* SUPPORT_UTF */
|
||||
#endif /* !COMPILE_PCRE8 */
|
||||
|
||||
if (re == NULL) return PCRE_ERROR_NULL;
|
||||
if (re->magic_number == MAGIC_NUMBER)
|
||||
{
|
||||
if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
|
||||
re->tables = tables;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (re->magic_number != REVERSED_MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
|
||||
if ((swap_uint16(re->flags) & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
|
||||
|
||||
re->magic_number = MAGIC_NUMBER;
|
||||
re->size = swap_uint32(re->size);
|
||||
re->options = swap_uint32(re->options);
|
||||
re->flags = swap_uint16(re->flags);
|
||||
re->top_bracket = swap_uint16(re->top_bracket);
|
||||
re->top_backref = swap_uint16(re->top_backref);
|
||||
re->first_char = swap_uint16(re->first_char);
|
||||
re->req_char = swap_uint16(re->req_char);
|
||||
re->name_table_offset = swap_uint16(re->name_table_offset);
|
||||
re->name_entry_size = swap_uint16(re->name_entry_size);
|
||||
re->name_count = swap_uint16(re->name_count);
|
||||
re->ref_count = swap_uint16(re->ref_count);
|
||||
re->tables = tables;
|
||||
|
||||
if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_STUDY_DATA) != 0)
|
||||
{
|
||||
study = (pcre_study_data *)extra_data->study_data;
|
||||
study->size = swap_uint32(study->size);
|
||||
study->flags = swap_uint32(study->flags);
|
||||
study->minlength = swap_uint32(study->minlength);
|
||||
}
|
||||
|
||||
#ifndef COMPILE_PCRE8
|
||||
ptr = (pcre_uchar *)re + re->name_table_offset;
|
||||
length = re->name_count * re->name_entry_size;
|
||||
#ifdef SUPPORT_UTF
|
||||
utf = (re->options & PCRE_UTF16) != 0;
|
||||
utf16_char = FALSE;
|
||||
#endif
|
||||
|
||||
while(TRUE)
|
||||
{
|
||||
/* Swap previous characters. */
|
||||
while (length-- > 0)
|
||||
{
|
||||
*ptr = swap_uint16(*ptr);
|
||||
ptr++;
|
||||
}
|
||||
#ifdef SUPPORT_UTF
|
||||
if (utf16_char)
|
||||
{
|
||||
if (HAS_EXTRALEN(ptr[-1]))
|
||||
{
|
||||
/* We know that there is only one extra character in UTF-16. */
|
||||
*ptr = swap_uint16(*ptr);
|
||||
ptr++;
|
||||
}
|
||||
}
|
||||
utf16_char = FALSE;
|
||||
#endif /* SUPPORT_UTF */
|
||||
|
||||
/* Get next opcode. */
|
||||
length = 0;
|
||||
*ptr = swap_uint16(*ptr);
|
||||
switch (*ptr)
|
||||
{
|
||||
case OP_END:
|
||||
return 0;
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
case OP_CHAR:
|
||||
case OP_CHARI:
|
||||
case OP_NOT:
|
||||
case OP_NOTI:
|
||||
case OP_STAR:
|
||||
case OP_MINSTAR:
|
||||
case OP_PLUS:
|
||||
case OP_MINPLUS:
|
||||
case OP_QUERY:
|
||||
case OP_MINQUERY:
|
||||
case OP_UPTO:
|
||||
case OP_MINUPTO:
|
||||
case OP_EXACT:
|
||||
case OP_POSSTAR:
|
||||
case OP_POSPLUS:
|
||||
case OP_POSQUERY:
|
||||
case OP_POSUPTO:
|
||||
case OP_STARI:
|
||||
case OP_MINSTARI:
|
||||
case OP_PLUSI:
|
||||
case OP_MINPLUSI:
|
||||
case OP_QUERYI:
|
||||
case OP_MINQUERYI:
|
||||
case OP_UPTOI:
|
||||
case OP_MINUPTOI:
|
||||
case OP_EXACTI:
|
||||
case OP_POSSTARI:
|
||||
case OP_POSPLUSI:
|
||||
case OP_POSQUERYI:
|
||||
case OP_POSUPTOI:
|
||||
case OP_NOTSTAR:
|
||||
case OP_NOTMINSTAR:
|
||||
case OP_NOTPLUS:
|
||||
case OP_NOTMINPLUS:
|
||||
case OP_NOTQUERY:
|
||||
case OP_NOTMINQUERY:
|
||||
case OP_NOTUPTO:
|
||||
case OP_NOTMINUPTO:
|
||||
case OP_NOTEXACT:
|
||||
case OP_NOTPOSSTAR:
|
||||
case OP_NOTPOSPLUS:
|
||||
case OP_NOTPOSQUERY:
|
||||
case OP_NOTPOSUPTO:
|
||||
case OP_NOTSTARI:
|
||||
case OP_NOTMINSTARI:
|
||||
case OP_NOTPLUSI:
|
||||
case OP_NOTMINPLUSI:
|
||||
case OP_NOTQUERYI:
|
||||
case OP_NOTMINQUERYI:
|
||||
case OP_NOTUPTOI:
|
||||
case OP_NOTMINUPTOI:
|
||||
case OP_NOTEXACTI:
|
||||
case OP_NOTPOSSTARI:
|
||||
case OP_NOTPOSPLUSI:
|
||||
case OP_NOTPOSQUERYI:
|
||||
case OP_NOTPOSUPTOI:
|
||||
if (utf) utf16_char = TRUE;
|
||||
#endif
|
||||
/* Fall through. */
|
||||
|
||||
default:
|
||||
length = PRIV(OP_lengths)[*ptr] - 1;
|
||||
break;
|
||||
|
||||
case OP_CLASS:
|
||||
case OP_NCLASS:
|
||||
/* Skip the character bit map. */
|
||||
ptr += 32/sizeof(pcre_uchar);
|
||||
length = 0;
|
||||
break;
|
||||
|
||||
case OP_XCLASS:
|
||||
/* Reverse the size of the XCLASS instance. */
|
||||
ptr++;
|
||||
*ptr = swap_uint16(*ptr);
|
||||
if (LINK_SIZE > 1)
|
||||
{
|
||||
/* LINK_SIZE can be 1 or 2 in 16 bit mode. */
|
||||
ptr++;
|
||||
*ptr = swap_uint16(*ptr);
|
||||
}
|
||||
ptr++;
|
||||
length = (GET(ptr, -LINK_SIZE)) - (1 + LINK_SIZE + 1);
|
||||
*ptr = swap_uint16(*ptr);
|
||||
if ((*ptr & XCL_MAP) != 0)
|
||||
{
|
||||
/* Skip the character bit map. */
|
||||
ptr += 32/sizeof(pcre_uchar);
|
||||
length -= 32/sizeof(pcre_uchar);
|
||||
}
|
||||
break;
|
||||
}
|
||||
ptr++;
|
||||
}
|
||||
/* Control should never reach here in 16 bit mode. */
|
||||
#endif /* !COMPILE_PCRE8 */
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* End of pcre_byte_order.c */
|
||||
@@ -1,198 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* This file contains character tables that are used when no external tables
|
||||
are passed to PCRE by the application that calls it. The tables are used only
|
||||
for characters whose code values are less than 256.
|
||||
|
||||
This is a default version of the tables that assumes ASCII encoding. A program
|
||||
called dftables (which is distributed with PCRE) can be used to build
|
||||
alternative versions of this file. This is necessary if you are running in an
|
||||
EBCDIC environment, or if you want to default to a different encoding, for
|
||||
example ISO-8859-1. When dftables is run, it creates these tables in the
|
||||
current locale. If PCRE is configured with --enable-rebuild-chartables, this
|
||||
happens automatically.
|
||||
|
||||
The following #includes are present because without them gcc 4.x may remove the
|
||||
array definition from the final binary if PCRE is built into a static library
|
||||
and dead code stripping is activated. This leads to link errors. Pulling in the
|
||||
header ensures that the array gets flagged as "someone outside this compilation
|
||||
unit might reference this" and so it will always be supplied to the linker. */
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
const pcre_uint8 PRIV(default_tables)[] = {
|
||||
|
||||
/* This table is a lower casing table. */
|
||||
|
||||
0, 1, 2, 3, 4, 5, 6, 7,
|
||||
8, 9, 10, 11, 12, 13, 14, 15,
|
||||
16, 17, 18, 19, 20, 21, 22, 23,
|
||||
24, 25, 26, 27, 28, 29, 30, 31,
|
||||
32, 33, 34, 35, 36, 37, 38, 39,
|
||||
40, 41, 42, 43, 44, 45, 46, 47,
|
||||
48, 49, 50, 51, 52, 53, 54, 55,
|
||||
56, 57, 58, 59, 60, 61, 62, 63,
|
||||
64, 97, 98, 99,100,101,102,103,
|
||||
104,105,106,107,108,109,110,111,
|
||||
112,113,114,115,116,117,118,119,
|
||||
120,121,122, 91, 92, 93, 94, 95,
|
||||
96, 97, 98, 99,100,101,102,103,
|
||||
104,105,106,107,108,109,110,111,
|
||||
112,113,114,115,116,117,118,119,
|
||||
120,121,122,123,124,125,126,127,
|
||||
128,129,130,131,132,133,134,135,
|
||||
136,137,138,139,140,141,142,143,
|
||||
144,145,146,147,148,149,150,151,
|
||||
152,153,154,155,156,157,158,159,
|
||||
160,161,162,163,164,165,166,167,
|
||||
168,169,170,171,172,173,174,175,
|
||||
176,177,178,179,180,181,182,183,
|
||||
184,185,186,187,188,189,190,191,
|
||||
192,193,194,195,196,197,198,199,
|
||||
200,201,202,203,204,205,206,207,
|
||||
208,209,210,211,212,213,214,215,
|
||||
216,217,218,219,220,221,222,223,
|
||||
224,225,226,227,228,229,230,231,
|
||||
232,233,234,235,236,237,238,239,
|
||||
240,241,242,243,244,245,246,247,
|
||||
248,249,250,251,252,253,254,255,
|
||||
|
||||
/* This table is a case flipping table. */
|
||||
|
||||
0, 1, 2, 3, 4, 5, 6, 7,
|
||||
8, 9, 10, 11, 12, 13, 14, 15,
|
||||
16, 17, 18, 19, 20, 21, 22, 23,
|
||||
24, 25, 26, 27, 28, 29, 30, 31,
|
||||
32, 33, 34, 35, 36, 37, 38, 39,
|
||||
40, 41, 42, 43, 44, 45, 46, 47,
|
||||
48, 49, 50, 51, 52, 53, 54, 55,
|
||||
56, 57, 58, 59, 60, 61, 62, 63,
|
||||
64, 97, 98, 99,100,101,102,103,
|
||||
104,105,106,107,108,109,110,111,
|
||||
112,113,114,115,116,117,118,119,
|
||||
120,121,122, 91, 92, 93, 94, 95,
|
||||
96, 65, 66, 67, 68, 69, 70, 71,
|
||||
72, 73, 74, 75, 76, 77, 78, 79,
|
||||
80, 81, 82, 83, 84, 85, 86, 87,
|
||||
88, 89, 90,123,124,125,126,127,
|
||||
128,129,130,131,132,133,134,135,
|
||||
136,137,138,139,140,141,142,143,
|
||||
144,145,146,147,148,149,150,151,
|
||||
152,153,154,155,156,157,158,159,
|
||||
160,161,162,163,164,165,166,167,
|
||||
168,169,170,171,172,173,174,175,
|
||||
176,177,178,179,180,181,182,183,
|
||||
184,185,186,187,188,189,190,191,
|
||||
192,193,194,195,196,197,198,199,
|
||||
200,201,202,203,204,205,206,207,
|
||||
208,209,210,211,212,213,214,215,
|
||||
216,217,218,219,220,221,222,223,
|
||||
224,225,226,227,228,229,230,231,
|
||||
232,233,234,235,236,237,238,239,
|
||||
240,241,242,243,244,245,246,247,
|
||||
248,249,250,251,252,253,254,255,
|
||||
|
||||
/* This table contains bit maps for various character classes. Each map is 32
|
||||
bytes long and the bits run from the least significant end of each byte. The
|
||||
classes that have their own maps are: space, xdigit, digit, upper, lower, word,
|
||||
graph, print, punct, and cntrl. Other classes are built from combinations. */
|
||||
|
||||
0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
|
||||
0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
|
||||
0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
|
||||
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
|
||||
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
|
||||
0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
/* This table identifies various classes of character by individual bits:
|
||||
0x01 white space character
|
||||
0x02 letter
|
||||
0x04 decimal digit
|
||||
0x08 hexadecimal digit
|
||||
0x10 alphanumeric or '_'
|
||||
0x80 regular expression metacharacter or binary zero
|
||||
*/
|
||||
|
||||
0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
|
||||
0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
|
||||
0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
|
||||
0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
|
||||
0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
|
||||
0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
|
||||
0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
|
||||
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
|
||||
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
|
||||
0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
|
||||
0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
|
||||
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
|
||||
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
|
||||
0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
|
||||
|
||||
/* End of pcre_chartables.c */
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,170 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2012 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains the external function pcre_config(). */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
/* Keep the original link size. */
|
||||
/* Snapshot of LINK_SIZE taken before pcre_internal.h is included; this is the
value pcre_config() returns for PCRE_CONFIG_LINK_SIZE. NOTE(review): presumably
captured here because pcre_internal.h may redefine LINK_SIZE - confirm. */
static int real_link_size = LINK_SIZE;
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Return info about what features are configured *
|
||||
*************************************************/
|
||||
|
||||
/* This function has an extensible interface so that additional items can be
|
||||
added compatibly.
|
||||
|
||||
Arguments:
|
||||
what what information is required
|
||||
where where to put the information
|
||||
|
||||
Returns: 0 if data returned, negative on error
|
||||
*/
|
||||
|
||||
#ifdef COMPILE_PCRE8
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre_config(int what, void *where)
|
||||
#else
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre16_config(int what, void *where)
|
||||
#endif
|
||||
{
|
||||
switch (what)
|
||||
{
|
||||
case PCRE_CONFIG_UTF8:
|
||||
#if defined COMPILE_PCRE16
|
||||
*((int *)where) = 0;
|
||||
return PCRE_ERROR_BADOPTION;
|
||||
#else
|
||||
#if defined SUPPORT_UTF
|
||||
*((int *)where) = 1;
|
||||
#else
|
||||
*((int *)where) = 0;
|
||||
#endif
|
||||
break;
|
||||
#endif
|
||||
|
||||
case PCRE_CONFIG_UTF16:
|
||||
#if defined COMPILE_PCRE8
|
||||
*((int *)where) = 0;
|
||||
return PCRE_ERROR_BADOPTION;
|
||||
#else
|
||||
#if defined SUPPORT_UTF
|
||||
*((int *)where) = 1;
|
||||
#else
|
||||
*((int *)where) = 0;
|
||||
#endif
|
||||
break;
|
||||
#endif
|
||||
|
||||
case PCRE_CONFIG_UNICODE_PROPERTIES:
|
||||
#ifdef SUPPORT_UCP
|
||||
*((int *)where) = 1;
|
||||
#else
|
||||
*((int *)where) = 0;
|
||||
#endif
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_JIT:
|
||||
#ifdef SUPPORT_JIT
|
||||
*((int *)where) = 1;
|
||||
#else
|
||||
*((int *)where) = 0;
|
||||
#endif
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_JITTARGET:
|
||||
#ifdef SUPPORT_JIT
|
||||
*((const char **)where) = PRIV(jit_get_target)();
|
||||
#else
|
||||
*((const char **)where) = NULL;
|
||||
#endif
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_NEWLINE:
|
||||
*((int *)where) = NEWLINE;
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_BSR:
|
||||
#ifdef BSR_ANYCRLF
|
||||
*((int *)where) = 1;
|
||||
#else
|
||||
*((int *)where) = 0;
|
||||
#endif
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_LINK_SIZE:
|
||||
*((int *)where) = real_link_size;
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_POSIX_MALLOC_THRESHOLD:
|
||||
*((int *)where) = POSIX_MALLOC_THRESHOLD;
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_MATCH_LIMIT:
|
||||
*((unsigned long int *)where) = MATCH_LIMIT;
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_MATCH_LIMIT_RECURSION:
|
||||
*((unsigned long int *)where) = MATCH_LIMIT_RECURSION;
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_STACKRECURSE:
|
||||
#ifdef NO_RECURSE
|
||||
*((int *)where) = 0;
|
||||
#else
|
||||
*((int *)where) = 1;
|
||||
#endif
|
||||
break;
|
||||
|
||||
default: return PCRE_ERROR_BADOPTION;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* End of pcre_config.c */
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,206 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2012 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains the external function pcre_fullinfo(), which returns
|
||||
information about a compiled pattern. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Return info about compiled pattern *
|
||||
*************************************************/
|
||||
|
||||
/* This is a newer "info" function which has an extensible interface so
|
||||
that additional items can be added compatibly.
|
||||
|
||||
Arguments:
|
||||
argument_re points to compiled code
|
||||
extra_data points to extra data, or NULL
|
||||
what what information is required
|
||||
where where to put the information
|
||||
|
||||
Returns: 0 if data returned, negative on error
|
||||
*/
|
||||
|
||||
#ifdef COMPILE_PCRE8
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre_fullinfo(const pcre *argument_re, const pcre_extra *extra_data,
|
||||
int what, void *where)
|
||||
#else
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre16_fullinfo(const pcre16 *argument_re, const pcre16_extra *extra_data,
|
||||
int what, void *where)
|
||||
#endif
|
||||
{
|
||||
const REAL_PCRE *re = (const REAL_PCRE *)argument_re;
|
||||
const pcre_study_data *study = NULL;
|
||||
|
||||
if (re == NULL || where == NULL) return PCRE_ERROR_NULL;
|
||||
|
||||
if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_STUDY_DATA) != 0)
|
||||
study = (const pcre_study_data *)extra_data->study_data;
|
||||
|
||||
/* Check that the first field in the block is the magic number. If it is not,
|
||||
return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
|
||||
REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
|
||||
means that the pattern is likely compiled with different endianness. */
|
||||
|
||||
if (re->magic_number != MAGIC_NUMBER)
|
||||
return re->magic_number == REVERSED_MAGIC_NUMBER?
|
||||
PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
|
||||
|
||||
/* Check that this pattern was compiled in the correct bit mode */
|
||||
|
||||
if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
|
||||
|
||||
switch (what)
|
||||
{
|
||||
case PCRE_INFO_OPTIONS:
|
||||
*((unsigned long int *)where) = re->options & PUBLIC_COMPILE_OPTIONS;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_SIZE:
|
||||
*((size_t *)where) = re->size;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_STUDYSIZE:
|
||||
*((size_t *)where) = (study == NULL)? 0 : study->size;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_JITSIZE:
|
||||
#ifdef SUPPORT_JIT
|
||||
*((size_t *)where) =
|
||||
(extra_data != NULL &&
|
||||
(extra_data->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
|
||||
extra_data->executable_jit != NULL)?
|
||||
PRIV(jit_get_size)(extra_data->executable_jit) : 0;
|
||||
#else
|
||||
*((size_t *)where) = 0;
|
||||
#endif
|
||||
break;
|
||||
|
||||
case PCRE_INFO_CAPTURECOUNT:
|
||||
*((int *)where) = re->top_bracket;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_BACKREFMAX:
|
||||
*((int *)where) = re->top_backref;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_FIRSTBYTE:
|
||||
*((int *)where) =
|
||||
((re->flags & PCRE_FIRSTSET) != 0)? re->first_char :
|
||||
((re->flags & PCRE_STARTLINE) != 0)? -1 : -2;
|
||||
break;
|
||||
|
||||
/* Make sure we pass back the pointer to the bit vector in the external
|
||||
block, not the internal copy (with flipped integer fields). */
|
||||
|
||||
case PCRE_INFO_FIRSTTABLE:
|
||||
*((const pcre_uint8 **)where) =
|
||||
(study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0)?
|
||||
((const pcre_study_data *)extra_data->study_data)->start_bits : NULL;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_MINLENGTH:
|
||||
*((int *)where) =
|
||||
(study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0)?
|
||||
(int)(study->minlength) : -1;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_JIT:
|
||||
*((int *)where) = extra_data != NULL &&
|
||||
(extra_data->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
|
||||
extra_data->executable_jit != NULL;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_LASTLITERAL:
|
||||
*((int *)where) =
|
||||
((re->flags & PCRE_REQCHSET) != 0)? re->req_char : -1;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_NAMEENTRYSIZE:
|
||||
*((int *)where) = re->name_entry_size;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_NAMECOUNT:
|
||||
*((int *)where) = re->name_count;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_NAMETABLE:
|
||||
*((const pcre_uchar **)where) = (const pcre_uchar *)re + re->name_table_offset;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_DEFAULT_TABLES:
|
||||
*((const pcre_uint8 **)where) = (const pcre_uint8 *)(PRIV(default_tables));
|
||||
break;
|
||||
|
||||
/* From release 8.00 this will always return TRUE because NOPARTIAL is
|
||||
no longer ever set (the restrictions have been removed). */
|
||||
|
||||
case PCRE_INFO_OKPARTIAL:
|
||||
*((int *)where) = (re->flags & PCRE_NOPARTIAL) == 0;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_JCHANGED:
|
||||
*((int *)where) = (re->flags & PCRE_JCHANGED) != 0;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_HASCRORLF:
|
||||
*((int *)where) = (re->flags & PCRE_HASCRORLF) != 0;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_MAXLOOKBEHIND:
|
||||
*((int *)where) = re->max_lookbehind;
|
||||
break;
|
||||
|
||||
default: return PCRE_ERROR_BADOPTION;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* End of pcre_fullinfo.c */
|
||||
@@ -1,587 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2012 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains some convenience functions for extracting substrings
|
||||
from the subject string after a regex match has succeeded. The original idea
|
||||
for these functions came from Scott Wimer. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find number for named string *
|
||||
*************************************************/
|
||||
|
||||
/* This function is used by the get_first_set() function below, as well
|
||||
as being generally available. It assumes that names are unique.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
stringname the name whose number is required
|
||||
|
||||
Returns: the number of the named parentheses, or a negative number
|
||||
(PCRE_ERROR_NOSUBSTRING) if not found
|
||||
*/
|
||||
|
||||
#ifdef COMPILE_PCRE8
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre_get_stringnumber(const pcre *code, const char *stringname)
|
||||
#else
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre16_get_stringnumber(const pcre16 *code, PCRE_SPTR16 stringname)
|
||||
#endif
|
||||
{
|
||||
int rc;
|
||||
int entrysize;
|
||||
int top, bot;
|
||||
pcre_uchar *nametable;
|
||||
|
||||
#ifdef COMPILE_PCRE8
|
||||
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
|
||||
return rc;
|
||||
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
|
||||
|
||||
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
|
||||
return rc;
|
||||
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
|
||||
return rc;
|
||||
#endif
|
||||
#ifdef COMPILE_PCRE16
|
||||
if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
|
||||
return rc;
|
||||
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
|
||||
|
||||
if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
|
||||
return rc;
|
||||
if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
|
||||
return rc;
|
||||
#endif
|
||||
|
||||
bot = 0;
|
||||
while (top > bot)
|
||||
{
|
||||
int mid = (top + bot) / 2;
|
||||
pcre_uchar *entry = nametable + entrysize*mid;
|
||||
int c = STRCMP_UC_UC((pcre_uchar *)stringname,
|
||||
(pcre_uchar *)(entry + IMM2_SIZE));
|
||||
if (c == 0) return GET2(entry, 0);
|
||||
if (c > 0) bot = mid + 1; else top = mid;
|
||||
}
|
||||
|
||||
return PCRE_ERROR_NOSUBSTRING;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find (multiple) entries for named string *
|
||||
*************************************************/
|
||||
|
||||
/* This is used by the get_first_set() function below, as well as being
|
||||
generally available. It is used when duplicated names are permitted.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
stringname the name whose entries required
|
||||
firstptr where to put the pointer to the first entry
|
||||
lastptr where to put the pointer to the last entry
|
||||
|
||||
Returns: the length of each entry, or a negative number
|
||||
(PCRE_ERROR_NOSUBSTRING) if not found
|
||||
*/
|
||||
|
||||
#ifdef COMPILE_PCRE8
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre_get_stringtable_entries(const pcre *code, const char *stringname,
|
||||
char **firstptr, char **lastptr)
|
||||
#else
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre16_get_stringtable_entries(const pcre16 *code, PCRE_SPTR16 stringname,
|
||||
PCRE_UCHAR16 **firstptr, PCRE_UCHAR16 **lastptr)
|
||||
#endif
|
||||
{
|
||||
int rc;
|
||||
int entrysize;
|
||||
int top, bot;
|
||||
pcre_uchar *nametable, *lastentry;
|
||||
|
||||
#ifdef COMPILE_PCRE8
|
||||
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
|
||||
return rc;
|
||||
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
|
||||
|
||||
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
|
||||
return rc;
|
||||
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
|
||||
return rc;
|
||||
#endif
|
||||
#ifdef COMPILE_PCRE16
|
||||
if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
|
||||
return rc;
|
||||
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
|
||||
|
||||
if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
|
||||
return rc;
|
||||
if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
|
||||
return rc;
|
||||
#endif
|
||||
|
||||
lastentry = nametable + entrysize * (top - 1);
|
||||
bot = 0;
|
||||
while (top > bot)
|
||||
{
|
||||
int mid = (top + bot) / 2;
|
||||
pcre_uchar *entry = nametable + entrysize*mid;
|
||||
int c = STRCMP_UC_UC((pcre_uchar *)stringname,
|
||||
(pcre_uchar *)(entry + IMM2_SIZE));
|
||||
if (c == 0)
|
||||
{
|
||||
pcre_uchar *first = entry;
|
||||
pcre_uchar *last = entry;
|
||||
while (first > nametable)
|
||||
{
|
||||
if (STRCMP_UC_UC((pcre_uchar *)stringname,
|
||||
(pcre_uchar *)(first - entrysize + IMM2_SIZE)) != 0) break;
|
||||
first -= entrysize;
|
||||
}
|
||||
while (last < lastentry)
|
||||
{
|
||||
if (STRCMP_UC_UC((pcre_uchar *)stringname,
|
||||
(pcre_uchar *)(last + entrysize + IMM2_SIZE)) != 0) break;
|
||||
last += entrysize;
|
||||
}
|
||||
#ifdef COMPILE_PCRE8
|
||||
*firstptr = (char *)first;
|
||||
*lastptr = (char *)last;
|
||||
#else
|
||||
*firstptr = (PCRE_UCHAR16 *)first;
|
||||
*lastptr = (PCRE_UCHAR16 *)last;
|
||||
#endif
|
||||
return entrysize;
|
||||
}
|
||||
if (c > 0) bot = mid + 1; else top = mid;
|
||||
}
|
||||
|
||||
return PCRE_ERROR_NOSUBSTRING;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find first set of multiple named strings *
|
||||
*************************************************/
|
||||
|
||||
/* This function allows for duplicate names in the table of named substrings.
|
||||
It returns the number of the first one that was set in a pattern match.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
stringname the name of the capturing substring
|
||||
ovector the vector of matched substrings
|
||||
|
||||
Returns: the number of the first that is set,
|
||||
or the number of the last one if none are set,
|
||||
or a negative number on error
|
||||
*/
|
||||
|
||||
#ifdef COMPILE_PCRE8
|
||||
static int
|
||||
get_first_set(const pcre *code, const char *stringname, int *ovector)
|
||||
#else
|
||||
static int
|
||||
get_first_set(const pcre16 *code, PCRE_SPTR16 stringname, int *ovector)
|
||||
#endif
|
||||
{
|
||||
const REAL_PCRE *re = (const REAL_PCRE *)code;
|
||||
int entrysize;
|
||||
pcre_uchar *entry;
|
||||
#ifdef COMPILE_PCRE8
|
||||
char *first, *last;
|
||||
#else
|
||||
PCRE_UCHAR16 *first, *last;
|
||||
#endif
|
||||
|
||||
#ifdef COMPILE_PCRE8
|
||||
if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
|
||||
return pcre_get_stringnumber(code, stringname);
|
||||
entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
|
||||
#else
|
||||
if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
|
||||
return pcre16_get_stringnumber(code, stringname);
|
||||
entrysize = pcre16_get_stringtable_entries(code, stringname, &first, &last);
|
||||
#endif
|
||||
if (entrysize <= 0) return entrysize;
|
||||
for (entry = (pcre_uchar *)first; entry <= (pcre_uchar *)last; entry += entrysize)
|
||||
{
|
||||
int n = GET2(entry, 0);
|
||||
if (ovector[n*2] >= 0) return n;
|
||||
}
|
||||
return GET2(entry, 0);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Copy captured string to given buffer *
|
||||
*************************************************/
|
||||
|
||||
/* This function copies a single captured substring into a given buffer.
|
||||
Note that we use memcpy() rather than strncpy() in case there are binary zeros
|
||||
in the string.
|
||||
|
||||
Arguments:
|
||||
subject the subject string that was matched
|
||||
ovector pointer to the offsets table
|
||||
stringcount the number of substrings that were captured
|
||||
(i.e. the yield of the pcre_exec call, unless
|
||||
that was zero, in which case it should be 1/3
|
||||
of the offset table size)
|
||||
stringnumber the number of the required substring
|
||||
buffer where to put the substring
|
||||
size the size of the buffer
|
||||
|
||||
Returns: if successful:
|
||||
the length of the copied string, not including the zero
|
||||
that is put on the end; can be zero
|
||||
if not successful:
|
||||
PCRE_ERROR_NOMEMORY (-6) buffer too small
|
||||
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
|
||||
*/
|
||||
|
||||
#ifdef COMPILE_PCRE8
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre_copy_substring(const char *subject, int *ovector, int stringcount,
|
||||
int stringnumber, char *buffer, int size)
|
||||
#else
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre16_copy_substring(PCRE_SPTR16 subject, int *ovector, int stringcount,
|
||||
int stringnumber, PCRE_UCHAR16 *buffer, int size)
|
||||
#endif
|
||||
{
|
||||
int yield;
|
||||
if (stringnumber < 0 || stringnumber >= stringcount)
|
||||
return PCRE_ERROR_NOSUBSTRING;
|
||||
stringnumber *= 2;
|
||||
yield = ovector[stringnumber+1] - ovector[stringnumber];
|
||||
if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
|
||||
memcpy(buffer, subject + ovector[stringnumber], IN_UCHARS(yield));
|
||||
buffer[yield] = 0;
|
||||
return yield;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Copy named captured string to given buffer *
|
||||
*************************************************/
|
||||
|
||||
/* This function copies a single captured substring into a given buffer,
|
||||
identifying it by name. If the regex permits duplicate names, the first
|
||||
substring that is set is chosen.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
subject the subject string that was matched
|
||||
ovector pointer to the offsets table
|
||||
stringcount the number of substrings that were captured
|
||||
(i.e. the yield of the pcre_exec call, unless
|
||||
that was zero, in which case it should be 1/3
|
||||
of the offset table size)
|
||||
stringname the name of the required substring
|
||||
buffer where to put the substring
|
||||
size the size of the buffer
|
||||
|
||||
Returns: if successful:
|
||||
the length of the copied string, not including the zero
|
||||
that is put on the end; can be zero
|
||||
if not successful:
|
||||
PCRE_ERROR_NOMEMORY (-6) buffer too small
|
||||
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
|
||||
*/
|
||||
|
||||
#ifdef COMPILE_PCRE8
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre_copy_named_substring(const pcre *code, const char *subject,
|
||||
int *ovector, int stringcount, const char *stringname,
|
||||
char *buffer, int size)
|
||||
#else
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre16_copy_named_substring(const pcre16 *code, PCRE_SPTR16 subject,
|
||||
int *ovector, int stringcount, PCRE_SPTR16 stringname,
|
||||
PCRE_UCHAR16 *buffer, int size)
|
||||
#endif
|
||||
{
|
||||
int n = get_first_set(code, stringname, ovector);
|
||||
if (n <= 0) return n;
|
||||
#ifdef COMPILE_PCRE8
|
||||
return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
|
||||
#else
|
||||
return pcre16_copy_substring(subject, ovector, stringcount, n, buffer, size);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Copy all captured strings to new store *
|
||||
*************************************************/
|
||||
|
||||
/* This function gets one chunk of store and builds a list of pointers and all
|
||||
of the captured substrings in it. A NULL pointer is put on the end of the list.
|
||||
|
||||
Arguments:
|
||||
subject the subject string that was matched
|
||||
ovector pointer to the offsets table
|
||||
stringcount the number of substrings that were captured
|
||||
(i.e. the yield of the pcre_exec call, unless
|
||||
that was zero, in which case it should be 1/3
|
||||
of the offset table size)
|
||||
listptr set to point to the list of pointers
|
||||
|
||||
Returns: if successful: 0
|
||||
if not successful:
|
||||
PCRE_ERROR_NOMEMORY (-6) failed to get store
|
||||
*/
|
||||
|
||||
#ifdef COMPILE_PCRE8
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
|
||||
const char ***listptr)
|
||||
#else
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre16_get_substring_list(PCRE_SPTR16 subject, int *ovector, int stringcount,
|
||||
PCRE_SPTR16 **listptr)
|
||||
#endif
|
||||
{
|
||||
int i;
|
||||
int size = sizeof(pcre_uchar *);
|
||||
int double_count = stringcount * 2;
|
||||
pcre_uchar **stringlist;
|
||||
pcre_uchar *p;
|
||||
|
||||
for (i = 0; i < double_count; i += 2)
|
||||
size += sizeof(pcre_uchar *) + IN_UCHARS(ovector[i+1] - ovector[i] + 1);
|
||||
|
||||
stringlist = (pcre_uchar **)(PUBL(malloc))(size);
|
||||
if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
|
||||
|
||||
#ifdef COMPILE_PCRE8
|
||||
*listptr = (const char **)stringlist;
|
||||
#else
|
||||
*listptr = (PCRE_SPTR16 *)stringlist;
|
||||
#endif
|
||||
p = (pcre_uchar *)(stringlist + stringcount + 1);
|
||||
|
||||
for (i = 0; i < double_count; i += 2)
|
||||
{
|
||||
int len = ovector[i+1] - ovector[i];
|
||||
memcpy(p, subject + ovector[i], IN_UCHARS(len));
|
||||
*stringlist++ = p;
|
||||
p += len;
|
||||
*p++ = 0;
|
||||
}
|
||||
|
||||
*stringlist = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free store obtained by get_substring_list *
|
||||
*************************************************/
|
||||
|
||||
/* This function exists for the benefit of people calling PCRE from non-C
|
||||
programs that can call its functions, but not free() or (PUBL(free))()
|
||||
directly.
|
||||
|
||||
Argument: the result of a previous pcre_get_substring_list()
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
#ifdef COMPILE_PCRE8
|
||||
PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
|
||||
pcre_free_substring_list(const char **pointer)
|
||||
#else
|
||||
PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
|
||||
pcre16_free_substring_list(PCRE_SPTR16 *pointer)
|
||||
#endif
|
||||
{
|
||||
(PUBL(free))((void *)pointer);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Copy captured string to new store *
|
||||
*************************************************/
|
||||
|
||||
/* This function copies a single captured substring into a piece of new
|
||||
store
|
||||
|
||||
Arguments:
|
||||
subject the subject string that was matched
|
||||
ovector pointer to the offsets table
|
||||
stringcount the number of substrings that were captured
|
||||
(i.e. the yield of the pcre_exec call, unless
|
||||
that was zero, in which case it should be 1/3
|
||||
of the offset table size)
|
||||
stringnumber the number of the required substring
|
||||
stringptr where to put a pointer to the substring
|
||||
|
||||
Returns: if successful:
|
||||
the length of the string, not including the zero that
|
||||
is put on the end; can be zero
|
||||
if not successful:
|
||||
PCRE_ERROR_NOMEMORY (-6) failed to get store
|
||||
PCRE_ERROR_NOSUBSTRING (-7) substring not present
|
||||
*/
|
||||
|
||||
#ifdef COMPILE_PCRE8
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre_get_substring(const char *subject, int *ovector, int stringcount,
|
||||
int stringnumber, const char **stringptr)
|
||||
#else
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre16_get_substring(PCRE_SPTR16 subject, int *ovector, int stringcount,
|
||||
int stringnumber, PCRE_SPTR16 *stringptr)
|
||||
#endif
|
||||
{
|
||||
int yield;
|
||||
pcre_uchar *substring;
|
||||
if (stringnumber < 0 || stringnumber >= stringcount)
|
||||
return PCRE_ERROR_NOSUBSTRING;
|
||||
stringnumber *= 2;
|
||||
yield = ovector[stringnumber+1] - ovector[stringnumber];
|
||||
substring = (pcre_uchar *)(PUBL(malloc))(IN_UCHARS(yield + 1));
|
||||
if (substring == NULL) return PCRE_ERROR_NOMEMORY;
|
||||
memcpy(substring, subject + ovector[stringnumber], IN_UCHARS(yield));
|
||||
substring[yield] = 0;
|
||||
#ifdef COMPILE_PCRE8
|
||||
*stringptr = (const char *)substring;
|
||||
#else
|
||||
*stringptr = (PCRE_SPTR16)substring;
|
||||
#endif
|
||||
return yield;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Copy named captured string to new store *
|
||||
*************************************************/
|
||||
|
||||
/* This function copies a single captured substring, identified by name, into
|
||||
new store. If the regex permits duplicate names, the first substring that is
|
||||
set is chosen.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
subject the subject string that was matched
|
||||
ovector pointer to the offsets table
|
||||
stringcount the number of substrings that were captured
|
||||
(i.e. the yield of the pcre_exec call, unless
|
||||
that was zero, in which case it should be 1/3
|
||||
of the offset table size)
|
||||
stringname the name of the required substring
|
||||
stringptr where to put the pointer
|
||||
|
||||
Returns: if successful:
|
||||
the length of the copied string, not including the zero
|
||||
that is put on the end; can be zero
|
||||
if not successful:
|
||||
PCRE_ERROR_NOMEMORY (-6) couldn't get memory
|
||||
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
|
||||
*/
|
||||
|
||||
#ifdef COMPILE_PCRE8
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre_get_named_substring(const pcre *code, const char *subject,
|
||||
int *ovector, int stringcount, const char *stringname,
|
||||
const char **stringptr)
|
||||
#else
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre16_get_named_substring(const pcre16 *code, PCRE_SPTR16 subject,
|
||||
int *ovector, int stringcount, PCRE_SPTR16 stringname,
|
||||
PCRE_SPTR16 *stringptr)
|
||||
#endif
|
||||
{
|
||||
int n = get_first_set(code, stringname, ovector);
|
||||
if (n <= 0) return n;
|
||||
#ifdef COMPILE_PCRE8
|
||||
return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
|
||||
#else
|
||||
return pcre16_get_substring(subject, ovector, stringcount, n, stringptr);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free store obtained by get_substring *
|
||||
*************************************************/
|
||||
|
||||
/* This function exists for the benefit of people calling PCRE from non-C
|
||||
programs that can call its functions, but not free() or (PUBL(free))()
|
||||
directly.
|
||||
|
||||
Argument: the result of a previous pcre_get_substring()
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
#ifdef COMPILE_PCRE8
|
||||
PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
|
||||
pcre_free_substring(const char *pointer)
|
||||
#else
|
||||
PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
|
||||
pcre16_free_substring(PCRE_SPTR16 pointer)
|
||||
#endif
|
||||
{
|
||||
(PUBL(free))((void *)pointer);
|
||||
}
|
||||
|
||||
/* End of pcre_get.c */
|
||||
@@ -1,90 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2012 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains global variables that are exported by the PCRE library.
|
||||
PCRE is thread-clean and doesn't use any global variables in the normal sense.
|
||||
However, it calls memory allocation and freeing functions via the four
|
||||
indirections below, and it can optionally do callouts, using the fifth
|
||||
indirection. These values can be changed by the caller, but are shared between
|
||||
all threads.
|
||||
|
||||
For MS Visual Studio and Symbian OS, there are problems in initializing these
|
||||
variables to non-local functions. In these cases, therefore, an indirection via
|
||||
a local function is used.
|
||||
|
||||
Also, when compiling for Virtual Pascal, things are done differently, and
|
||||
global variables are not used. */
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
#ifdef GLIB_COMPILATION
|
||||
#include "gmem.h"
|
||||
#else
|
||||
#include <glib.h>
|
||||
#endif /* GLIB_COMPILATION */
|
||||
|
||||
#if defined _MSC_VER || defined __SYMBIAN32__
|
||||
static void* LocalPcreMalloc(size_t aSize)
|
||||
{
|
||||
return malloc(aSize);
|
||||
}
|
||||
static void LocalPcreFree(void* aPtr)
|
||||
{
|
||||
free(aPtr);
|
||||
}
|
||||
PCRE_EXP_DATA_DEFN void *(*PUBL(malloc))(size_t) = LocalPcreMalloc;
|
||||
PCRE_EXP_DATA_DEFN void (*PUBL(free))(void *) = LocalPcreFree;
|
||||
PCRE_EXP_DATA_DEFN void *(*PUBL(stack_malloc))(size_t) = LocalPcreMalloc;
|
||||
PCRE_EXP_DATA_DEFN void (*PUBL(stack_free))(void *) = LocalPcreFree;
|
||||
PCRE_EXP_DATA_DEFN int (*PUBL(callout))(PUBL(callout_block) *) = NULL;
|
||||
|
||||
#elif !defined VPCOMPAT
|
||||
PCRE_EXP_DATA_DEFN void *(*PUBL(malloc))(size_t) = g_try_malloc;
|
||||
PCRE_EXP_DATA_DEFN void (*PUBL(free))(void *) = g_free;
|
||||
PCRE_EXP_DATA_DEFN void *(*PUBL(stack_malloc))(size_t) = g_try_malloc;
|
||||
PCRE_EXP_DATA_DEFN void (*PUBL(stack_free))(void *) = g_free;
|
||||
PCRE_EXP_DATA_DEFN int (*PUBL(callout))(PUBL(callout_block) *) = NULL;
|
||||
#endif
|
||||
|
||||
/* End of pcre_globals.c */
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,184 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2012 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains internal functions for testing newlines when more than
|
||||
one kind of newline is to be recognized. When a newline is found, its length is
|
||||
returned. In principle, we could implement several newline "types", each
|
||||
referring to a different set of newline characters. At present, PCRE supports
|
||||
only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF,
|
||||
and NLTYPE_ANY. The full list of Unicode newline characters is taken from
|
||||
http://unicode.org/unicode/reports/tr18/. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Check for newline at given position *
|
||||
*************************************************/
|
||||
|
||||
/* It is guaranteed that the initial value of ptr is less than the end of the
|
||||
string that is being processed.
|
||||
|
||||
Arguments:
|
||||
ptr pointer to possible newline
|
||||
type the newline type
|
||||
endptr pointer to the end of the string
|
||||
lenptr where to return the length
|
||||
utf TRUE if in utf mode
|
||||
|
||||
Returns: TRUE or FALSE
|
||||
*/
|
||||
|
||||
BOOL
|
||||
PRIV(is_newline)(PCRE_PUCHAR ptr, int type, PCRE_PUCHAR endptr, int *lenptr,
|
||||
BOOL utf)
|
||||
{
|
||||
int c;
|
||||
(void)utf;
|
||||
#ifdef SUPPORT_UTF
|
||||
if (utf)
|
||||
{
|
||||
GETCHAR(c, ptr);
|
||||
}
|
||||
else
|
||||
#endif /* SUPPORT_UTF */
|
||||
c = *ptr;
|
||||
|
||||
if (type == NLTYPE_ANYCRLF) switch(c)
|
||||
{
|
||||
case 0x000a: *lenptr = 1; return TRUE; /* LF */
|
||||
case 0x000d: *lenptr = (ptr < endptr - 1 && ptr[1] == 0x0a)? 2 : 1;
|
||||
return TRUE; /* CR */
|
||||
default: return FALSE;
|
||||
}
|
||||
|
||||
/* NLTYPE_ANY */
|
||||
|
||||
else switch(c)
|
||||
{
|
||||
case 0x000a: /* LF */
|
||||
case 0x000b: /* VT */
|
||||
case 0x000c: *lenptr = 1; return TRUE; /* FF */
|
||||
case 0x000d: *lenptr = (ptr < endptr - 1 && ptr[1] == 0x0a)? 2 : 1;
|
||||
return TRUE; /* CR */
|
||||
#ifdef COMPILE_PCRE8
|
||||
case 0x0085: *lenptr = utf? 2 : 1; return TRUE; /* NEL */
|
||||
case 0x2028: /* LS */
|
||||
case 0x2029: *lenptr = 3; return TRUE; /* PS */
|
||||
#else
|
||||
case 0x0085: /* NEL */
|
||||
case 0x2028: /* LS */
|
||||
case 0x2029: *lenptr = 1; return TRUE; /* PS */
|
||||
#endif /* COMPILE_PCRE8 */
|
||||
default: return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Check for newline at previous position *
|
||||
*************************************************/
|
||||
|
||||
/* It is guaranteed that the initial value of ptr is greater than the start of
|
||||
the string that is being processed.
|
||||
|
||||
Arguments:
|
||||
ptr pointer to possible newline
|
||||
type the newline type
|
||||
startptr pointer to the start of the string
|
||||
lenptr where to return the length
|
||||
utf TRUE if in utf mode
|
||||
|
||||
Returns: TRUE or FALSE
|
||||
*/
|
||||
|
||||
BOOL
|
||||
PRIV(was_newline)(PCRE_PUCHAR ptr, int type, PCRE_PUCHAR startptr, int *lenptr,
|
||||
BOOL utf)
|
||||
{
|
||||
int c;
|
||||
(void)utf;
|
||||
ptr--;
|
||||
#ifdef SUPPORT_UTF
|
||||
if (utf)
|
||||
{
|
||||
BACKCHAR(ptr);
|
||||
GETCHAR(c, ptr);
|
||||
}
|
||||
else
|
||||
#endif /* SUPPORT_UTF */
|
||||
c = *ptr;
|
||||
|
||||
if (type == NLTYPE_ANYCRLF) switch(c)
|
||||
{
|
||||
case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1;
|
||||
return TRUE; /* LF */
|
||||
case 0x000d: *lenptr = 1; return TRUE; /* CR */
|
||||
default: return FALSE;
|
||||
}
|
||||
|
||||
else switch(c)
|
||||
{
|
||||
case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1;
|
||||
return TRUE; /* LF */
|
||||
case 0x000b: /* VT */
|
||||
case 0x000c: /* FF */
|
||||
case 0x000d: *lenptr = 1; return TRUE; /* CR */
|
||||
#ifdef COMPILE_PCRE8
|
||||
case 0x0085: *lenptr = utf? 2 : 1; return TRUE; /* NEL */
|
||||
case 0x2028: /* LS */
|
||||
case 0x2029: *lenptr = 3; return TRUE; /* PS */
|
||||
#else
|
||||
case 0x0085: /* NEL */
|
||||
case 0x2028: /* LS */
|
||||
case 0x2029: *lenptr = 1; return TRUE; /* PS */
|
||||
#endif /* COMPILE_PCRE8 */
|
||||
default: return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
/* End of pcre_newline.c */
|
||||
@@ -1,97 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2012 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This file contains a private PCRE function that converts an ordinal
|
||||
character value into a UTF8 string. */
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Convert character value to UTF-8 *
|
||||
*************************************************/
|
||||
|
||||
/* This function takes an integer value in the range 0 - 0x10ffff
|
||||
and encodes it as a UTF-8 character in 1 to 6 pcre_uchars.
|
||||
|
||||
Arguments:
|
||||
cvalue the character value
|
||||
buffer pointer to buffer for result - at least 6 pcre_uchars long
|
||||
|
||||
Returns: number of characters placed in the buffer
|
||||
*/
|
||||
|
||||
int
|
||||
PRIV(ord2utf)(pcre_uint32 cvalue, pcre_uchar *buffer)
|
||||
{
|
||||
#ifdef SUPPORT_UTF
|
||||
|
||||
int i, j;
|
||||
|
||||
/* Checking invalid cvalue character, encoded as invalid UTF-16 character.
|
||||
Should never happen in practice. */
|
||||
if ((cvalue & 0xf800) == 0xd800 || cvalue >= 0x110000)
|
||||
cvalue = 0xfffe;
|
||||
|
||||
for (i = 0; i < PRIV(utf8_table1_size); i++)
|
||||
if ((int)cvalue <= PRIV(utf8_table1)[i]) break;
|
||||
buffer += i;
|
||||
for (j = i; j > 0; j--)
|
||||
{
|
||||
*buffer-- = 0x80 | (cvalue & 0x3f);
|
||||
cvalue >>= 6;
|
||||
}
|
||||
*buffer = PRIV(utf8_table2)[i] | cvalue;
|
||||
return i + 1;
|
||||
|
||||
#else
|
||||
|
||||
(void)(cvalue); /* Keep compiler happy; this function won't ever be */
|
||||
(void)(buffer); /* called when SUPPORT_UTF is not defined. */
|
||||
return 0;
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
/* End of pcre_ord2utf8.c */
|
||||
@@ -1,168 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2012 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains an internal function that is used to match an extended
|
||||
class. It is used by both pcre_exec() and pcre_dfa_exec(). */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
#ifndef COMPILE_PCRE8
|
||||
|
||||
/*************************************************
|
||||
* Compare string utilities *
|
||||
*************************************************/
|
||||
|
||||
/* The following two functions compare two strings. Basically an strcmp
|
||||
for non 8 bit characters.
|
||||
|
||||
Arguments:
|
||||
str1 first string
|
||||
str2 second string
|
||||
|
||||
Returns: 0 if both strings are equal, -1 if str1 sorts before str2, 1 otherwise
|
||||
*/
|
||||
|
||||
int
|
||||
PRIV(strcmp_uc_uc)(const pcre_uchar *str1, const pcre_uchar *str2)
|
||||
{
|
||||
pcre_uchar c1;
|
||||
pcre_uchar c2;
|
||||
|
||||
while (*str1 != '\0' || *str2 != '\0')
|
||||
{
|
||||
c1 = *str1++;
|
||||
c2 = *str2++;
|
||||
if (c1 != c2)
|
||||
return ((c1 > c2) << 1) - 1;
|
||||
}
|
||||
/* Both length and characters must be equal. */
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
PRIV(strcmp_uc_c8)(const pcre_uchar *str1, const char *str2)
|
||||
{
|
||||
const pcre_uint8 *ustr2 = (pcre_uint8 *)str2;
|
||||
pcre_uchar c1;
|
||||
pcre_uchar c2;
|
||||
|
||||
while (*str1 != '\0' || *ustr2 != '\0')
|
||||
{
|
||||
c1 = *str1++;
|
||||
c2 = (pcre_uchar)*ustr2++;
|
||||
if (c1 != c2)
|
||||
return ((c1 > c2) << 1) - 1;
|
||||
}
|
||||
/* Both length and characters must be equal. */
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* The following two functions compare two fixed-length
|
||||
strings. Basically an strncmp for non 8 bit characters.
|
||||
|
||||
Arguments:
|
||||
str1 first string
|
||||
str2 second string
|
||||
num size of the string
|
||||
|
||||
Returns: 0 if both strings are equal, -1 if str1 sorts before str2, 1 otherwise
|
||||
*/
|
||||
|
||||
int
|
||||
PRIV(strncmp_uc_uc)(const pcre_uchar *str1, const pcre_uchar *str2, unsigned int num)
|
||||
{
|
||||
pcre_uchar c1;
|
||||
pcre_uchar c2;
|
||||
|
||||
while (num-- > 0)
|
||||
{
|
||||
c1 = *str1++;
|
||||
c2 = *str2++;
|
||||
if (c1 != c2)
|
||||
return ((c1 > c2) << 1) - 1;
|
||||
}
|
||||
/* Both length and characters must be equal. */
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
PRIV(strncmp_uc_c8)(const pcre_uchar *str1, const char *str2, unsigned int num)
|
||||
{
|
||||
const pcre_uint8 *ustr2 = (pcre_uint8 *)str2;
|
||||
pcre_uchar c1;
|
||||
pcre_uchar c2;
|
||||
|
||||
while (num-- > 0)
|
||||
{
|
||||
c1 = *str1++;
|
||||
c2 = (pcre_uchar)*ustr2++;
|
||||
if (c1 != c2)
|
||||
return ((c1 > c2) << 1) - 1;
|
||||
}
|
||||
/* Both length and characters must be equal. */
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* The following function returns with the length of
|
||||
a zero terminated string. Basically an strlen for non 8 bit characters.
|
||||
|
||||
Arguments:
|
||||
str string
|
||||
|
||||
Returns: length of the string
|
||||
*/
|
||||
|
||||
unsigned int
|
||||
PRIV(strlen_uc)(const pcre_uchar *str)
|
||||
{
|
||||
unsigned int len = 0;
|
||||
while (*str++ != 0)
|
||||
len++;
|
||||
return len;
|
||||
}
|
||||
|
||||
#endif /* COMPILE_PCRE8 */
|
||||
|
||||
/* End of pcre_string_utils.c */
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,602 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2012 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef PCRE_INCLUDED
|
||||
|
||||
/* This module contains some fixed tables that are used by more than one of the
|
||||
PCRE code modules. The tables are also #included by the pcretest program, which
|
||||
uses macros to change their names from _pcre_xxx to xxxx, thereby avoiding name
|
||||
clashes with the library. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
#endif /* PCRE_INCLUDED */
|
||||
|
||||
/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
|
||||
the definition is next to the definition of the opcodes in pcre_internal.h. */
|
||||
|
||||
/* The whole initializer is supplied by the OP_LENGTHS macro, which is defined
elsewhere; nothing is listed here so that the lengths stay next to the opcode
definitions. */
const pcre_uint8 PRIV(OP_lengths)[] = { OP_LENGTHS };
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Tables for UTF-8 support *
|
||||
*************************************************/
|
||||
|
||||
/* These are the breakpoints for different numbers of bytes in a UTF-8
|
||||
character. */
|
||||
|
||||
#if (defined SUPPORT_UTF && defined COMPILE_PCRE8) || (defined PCRE_INCLUDED && defined SUPPORT_PCRE16)

/* The second half of the condition exists because pcretest also needs these
tables in 16 bit mode. */

/* Largest value that fits in a UTF-8 character of 1, 2, ... 6 bytes. */
const int PRIV(utf8_table1)[] = {
  0x7f,
  0x7ff,
  0xffff,
  0x1fffff,
  0x3ffffff,
  0x7fffffff
};

/* Number of entries in utf8_table1. */
const int PRIV(utf8_table1_size) =
  sizeof(PRIV(utf8_table1)) / sizeof(PRIV(utf8_table1)[0]);

/* Indexed by the number of additional bytes: table2 holds the indicator bits
to set in the first byte of a character, table3 the mask for the data bits of
that first byte. */
const int PRIV(utf8_table2)[] = {
  0,
  0xc0,
  0xe0,
  0xf0,
  0xf8,
  0xfc
};

const int PRIV(utf8_table3)[] = {
  0xff,
  0x1f,
  0x0f,
  0x07,
  0x03,
  0x01
};

/* Number of extra bytes, indexed by the first byte masked with 0x3f. The
highest index reached by a valid UTF-8 first byte is in fact 0x3d. */
const pcre_uint8 PRIV(utf8_table4)[] = {
  1,1,1,1,1,1,1,1,   /* 0x00 - 0x07 */
  1,1,1,1,1,1,1,1,   /* 0x08 - 0x0f */
  1,1,1,1,1,1,1,1,   /* 0x10 - 0x17 */
  1,1,1,1,1,1,1,1,   /* 0x18 - 0x1f */
  2,2,2,2,2,2,2,2,   /* 0x20 - 0x27 */
  2,2,2,2,2,2,2,2,   /* 0x28 - 0x2f */
  3,3,3,3,3,3,3,3,   /* 0x30 - 0x37 */
  4,4,4,4,5,5,5,5    /* 0x38 - 0x3f */
};

#endif /* (SUPPORT_UTF && COMPILE_PCRE8) || (PCRE_INCLUDED && SUPPORT_PCRE16) */
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
|
||||
/* Table to translate from particular type value to the general value. */
|
||||
|
||||
const int PRIV(ucp_gentype)[] = {
|
||||
ucp_C, ucp_C, ucp_C, ucp_C, ucp_C, /* Cc, Cf, Cn, Co, Cs */
|
||||
ucp_L, ucp_L, ucp_L, ucp_L, ucp_L, /* Ll, Lu, Lm, Lo, Lt */
|
||||
ucp_M, ucp_M, ucp_M, /* Mc, Me, Mn */
|
||||
ucp_N, ucp_N, ucp_N, /* Nd, Nl, No */
|
||||
ucp_P, ucp_P, ucp_P, ucp_P, ucp_P, /* Pc, Pd, Pe, Pf, Pi */
|
||||
ucp_P, ucp_P, /* Ps, Po */
|
||||
ucp_S, ucp_S, ucp_S, ucp_S, /* Sc, Sk, Sm, So */
|
||||
ucp_Z, ucp_Z, ucp_Z /* Zl, Zp, Zs */
|
||||
};
|
||||
|
||||
#ifdef SUPPORT_JIT

/* The reverse of PRIV(ucp_gentype), kept so that a memory load can be
avoided. One pair of particular types per general category, in the order
C, L, M, N, P, S, Z -- presumably the lowest and highest particular type of
each category; confirm against the ucp_ enum. */
const int PRIV(ucp_typerange)[] = {
  ucp_Cc, ucp_Cs,   /* C */
  ucp_Ll, ucp_Lu,   /* L */
  ucp_Mc, ucp_Mn,   /* M */
  ucp_Nd, ucp_No,   /* N */
  ucp_Pc, ucp_Ps,   /* P */
  ucp_Sc, ucp_So,   /* S */
  ucp_Zl, ucp_Zs    /* Z */
};

#endif /* SUPPORT_JIT */
|
||||
|
||||
/* The pcre_utt[] table below translates Unicode property names into type and
|
||||
code values. It is searched by binary chop, so must be in collating sequence of
|
||||
name. Originally, the table contained pointers to the name strings in the first
|
||||
field of each entry. However, that leads to a large number of relocations when
|
||||
a shared library is dynamically loaded. A significant reduction is made by
|
||||
putting all the names into a single, large string and then using offsets in the
|
||||
table itself. Maintenance is more error-prone, but frequent changes to this
|
||||
data are unlikely.
|
||||
|
||||
July 2008: There is now a script called maint/GenerateUtt.py that can be used
|
||||
to generate this data automatically instead of maintaining it by hand.
|
||||
|
||||
The script was updated in March 2009 to generate a new EBCDIC-compliant
|
||||
version. Like all other character and string literals that are compared against
|
||||
the regular expression pattern, we must use STR_ macros instead of literal
|
||||
strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
||||
|
||||
#define STRING_Any0 STR_A STR_n STR_y "\0"
|
||||
#define STRING_Arabic0 STR_A STR_r STR_a STR_b STR_i STR_c "\0"
|
||||
#define STRING_Armenian0 STR_A STR_r STR_m STR_e STR_n STR_i STR_a STR_n "\0"
|
||||
#define STRING_Avestan0 STR_A STR_v STR_e STR_s STR_t STR_a STR_n "\0"
|
||||
#define STRING_Balinese0 STR_B STR_a STR_l STR_i STR_n STR_e STR_s STR_e "\0"
|
||||
#define STRING_Bamum0 STR_B STR_a STR_m STR_u STR_m "\0"
|
||||
#define STRING_Batak0 STR_B STR_a STR_t STR_a STR_k "\0"
|
||||
#define STRING_Bengali0 STR_B STR_e STR_n STR_g STR_a STR_l STR_i "\0"
|
||||
#define STRING_Bopomofo0 STR_B STR_o STR_p STR_o STR_m STR_o STR_f STR_o "\0"
|
||||
#define STRING_Brahmi0 STR_B STR_r STR_a STR_h STR_m STR_i "\0"
|
||||
#define STRING_Braille0 STR_B STR_r STR_a STR_i STR_l STR_l STR_e "\0"
|
||||
#define STRING_Buginese0 STR_B STR_u STR_g STR_i STR_n STR_e STR_s STR_e "\0"
|
||||
#define STRING_Buhid0 STR_B STR_u STR_h STR_i STR_d "\0"
|
||||
#define STRING_C0 STR_C "\0"
|
||||
#define STRING_Canadian_Aboriginal0 STR_C STR_a STR_n STR_a STR_d STR_i STR_a STR_n STR_UNDERSCORE STR_A STR_b STR_o STR_r STR_i STR_g STR_i STR_n STR_a STR_l "\0"
|
||||
#define STRING_Carian0 STR_C STR_a STR_r STR_i STR_a STR_n "\0"
|
||||
#define STRING_Cc0 STR_C STR_c "\0"
|
||||
#define STRING_Cf0 STR_C STR_f "\0"
|
||||
#define STRING_Chakma0 STR_C STR_h STR_a STR_k STR_m STR_a "\0"
|
||||
#define STRING_Cham0 STR_C STR_h STR_a STR_m "\0"
|
||||
#define STRING_Cherokee0 STR_C STR_h STR_e STR_r STR_o STR_k STR_e STR_e "\0"
|
||||
#define STRING_Cn0 STR_C STR_n "\0"
|
||||
#define STRING_Co0 STR_C STR_o "\0"
|
||||
#define STRING_Common0 STR_C STR_o STR_m STR_m STR_o STR_n "\0"
|
||||
#define STRING_Coptic0 STR_C STR_o STR_p STR_t STR_i STR_c "\0"
|
||||
#define STRING_Cs0 STR_C STR_s "\0"
|
||||
#define STRING_Cuneiform0 STR_C STR_u STR_n STR_e STR_i STR_f STR_o STR_r STR_m "\0"
|
||||
#define STRING_Cypriot0 STR_C STR_y STR_p STR_r STR_i STR_o STR_t "\0"
|
||||
#define STRING_Cyrillic0 STR_C STR_y STR_r STR_i STR_l STR_l STR_i STR_c "\0"
|
||||
#define STRING_Deseret0 STR_D STR_e STR_s STR_e STR_r STR_e STR_t "\0"
|
||||
#define STRING_Devanagari0 STR_D STR_e STR_v STR_a STR_n STR_a STR_g STR_a STR_r STR_i "\0"
|
||||
#define STRING_Egyptian_Hieroglyphs0 STR_E STR_g STR_y STR_p STR_t STR_i STR_a STR_n STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0"
|
||||
#define STRING_Ethiopic0 STR_E STR_t STR_h STR_i STR_o STR_p STR_i STR_c "\0"
|
||||
#define STRING_Georgian0 STR_G STR_e STR_o STR_r STR_g STR_i STR_a STR_n "\0"
|
||||
#define STRING_Glagolitic0 STR_G STR_l STR_a STR_g STR_o STR_l STR_i STR_t STR_i STR_c "\0"
|
||||
#define STRING_Gothic0 STR_G STR_o STR_t STR_h STR_i STR_c "\0"
|
||||
#define STRING_Greek0 STR_G STR_r STR_e STR_e STR_k "\0"
|
||||
#define STRING_Gujarati0 STR_G STR_u STR_j STR_a STR_r STR_a STR_t STR_i "\0"
|
||||
#define STRING_Gurmukhi0 STR_G STR_u STR_r STR_m STR_u STR_k STR_h STR_i "\0"
|
||||
#define STRING_Han0 STR_H STR_a STR_n "\0"
|
||||
#define STRING_Hangul0 STR_H STR_a STR_n STR_g STR_u STR_l "\0"
|
||||
#define STRING_Hanunoo0 STR_H STR_a STR_n STR_u STR_n STR_o STR_o "\0"
|
||||
#define STRING_Hebrew0 STR_H STR_e STR_b STR_r STR_e STR_w "\0"
|
||||
#define STRING_Hiragana0 STR_H STR_i STR_r STR_a STR_g STR_a STR_n STR_a "\0"
|
||||
#define STRING_Imperial_Aramaic0 STR_I STR_m STR_p STR_e STR_r STR_i STR_a STR_l STR_UNDERSCORE STR_A STR_r STR_a STR_m STR_a STR_i STR_c "\0"
|
||||
#define STRING_Inherited0 STR_I STR_n STR_h STR_e STR_r STR_i STR_t STR_e STR_d "\0"
|
||||
#define STRING_Inscriptional_Pahlavi0 STR_I STR_n STR_s STR_c STR_r STR_i STR_p STR_t STR_i STR_o STR_n STR_a STR_l STR_UNDERSCORE STR_P STR_a STR_h STR_l STR_a STR_v STR_i "\0"
|
||||
#define STRING_Inscriptional_Parthian0 STR_I STR_n STR_s STR_c STR_r STR_i STR_p STR_t STR_i STR_o STR_n STR_a STR_l STR_UNDERSCORE STR_P STR_a STR_r STR_t STR_h STR_i STR_a STR_n "\0"
|
||||
#define STRING_Javanese0 STR_J STR_a STR_v STR_a STR_n STR_e STR_s STR_e "\0"
|
||||
#define STRING_Kaithi0 STR_K STR_a STR_i STR_t STR_h STR_i "\0"
|
||||
#define STRING_Kannada0 STR_K STR_a STR_n STR_n STR_a STR_d STR_a "\0"
|
||||
#define STRING_Katakana0 STR_K STR_a STR_t STR_a STR_k STR_a STR_n STR_a "\0"
|
||||
#define STRING_Kayah_Li0 STR_K STR_a STR_y STR_a STR_h STR_UNDERSCORE STR_L STR_i "\0"
|
||||
#define STRING_Kharoshthi0 STR_K STR_h STR_a STR_r STR_o STR_s STR_h STR_t STR_h STR_i "\0"
|
||||
#define STRING_Khmer0 STR_K STR_h STR_m STR_e STR_r "\0"
|
||||
#define STRING_L0 STR_L "\0"
|
||||
#define STRING_L_AMPERSAND0 STR_L STR_AMPERSAND "\0"
|
||||
#define STRING_Lao0 STR_L STR_a STR_o "\0"
|
||||
#define STRING_Latin0 STR_L STR_a STR_t STR_i STR_n "\0"
|
||||
#define STRING_Lepcha0 STR_L STR_e STR_p STR_c STR_h STR_a "\0"
|
||||
#define STRING_Limbu0 STR_L STR_i STR_m STR_b STR_u "\0"
|
||||
#define STRING_Linear_B0 STR_L STR_i STR_n STR_e STR_a STR_r STR_UNDERSCORE STR_B "\0"
|
||||
#define STRING_Lisu0 STR_L STR_i STR_s STR_u "\0"
|
||||
#define STRING_Ll0 STR_L STR_l "\0"
|
||||
#define STRING_Lm0 STR_L STR_m "\0"
|
||||
#define STRING_Lo0 STR_L STR_o "\0"
|
||||
#define STRING_Lt0 STR_L STR_t "\0"
|
||||
#define STRING_Lu0 STR_L STR_u "\0"
|
||||
#define STRING_Lycian0 STR_L STR_y STR_c STR_i STR_a STR_n "\0"
|
||||
#define STRING_Lydian0 STR_L STR_y STR_d STR_i STR_a STR_n "\0"
|
||||
#define STRING_M0 STR_M "\0"
|
||||
#define STRING_Malayalam0 STR_M STR_a STR_l STR_a STR_y STR_a STR_l STR_a STR_m "\0"
|
||||
#define STRING_Mandaic0 STR_M STR_a STR_n STR_d STR_a STR_i STR_c "\0"
|
||||
#define STRING_Mc0 STR_M STR_c "\0"
|
||||
#define STRING_Me0 STR_M STR_e "\0"
|
||||
#define STRING_Meetei_Mayek0 STR_M STR_e STR_e STR_t STR_e STR_i STR_UNDERSCORE STR_M STR_a STR_y STR_e STR_k "\0"
|
||||
#define STRING_Meroitic_Cursive0 STR_M STR_e STR_r STR_o STR_i STR_t STR_i STR_c STR_UNDERSCORE STR_C STR_u STR_r STR_s STR_i STR_v STR_e "\0"
|
||||
#define STRING_Meroitic_Hieroglyphs0 STR_M STR_e STR_r STR_o STR_i STR_t STR_i STR_c STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0"
|
||||
#define STRING_Miao0 STR_M STR_i STR_a STR_o "\0"
|
||||
#define STRING_Mn0 STR_M STR_n "\0"
|
||||
#define STRING_Mongolian0 STR_M STR_o STR_n STR_g STR_o STR_l STR_i STR_a STR_n "\0"
|
||||
#define STRING_Myanmar0 STR_M STR_y STR_a STR_n STR_m STR_a STR_r "\0"
|
||||
#define STRING_N0 STR_N "\0"
|
||||
#define STRING_Nd0 STR_N STR_d "\0"
|
||||
#define STRING_New_Tai_Lue0 STR_N STR_e STR_w STR_UNDERSCORE STR_T STR_a STR_i STR_UNDERSCORE STR_L STR_u STR_e "\0"
|
||||
#define STRING_Nko0 STR_N STR_k STR_o "\0"
|
||||
#define STRING_Nl0 STR_N STR_l "\0"
|
||||
#define STRING_No0 STR_N STR_o "\0"
|
||||
#define STRING_Ogham0 STR_O STR_g STR_h STR_a STR_m "\0"
|
||||
#define STRING_Ol_Chiki0 STR_O STR_l STR_UNDERSCORE STR_C STR_h STR_i STR_k STR_i "\0"
|
||||
#define STRING_Old_Italic0 STR_O STR_l STR_d STR_UNDERSCORE STR_I STR_t STR_a STR_l STR_i STR_c "\0"
|
||||
#define STRING_Old_Persian0 STR_O STR_l STR_d STR_UNDERSCORE STR_P STR_e STR_r STR_s STR_i STR_a STR_n "\0"
|
||||
#define STRING_Old_South_Arabian0 STR_O STR_l STR_d STR_UNDERSCORE STR_S STR_o STR_u STR_t STR_h STR_UNDERSCORE STR_A STR_r STR_a STR_b STR_i STR_a STR_n "\0"
|
||||
#define STRING_Old_Turkic0 STR_O STR_l STR_d STR_UNDERSCORE STR_T STR_u STR_r STR_k STR_i STR_c "\0"
|
||||
#define STRING_Oriya0 STR_O STR_r STR_i STR_y STR_a "\0"
|
||||
#define STRING_Osmanya0 STR_O STR_s STR_m STR_a STR_n STR_y STR_a "\0"
|
||||
#define STRING_P0 STR_P "\0"
|
||||
#define STRING_Pc0 STR_P STR_c "\0"
|
||||
#define STRING_Pd0 STR_P STR_d "\0"
|
||||
#define STRING_Pe0 STR_P STR_e "\0"
|
||||
#define STRING_Pf0 STR_P STR_f "\0"
|
||||
#define STRING_Phags_Pa0 STR_P STR_h STR_a STR_g STR_s STR_UNDERSCORE STR_P STR_a "\0"
|
||||
#define STRING_Phoenician0 STR_P STR_h STR_o STR_e STR_n STR_i STR_c STR_i STR_a STR_n "\0"
|
||||
#define STRING_Pi0 STR_P STR_i "\0"
|
||||
#define STRING_Po0 STR_P STR_o "\0"
|
||||
#define STRING_Ps0 STR_P STR_s "\0"
|
||||
#define STRING_Rejang0 STR_R STR_e STR_j STR_a STR_n STR_g "\0"
|
||||
#define STRING_Runic0 STR_R STR_u STR_n STR_i STR_c "\0"
|
||||
#define STRING_S0 STR_S "\0"
|
||||
#define STRING_Samaritan0 STR_S STR_a STR_m STR_a STR_r STR_i STR_t STR_a STR_n "\0"
|
||||
#define STRING_Saurashtra0 STR_S STR_a STR_u STR_r STR_a STR_s STR_h STR_t STR_r STR_a "\0"
|
||||
#define STRING_Sc0 STR_S STR_c "\0"
|
||||
#define STRING_Sharada0 STR_S STR_h STR_a STR_r STR_a STR_d STR_a "\0"
|
||||
#define STRING_Shavian0 STR_S STR_h STR_a STR_v STR_i STR_a STR_n "\0"
|
||||
#define STRING_Sinhala0 STR_S STR_i STR_n STR_h STR_a STR_l STR_a "\0"
|
||||
#define STRING_Sk0 STR_S STR_k "\0"
|
||||
#define STRING_Sm0 STR_S STR_m "\0"
|
||||
#define STRING_So0 STR_S STR_o "\0"
|
||||
#define STRING_Sora_Sompeng0 STR_S STR_o STR_r STR_a STR_UNDERSCORE STR_S STR_o STR_m STR_p STR_e STR_n STR_g "\0"
|
||||
#define STRING_Sundanese0 STR_S STR_u STR_n STR_d STR_a STR_n STR_e STR_s STR_e "\0"
|
||||
#define STRING_Syloti_Nagri0 STR_S STR_y STR_l STR_o STR_t STR_i STR_UNDERSCORE STR_N STR_a STR_g STR_r STR_i "\0"
|
||||
#define STRING_Syriac0 STR_S STR_y STR_r STR_i STR_a STR_c "\0"
|
||||
#define STRING_Tagalog0 STR_T STR_a STR_g STR_a STR_l STR_o STR_g "\0"
|
||||
#define STRING_Tagbanwa0 STR_T STR_a STR_g STR_b STR_a STR_n STR_w STR_a "\0"
|
||||
#define STRING_Tai_Le0 STR_T STR_a STR_i STR_UNDERSCORE STR_L STR_e "\0"
|
||||
#define STRING_Tai_Tham0 STR_T STR_a STR_i STR_UNDERSCORE STR_T STR_h STR_a STR_m "\0"
|
||||
#define STRING_Tai_Viet0 STR_T STR_a STR_i STR_UNDERSCORE STR_V STR_i STR_e STR_t "\0"
|
||||
#define STRING_Takri0 STR_T STR_a STR_k STR_r STR_i "\0"
|
||||
#define STRING_Tamil0 STR_T STR_a STR_m STR_i STR_l "\0"
|
||||
#define STRING_Telugu0 STR_T STR_e STR_l STR_u STR_g STR_u "\0"
|
||||
#define STRING_Thaana0 STR_T STR_h STR_a STR_a STR_n STR_a "\0"
|
||||
#define STRING_Thai0 STR_T STR_h STR_a STR_i "\0"
|
||||
#define STRING_Tibetan0 STR_T STR_i STR_b STR_e STR_t STR_a STR_n "\0"
|
||||
#define STRING_Tifinagh0 STR_T STR_i STR_f STR_i STR_n STR_a STR_g STR_h "\0"
|
||||
#define STRING_Ugaritic0 STR_U STR_g STR_a STR_r STR_i STR_t STR_i STR_c "\0"
|
||||
#define STRING_Vai0 STR_V STR_a STR_i "\0"
|
||||
#define STRING_Xan0 STR_X STR_a STR_n "\0"
|
||||
#define STRING_Xps0 STR_X STR_p STR_s "\0"
|
||||
#define STRING_Xsp0 STR_X STR_s STR_p "\0"
|
||||
#define STRING_Xwd0 STR_X STR_w STR_d "\0"
|
||||
#define STRING_Yi0 STR_Y STR_i "\0"
|
||||
#define STRING_Z0 STR_Z "\0"
|
||||
#define STRING_Zl0 STR_Z STR_l "\0"
|
||||
#define STRING_Zp0 STR_Z STR_p "\0"
|
||||
#define STRING_Zs0 STR_Z STR_s "\0"
|
||||
|
||||
const char PRIV(utt_names)[] =
|
||||
STRING_Any0
|
||||
STRING_Arabic0
|
||||
STRING_Armenian0
|
||||
STRING_Avestan0
|
||||
STRING_Balinese0
|
||||
STRING_Bamum0
|
||||
STRING_Batak0
|
||||
STRING_Bengali0
|
||||
STRING_Bopomofo0
|
||||
STRING_Brahmi0
|
||||
STRING_Braille0
|
||||
STRING_Buginese0
|
||||
STRING_Buhid0
|
||||
STRING_C0
|
||||
STRING_Canadian_Aboriginal0
|
||||
STRING_Carian0
|
||||
STRING_Cc0
|
||||
STRING_Cf0
|
||||
STRING_Chakma0
|
||||
STRING_Cham0
|
||||
STRING_Cherokee0
|
||||
STRING_Cn0
|
||||
STRING_Co0
|
||||
STRING_Common0
|
||||
STRING_Coptic0
|
||||
STRING_Cs0
|
||||
STRING_Cuneiform0
|
||||
STRING_Cypriot0
|
||||
STRING_Cyrillic0
|
||||
STRING_Deseret0
|
||||
STRING_Devanagari0
|
||||
STRING_Egyptian_Hieroglyphs0
|
||||
STRING_Ethiopic0
|
||||
STRING_Georgian0
|
||||
STRING_Glagolitic0
|
||||
STRING_Gothic0
|
||||
STRING_Greek0
|
||||
STRING_Gujarati0
|
||||
STRING_Gurmukhi0
|
||||
STRING_Han0
|
||||
STRING_Hangul0
|
||||
STRING_Hanunoo0
|
||||
STRING_Hebrew0
|
||||
STRING_Hiragana0
|
||||
STRING_Imperial_Aramaic0
|
||||
STRING_Inherited0
|
||||
STRING_Inscriptional_Pahlavi0
|
||||
STRING_Inscriptional_Parthian0
|
||||
STRING_Javanese0
|
||||
STRING_Kaithi0
|
||||
STRING_Kannada0
|
||||
STRING_Katakana0
|
||||
STRING_Kayah_Li0
|
||||
STRING_Kharoshthi0
|
||||
STRING_Khmer0
|
||||
STRING_L0
|
||||
STRING_L_AMPERSAND0
|
||||
STRING_Lao0
|
||||
STRING_Latin0
|
||||
STRING_Lepcha0
|
||||
STRING_Limbu0
|
||||
STRING_Linear_B0
|
||||
STRING_Lisu0
|
||||
STRING_Ll0
|
||||
STRING_Lm0
|
||||
STRING_Lo0
|
||||
STRING_Lt0
|
||||
STRING_Lu0
|
||||
STRING_Lycian0
|
||||
STRING_Lydian0
|
||||
STRING_M0
|
||||
STRING_Malayalam0
|
||||
STRING_Mandaic0
|
||||
STRING_Mc0
|
||||
STRING_Me0
|
||||
STRING_Meetei_Mayek0
|
||||
STRING_Meroitic_Cursive0
|
||||
STRING_Meroitic_Hieroglyphs0
|
||||
STRING_Miao0
|
||||
STRING_Mn0
|
||||
STRING_Mongolian0
|
||||
STRING_Myanmar0
|
||||
STRING_N0
|
||||
STRING_Nd0
|
||||
STRING_New_Tai_Lue0
|
||||
STRING_Nko0
|
||||
STRING_Nl0
|
||||
STRING_No0
|
||||
STRING_Ogham0
|
||||
STRING_Ol_Chiki0
|
||||
STRING_Old_Italic0
|
||||
STRING_Old_Persian0
|
||||
STRING_Old_South_Arabian0
|
||||
STRING_Old_Turkic0
|
||||
STRING_Oriya0
|
||||
STRING_Osmanya0
|
||||
STRING_P0
|
||||
STRING_Pc0
|
||||
STRING_Pd0
|
||||
STRING_Pe0
|
||||
STRING_Pf0
|
||||
STRING_Phags_Pa0
|
||||
STRING_Phoenician0
|
||||
STRING_Pi0
|
||||
STRING_Po0
|
||||
STRING_Ps0
|
||||
STRING_Rejang0
|
||||
STRING_Runic0
|
||||
STRING_S0
|
||||
STRING_Samaritan0
|
||||
STRING_Saurashtra0
|
||||
STRING_Sc0
|
||||
STRING_Sharada0
|
||||
STRING_Shavian0
|
||||
STRING_Sinhala0
|
||||
STRING_Sk0
|
||||
STRING_Sm0
|
||||
STRING_So0
|
||||
STRING_Sora_Sompeng0
|
||||
STRING_Sundanese0
|
||||
STRING_Syloti_Nagri0
|
||||
STRING_Syriac0
|
||||
STRING_Tagalog0
|
||||
STRING_Tagbanwa0
|
||||
STRING_Tai_Le0
|
||||
STRING_Tai_Tham0
|
||||
STRING_Tai_Viet0
|
||||
STRING_Takri0
|
||||
STRING_Tamil0
|
||||
STRING_Telugu0
|
||||
STRING_Thaana0
|
||||
STRING_Thai0
|
||||
STRING_Tibetan0
|
||||
STRING_Tifinagh0
|
||||
STRING_Ugaritic0
|
||||
STRING_Vai0
|
||||
STRING_Xan0
|
||||
STRING_Xps0
|
||||
STRING_Xsp0
|
||||
STRING_Xwd0
|
||||
STRING_Yi0
|
||||
STRING_Z0
|
||||
STRING_Zl0
|
||||
STRING_Zp0
|
||||
STRING_Zs0;
|
||||
|
||||
const ucp_type_table PRIV(utt)[] = {
|
||||
{ 0, PT_ANY, 0 },
|
||||
{ 4, PT_SC, ucp_Arabic },
|
||||
{ 11, PT_SC, ucp_Armenian },
|
||||
{ 20, PT_SC, ucp_Avestan },
|
||||
{ 28, PT_SC, ucp_Balinese },
|
||||
{ 37, PT_SC, ucp_Bamum },
|
||||
{ 43, PT_SC, ucp_Batak },
|
||||
{ 49, PT_SC, ucp_Bengali },
|
||||
{ 57, PT_SC, ucp_Bopomofo },
|
||||
{ 66, PT_SC, ucp_Brahmi },
|
||||
{ 73, PT_SC, ucp_Braille },
|
||||
{ 81, PT_SC, ucp_Buginese },
|
||||
{ 90, PT_SC, ucp_Buhid },
|
||||
{ 96, PT_GC, ucp_C },
|
||||
{ 98, PT_SC, ucp_Canadian_Aboriginal },
|
||||
{ 118, PT_SC, ucp_Carian },
|
||||
{ 125, PT_PC, ucp_Cc },
|
||||
{ 128, PT_PC, ucp_Cf },
|
||||
{ 131, PT_SC, ucp_Chakma },
|
||||
{ 138, PT_SC, ucp_Cham },
|
||||
{ 143, PT_SC, ucp_Cherokee },
|
||||
{ 152, PT_PC, ucp_Cn },
|
||||
{ 155, PT_PC, ucp_Co },
|
||||
{ 158, PT_SC, ucp_Common },
|
||||
{ 165, PT_SC, ucp_Coptic },
|
||||
{ 172, PT_PC, ucp_Cs },
|
||||
{ 175, PT_SC, ucp_Cuneiform },
|
||||
{ 185, PT_SC, ucp_Cypriot },
|
||||
{ 193, PT_SC, ucp_Cyrillic },
|
||||
{ 202, PT_SC, ucp_Deseret },
|
||||
{ 210, PT_SC, ucp_Devanagari },
|
||||
{ 221, PT_SC, ucp_Egyptian_Hieroglyphs },
|
||||
{ 242, PT_SC, ucp_Ethiopic },
|
||||
{ 251, PT_SC, ucp_Georgian },
|
||||
{ 260, PT_SC, ucp_Glagolitic },
|
||||
{ 271, PT_SC, ucp_Gothic },
|
||||
{ 278, PT_SC, ucp_Greek },
|
||||
{ 284, PT_SC, ucp_Gujarati },
|
||||
{ 293, PT_SC, ucp_Gurmukhi },
|
||||
{ 302, PT_SC, ucp_Han },
|
||||
{ 306, PT_SC, ucp_Hangul },
|
||||
{ 313, PT_SC, ucp_Hanunoo },
|
||||
{ 321, PT_SC, ucp_Hebrew },
|
||||
{ 328, PT_SC, ucp_Hiragana },
|
||||
{ 337, PT_SC, ucp_Imperial_Aramaic },
|
||||
{ 354, PT_SC, ucp_Inherited },
|
||||
{ 364, PT_SC, ucp_Inscriptional_Pahlavi },
|
||||
{ 386, PT_SC, ucp_Inscriptional_Parthian },
|
||||
{ 409, PT_SC, ucp_Javanese },
|
||||
{ 418, PT_SC, ucp_Kaithi },
|
||||
{ 425, PT_SC, ucp_Kannada },
|
||||
{ 433, PT_SC, ucp_Katakana },
|
||||
{ 442, PT_SC, ucp_Kayah_Li },
|
||||
{ 451, PT_SC, ucp_Kharoshthi },
|
||||
{ 462, PT_SC, ucp_Khmer },
|
||||
{ 468, PT_GC, ucp_L },
|
||||
{ 470, PT_LAMP, 0 },
|
||||
{ 473, PT_SC, ucp_Lao },
|
||||
{ 477, PT_SC, ucp_Latin },
|
||||
{ 483, PT_SC, ucp_Lepcha },
|
||||
{ 490, PT_SC, ucp_Limbu },
|
||||
{ 496, PT_SC, ucp_Linear_B },
|
||||
{ 505, PT_SC, ucp_Lisu },
|
||||
{ 510, PT_PC, ucp_Ll },
|
||||
{ 513, PT_PC, ucp_Lm },
|
||||
{ 516, PT_PC, ucp_Lo },
|
||||
{ 519, PT_PC, ucp_Lt },
|
||||
{ 522, PT_PC, ucp_Lu },
|
||||
{ 525, PT_SC, ucp_Lycian },
|
||||
{ 532, PT_SC, ucp_Lydian },
|
||||
{ 539, PT_GC, ucp_M },
|
||||
{ 541, PT_SC, ucp_Malayalam },
|
||||
{ 551, PT_SC, ucp_Mandaic },
|
||||
{ 559, PT_PC, ucp_Mc },
|
||||
{ 562, PT_PC, ucp_Me },
|
||||
{ 565, PT_SC, ucp_Meetei_Mayek },
|
||||
{ 578, PT_SC, ucp_Meroitic_Cursive },
|
||||
{ 595, PT_SC, ucp_Meroitic_Hieroglyphs },
|
||||
{ 616, PT_SC, ucp_Miao },
|
||||
{ 621, PT_PC, ucp_Mn },
|
||||
{ 624, PT_SC, ucp_Mongolian },
|
||||
{ 634, PT_SC, ucp_Myanmar },
|
||||
{ 642, PT_GC, ucp_N },
|
||||
{ 644, PT_PC, ucp_Nd },
|
||||
{ 647, PT_SC, ucp_New_Tai_Lue },
|
||||
{ 659, PT_SC, ucp_Nko },
|
||||
{ 663, PT_PC, ucp_Nl },
|
||||
{ 666, PT_PC, ucp_No },
|
||||
{ 669, PT_SC, ucp_Ogham },
|
||||
{ 675, PT_SC, ucp_Ol_Chiki },
|
||||
{ 684, PT_SC, ucp_Old_Italic },
|
||||
{ 695, PT_SC, ucp_Old_Persian },
|
||||
{ 707, PT_SC, ucp_Old_South_Arabian },
|
||||
{ 725, PT_SC, ucp_Old_Turkic },
|
||||
{ 736, PT_SC, ucp_Oriya },
|
||||
{ 742, PT_SC, ucp_Osmanya },
|
||||
{ 750, PT_GC, ucp_P },
|
||||
{ 752, PT_PC, ucp_Pc },
|
||||
{ 755, PT_PC, ucp_Pd },
|
||||
{ 758, PT_PC, ucp_Pe },
|
||||
{ 761, PT_PC, ucp_Pf },
|
||||
{ 764, PT_SC, ucp_Phags_Pa },
|
||||
{ 773, PT_SC, ucp_Phoenician },
|
||||
{ 784, PT_PC, ucp_Pi },
|
||||
{ 787, PT_PC, ucp_Po },
|
||||
{ 790, PT_PC, ucp_Ps },
|
||||
{ 793, PT_SC, ucp_Rejang },
|
||||
{ 800, PT_SC, ucp_Runic },
|
||||
{ 806, PT_GC, ucp_S },
|
||||
{ 808, PT_SC, ucp_Samaritan },
|
||||
{ 818, PT_SC, ucp_Saurashtra },
|
||||
{ 829, PT_PC, ucp_Sc },
|
||||
{ 832, PT_SC, ucp_Sharada },
|
||||
{ 840, PT_SC, ucp_Shavian },
|
||||
{ 848, PT_SC, ucp_Sinhala },
|
||||
{ 856, PT_PC, ucp_Sk },
|
||||
{ 859, PT_PC, ucp_Sm },
|
||||
{ 862, PT_PC, ucp_So },
|
||||
{ 865, PT_SC, ucp_Sora_Sompeng },
|
||||
{ 878, PT_SC, ucp_Sundanese },
|
||||
{ 888, PT_SC, ucp_Syloti_Nagri },
|
||||
{ 901, PT_SC, ucp_Syriac },
|
||||
{ 908, PT_SC, ucp_Tagalog },
|
||||
{ 916, PT_SC, ucp_Tagbanwa },
|
||||
{ 925, PT_SC, ucp_Tai_Le },
|
||||
{ 932, PT_SC, ucp_Tai_Tham },
|
||||
{ 941, PT_SC, ucp_Tai_Viet },
|
||||
{ 950, PT_SC, ucp_Takri },
|
||||
{ 956, PT_SC, ucp_Tamil },
|
||||
{ 962, PT_SC, ucp_Telugu },
|
||||
{ 969, PT_SC, ucp_Thaana },
|
||||
{ 976, PT_SC, ucp_Thai },
|
||||
{ 981, PT_SC, ucp_Tibetan },
|
||||
{ 989, PT_SC, ucp_Tifinagh },
|
||||
{ 998, PT_SC, ucp_Ugaritic },
|
||||
{ 1007, PT_SC, ucp_Vai },
|
||||
{ 1011, PT_ALNUM, 0 },
|
||||
{ 1015, PT_PXSPACE, 0 },
|
||||
{ 1019, PT_SPACE, 0 },
|
||||
{ 1023, PT_WORD, 0 },
|
||||
{ 1027, PT_SC, ucp_Yi },
|
||||
{ 1030, PT_GC, ucp_Z },
|
||||
{ 1032, PT_PC, ucp_Zl },
|
||||
{ 1035, PT_PC, ucp_Zp },
|
||||
{ 1038, PT_PC, ucp_Zs }
|
||||
};
|
||||
|
||||
const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
|
||||
|
||||
/* Return the "other case" of code point c, using GLib's case conversion
functions: the result of g_unichar_toupper() when that differs from c,
otherwise the result of g_unichar_tolower() (which is returned as-is, so a
character for which neither conversion changes anything comes back unchanged). */

unsigned int
_pcre_ucp_othercase(const unsigned int c)
{
unsigned int mapped;

mapped = g_unichar_toupper(c);
if (mapped == c)
  mapped = g_unichar_tolower(c);   /* upper-casing was a no-op: try lower */

return mapped;
}
|
||||
|
||||
#endif /* SUPPORT_UTF */
|
||||
|
||||
/* End of pcre_tables.c */
|
||||
@@ -1,299 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2012 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains an internal function for validating UTF-8 character
|
||||
strings. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Validate a UTF-8 string *
|
||||
*************************************************/
|
||||
|
||||
/* This function is called (optionally) at the start of compile or match, to
|
||||
check that a supposed UTF-8 string is actually valid. The early check means
|
||||
that subsequent code can assume it is dealing with a valid string. The check
|
||||
can be turned off for maximum performance, but the consequences of supplying an
|
||||
invalid string are then undefined.
|
||||
|
||||
Originally, this function checked according to RFC 2279, allowing for values in
|
||||
the range 0 to 0x7fffffff, up to 6 bytes long, but ensuring that they were in
|
||||
the canonical format. Once somebody had pointed out RFC 3629 to me (it
|
||||
obsoletes 2279), additional restrictions were applied. The values are now
|
||||
limited to be between 0 and 0x0010ffff, no more than 4 bytes long, and the
|
||||
subrange 0xd800 to 0xdfff is excluded. However, the format of 5-byte and 6-byte
|
||||
characters is still checked.
|
||||
|
||||
From release 8.13 more information about the details of the error are passed
|
||||
back in the returned value:
|
||||
|
||||
PCRE_UTF8_ERR0 No error
|
||||
PCRE_UTF8_ERR1 Missing 1 byte at the end of the string
|
||||
PCRE_UTF8_ERR2 Missing 2 bytes at the end of the string
|
||||
PCRE_UTF8_ERR3 Missing 3 bytes at the end of the string
|
||||
PCRE_UTF8_ERR4 Missing 4 bytes at the end of the string
|
||||
PCRE_UTF8_ERR5 Missing 5 bytes at the end of the string
|
||||
PCRE_UTF8_ERR6 2nd-byte's two top bits are not 0x80
|
||||
PCRE_UTF8_ERR7 3rd-byte's two top bits are not 0x80
|
||||
PCRE_UTF8_ERR8 4th-byte's two top bits are not 0x80
|
||||
PCRE_UTF8_ERR9 5th-byte's two top bits are not 0x80
|
||||
PCRE_UTF8_ERR10 6th-byte's two top bits are not 0x80
|
||||
PCRE_UTF8_ERR11 5-byte character is not permitted by RFC 3629
|
||||
PCRE_UTF8_ERR12 6-byte character is not permitted by RFC 3629
|
||||
PCRE_UTF8_ERR13 4-byte character with value > 0x10ffff is not permitted
|
||||
PCRE_UTF8_ERR14 3-byte character with value 0xd800-0xdfff is not permitted
|
||||
PCRE_UTF8_ERR15 Overlong 2-byte sequence
|
||||
PCRE_UTF8_ERR16 Overlong 3-byte sequence
|
||||
PCRE_UTF8_ERR17 Overlong 4-byte sequence
|
||||
PCRE_UTF8_ERR18 Overlong 5-byte sequence (won't ever occur)
|
||||
PCRE_UTF8_ERR19 Overlong 6-byte sequence (won't ever occur)
|
||||
PCRE_UTF8_ERR20 Isolated 0x80 byte (not within UTF-8 character)
|
||||
PCRE_UTF8_ERR21 Byte with the illegal value 0xfe or 0xff
|
||||
|
||||
Arguments:
|
||||
string points to the string
|
||||
length length of string, or -1 if the string is zero-terminated
|
||||
erroroffset pointer to an error position offset variable
|
||||
|
||||
Returns: = 0 if the string is a valid UTF-8 string
|
||||
> 0 otherwise, setting the offset of the bad character
|
||||
*/
|
||||
|
||||
int
|
||||
PRIV(valid_utf)(PCRE_PUCHAR string, int length, int *erroroffset)
|
||||
{
|
||||
#ifdef SUPPORT_UTF
|
||||
PCRE_PUCHAR p;
|
||||
|
||||
if (length < 0)
|
||||
{
|
||||
for (p = string; *p != 0; p++);
|
||||
length = (int)(p - string);
|
||||
}
|
||||
|
||||
for (p = string; length-- > 0; p++)
|
||||
{
|
||||
int ab, c, d;
|
||||
|
||||
c = *p;
|
||||
if (c < 128) continue; /* ASCII character */
|
||||
|
||||
if (c < 0xc0) /* Isolated 10xx xxxx byte */
|
||||
{
|
||||
*erroroffset = (int)(p - string);
|
||||
return PCRE_UTF8_ERR20;
|
||||
}
|
||||
|
||||
if (c >= 0xfe) /* Invalid 0xfe or 0xff bytes */
|
||||
{
|
||||
*erroroffset = (int)(p - string);
|
||||
return PCRE_UTF8_ERR21;
|
||||
}
|
||||
|
||||
ab = PRIV(utf8_table4)[c & 0x3f]; /* Number of additional bytes */
|
||||
if (length < ab)
|
||||
{
|
||||
*erroroffset = (int)(p - string); /* Missing bytes */
|
||||
return ab - length; /* Codes ERR1 to ERR5 */
|
||||
}
|
||||
length -= ab; /* Length remaining */
|
||||
|
||||
/* Check top bits in the second byte */
|
||||
|
||||
if (((d = *(++p)) & 0xc0) != 0x80)
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 1;
|
||||
return PCRE_UTF8_ERR6;
|
||||
}
|
||||
|
||||
/* For each length, check that the remaining bytes start with the 0x80 bit
|
||||
set and not the 0x40 bit. Then check for an overlong sequence, and for the
|
||||
excluded range 0xd800 to 0xdfff. */
|
||||
|
||||
switch (ab)
|
||||
{
|
||||
/* 2-byte character. No further bytes to check for 0x80. Check first byte
|
||||
for for xx00 000x (overlong sequence). */
|
||||
|
||||
case 1: if ((c & 0x3e) == 0)
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 1;
|
||||
return PCRE_UTF8_ERR15;
|
||||
}
|
||||
break;
|
||||
|
||||
/* 3-byte character. Check third byte for 0x80. Then check first 2 bytes
|
||||
for 1110 0000, xx0x xxxx (overlong sequence) or
|
||||
1110 1101, 1010 xxxx (0xd800 - 0xdfff) */
|
||||
|
||||
case 2:
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 2;
|
||||
return PCRE_UTF8_ERR7;
|
||||
}
|
||||
if (c == 0xe0 && (d & 0x20) == 0)
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 2;
|
||||
return PCRE_UTF8_ERR16;
|
||||
}
|
||||
if (c == 0xed && d >= 0xa0)
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 2;
|
||||
return PCRE_UTF8_ERR14;
|
||||
}
|
||||
break;
|
||||
|
||||
/* 4-byte character. Check 3rd and 4th bytes for 0x80. Then check first 2
|
||||
bytes for for 1111 0000, xx00 xxxx (overlong sequence), then check for a
|
||||
character greater than 0x0010ffff (f4 8f bf bf) */
|
||||
|
||||
case 3:
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 2;
|
||||
return PCRE_UTF8_ERR7;
|
||||
}
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Fourth byte */
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 3;
|
||||
return PCRE_UTF8_ERR8;
|
||||
}
|
||||
if (c == 0xf0 && (d & 0x30) == 0)
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 3;
|
||||
return PCRE_UTF8_ERR17;
|
||||
}
|
||||
if (c > 0xf4 || (c == 0xf4 && d > 0x8f))
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 3;
|
||||
return PCRE_UTF8_ERR13;
|
||||
}
|
||||
break;
|
||||
|
||||
/* 5-byte and 6-byte characters are not allowed by RFC 3629, and will be
|
||||
rejected by the length test below. However, we do the appropriate tests
|
||||
here so that overlong sequences get diagnosed, and also in case there is
|
||||
ever an option for handling these larger code points. */
|
||||
|
||||
/* 5-byte character. Check 3rd, 4th, and 5th bytes for 0x80. Then check for
|
||||
1111 1000, xx00 0xxx */
|
||||
|
||||
case 4:
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 2;
|
||||
return PCRE_UTF8_ERR7;
|
||||
}
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Fourth byte */
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 3;
|
||||
return PCRE_UTF8_ERR8;
|
||||
}
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Fifth byte */
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 4;
|
||||
return PCRE_UTF8_ERR9;
|
||||
}
|
||||
if (c == 0xf8 && (d & 0x38) == 0)
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 4;
|
||||
return PCRE_UTF8_ERR18;
|
||||
}
|
||||
break;
|
||||
|
||||
/* 6-byte character. Check 3rd-6th bytes for 0x80. Then check for
|
||||
1111 1100, xx00 00xx. */
|
||||
|
||||
case 5:
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 2;
|
||||
return PCRE_UTF8_ERR7;
|
||||
}
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Fourth byte */
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 3;
|
||||
return PCRE_UTF8_ERR8;
|
||||
}
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Fifth byte */
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 4;
|
||||
return PCRE_UTF8_ERR9;
|
||||
}
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Sixth byte */
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 5;
|
||||
return PCRE_UTF8_ERR10;
|
||||
}
|
||||
if (c == 0xfc && (d & 0x3c) == 0)
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 5;
|
||||
return PCRE_UTF8_ERR19;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
/* Character is valid under RFC 2279, but 4-byte and 5-byte characters are
|
||||
excluded by RFC 3629. The pointer p is currently at the last byte of the
|
||||
character. */
|
||||
|
||||
if (ab > 3)
|
||||
{
|
||||
*erroroffset = (int)(p - string) - ab;
|
||||
return (ab == 4)? PCRE_UTF8_ERR11 : PCRE_UTF8_ERR12;
|
||||
}
|
||||
}
|
||||
|
||||
#else /* SUPPORT_UTF */
|
||||
(void)(string); /* Keep picky compilers happy */
|
||||
(void)(length);
|
||||
#endif
|
||||
|
||||
return PCRE_UTF8_ERR0; /* This indicates success */
|
||||
}
|
||||
|
||||
/* End of pcre_valid_utf8.c */
|
||||
@@ -1,198 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2012 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains an internal function that is used to match an extended
|
||||
class. It is used by both pcre_exec() and pcre_dfa_exec(). */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Match character against an XCLASS *
|
||||
*************************************************/
|
||||
|
||||
/* This function is called to match a character against an extended class that
|
||||
might contain values > 255 and/or Unicode properties.
|
||||
|
||||
Arguments:
|
||||
c the character
|
||||
data points to the flag byte of the XCLASS data
|
||||
|
||||
Returns: TRUE if character matches, else FALSE
|
||||
*/
|
||||
|
||||
BOOL
|
||||
PRIV(xclass)(int c, const pcre_uchar *data, BOOL utf)
|
||||
{
|
||||
int t;
|
||||
BOOL negated = (*data & XCL_NOT) != 0;
|
||||
|
||||
(void)utf;
|
||||
#ifdef COMPILE_PCRE8
|
||||
/* In 8 bit mode, this must always be TRUE. Help the compiler to know that. */
|
||||
utf = TRUE;
|
||||
#endif
|
||||
|
||||
/* Character values < 256 are matched against a bitmap, if one is present. If
|
||||
not, we still carry on, because there may be ranges that start below 256 in the
|
||||
additional data. */
|
||||
|
||||
if (c < 256)
|
||||
{
|
||||
if ((*data & XCL_MAP) != 0 &&
|
||||
(((pcre_uint8 *)(data + 1))[c/8] & (1 << (c&7))) != 0)
|
||||
return !negated; /* char found */
|
||||
}
|
||||
|
||||
/* First skip the bit map if present. Then match against the list of Unicode
|
||||
properties or large chars or ranges that end with a large char. We won't ever
|
||||
encounter XCL_PROP or XCL_NOTPROP when UCP support is not compiled. */
|
||||
|
||||
if ((*data++ & XCL_MAP) != 0) data += 32 / sizeof(pcre_uchar);
|
||||
|
||||
while ((t = *data++) != XCL_END)
|
||||
{
|
||||
int x, y;
|
||||
if (t == XCL_SINGLE)
|
||||
{
|
||||
#ifdef SUPPORT_UTF
|
||||
if (utf)
|
||||
{
|
||||
GETCHARINC(x, data); /* macro generates multiple statements */
|
||||
}
|
||||
else
|
||||
#endif
|
||||
x = *data++;
|
||||
if (c == x) return !negated;
|
||||
}
|
||||
else if (t == XCL_RANGE)
|
||||
{
|
||||
#ifdef SUPPORT_UTF
|
||||
if (utf)
|
||||
{
|
||||
GETCHARINC(x, data); /* macro generates multiple statements */
|
||||
GETCHARINC(y, data); /* macro generates multiple statements */
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
x = *data++;
|
||||
y = *data++;
|
||||
}
|
||||
if (c >= x && c <= y) return !negated;
|
||||
}
|
||||
|
||||
#ifdef SUPPORT_UCP
|
||||
else /* XCL_PROP & XCL_NOTPROP */
|
||||
{
|
||||
const pcre_uint8 chartype = UCD_CHARTYPE(c);
|
||||
|
||||
switch(*data)
|
||||
{
|
||||
case PT_ANY:
|
||||
if (t == XCL_PROP) return !negated;
|
||||
break;
|
||||
|
||||
case PT_LAMP:
|
||||
if ((chartype == ucp_Lu || chartype == ucp_Ll ||
|
||||
chartype == ucp_Lt) == (t == XCL_PROP)) return !negated;
|
||||
break;
|
||||
|
||||
case PT_GC:
|
||||
if ((data[1] == PRIV(ucp_gentype)[chartype]) == (t == XCL_PROP))
|
||||
return !negated;
|
||||
break;
|
||||
|
||||
case PT_PC:
|
||||
if ((data[1] == chartype) == (t == XCL_PROP)) return !negated;
|
||||
break;
|
||||
|
||||
case PT_SC:
|
||||
if ((data[1] == UCD_SCRIPT(c)) == (t == XCL_PROP)) return !negated;
|
||||
break;
|
||||
|
||||
case PT_ALNUM:
|
||||
if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||
PRIV(ucp_gentype)[chartype] == ucp_N) == (t == XCL_PROP))
|
||||
return !negated;
|
||||
break;
|
||||
|
||||
case PT_SPACE: /* Perl space */
|
||||
if ((PRIV(ucp_gentype)[chartype] == ucp_Z ||
|
||||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
|
||||
== (t == XCL_PROP))
|
||||
return !negated;
|
||||
break;
|
||||
|
||||
case PT_PXSPACE: /* POSIX space */
|
||||
if ((PRIV(ucp_gentype)[chartype] == ucp_Z ||
|
||||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
|
||||
c == CHAR_FF || c == CHAR_CR) == (t == XCL_PROP))
|
||||
return !negated;
|
||||
break;
|
||||
|
||||
case PT_WORD:
|
||||
if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||
PRIV(ucp_gentype)[chartype] == ucp_N || c == CHAR_UNDERSCORE)
|
||||
== (t == XCL_PROP))
|
||||
return !negated;
|
||||
break;
|
||||
|
||||
/* This should never occur, but compilers may mutter if there is no
|
||||
default. */
|
||||
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
data += 2;
|
||||
}
|
||||
#endif /* SUPPORT_UCP */
|
||||
}
|
||||
|
||||
return negated; /* char did not match */
|
||||
}
|
||||
|
||||
/* End of pcre_xclass.c */
|
||||
179
glib/pcre/ucp.h
179
glib/pcre/ucp.h
@@ -1,179 +0,0 @@
|
||||
/*************************************************
|
||||
* Unicode Property Table handler *
|
||||
*************************************************/
|
||||
|
||||
#ifndef _UCP_H
|
||||
#define _UCP_H
|
||||
|
||||
/* This file contains definitions of the property values that are returned by
|
||||
the UCD access macros. New values that are added for new releases of Unicode
|
||||
should always be at the end of each enum, for backwards compatibility. */
|
||||
|
||||
/* These are the general character categories. */
|
||||
#ifdef GLIB_COMPILATION
|
||||
#include "gunicode.h"
|
||||
#else
|
||||
#include <glib.h>
|
||||
#endif
|
||||
|
||||
enum {
|
||||
ucp_C, /* Other */
|
||||
ucp_L, /* Letter */
|
||||
ucp_M, /* Mark */
|
||||
ucp_N, /* Number */
|
||||
ucp_P, /* Punctuation */
|
||||
ucp_S, /* Symbol */
|
||||
ucp_Z /* Separator */
|
||||
};
|
||||
|
||||
/* These are the particular character types. */
|
||||
|
||||
enum {
|
||||
ucp_Cc = G_UNICODE_CONTROL, /* Control */
|
||||
ucp_Cf = G_UNICODE_FORMAT, /* Format */
|
||||
ucp_Cn = G_UNICODE_UNASSIGNED, /* Unassigned */
|
||||
ucp_Co = G_UNICODE_PRIVATE_USE, /* Private use */
|
||||
ucp_Cs = G_UNICODE_SURROGATE, /* Surrogate */
|
||||
ucp_Ll = G_UNICODE_LOWERCASE_LETTER, /* Lower case letter */
|
||||
ucp_Lm = G_UNICODE_MODIFIER_LETTER, /* Modifier letter */
|
||||
ucp_Lo = G_UNICODE_OTHER_LETTER, /* Other letter */
|
||||
ucp_Lt = G_UNICODE_TITLECASE_LETTER, /* Title case letter */
|
||||
ucp_Lu = G_UNICODE_UPPERCASE_LETTER, /* Upper case letter */
|
||||
ucp_Mc = G_UNICODE_SPACING_MARK, /* Spacing mark */
|
||||
ucp_Me = G_UNICODE_ENCLOSING_MARK, /* Enclosing mark */
|
||||
ucp_Mn = G_UNICODE_NON_SPACING_MARK, /* Non-spacing mark */
|
||||
ucp_Nd = G_UNICODE_DECIMAL_NUMBER, /* Decimal number */
|
||||
ucp_Nl = G_UNICODE_LETTER_NUMBER, /* Letter number */
|
||||
ucp_No = G_UNICODE_OTHER_NUMBER, /* Other number */
|
||||
ucp_Pc = G_UNICODE_CONNECT_PUNCTUATION, /* Connector punctuation */
|
||||
ucp_Pd = G_UNICODE_DASH_PUNCTUATION, /* Dash punctuation */
|
||||
ucp_Pe = G_UNICODE_CLOSE_PUNCTUATION, /* Close punctuation */
|
||||
ucp_Pf = G_UNICODE_FINAL_PUNCTUATION, /* Final punctuation */
|
||||
ucp_Pi = G_UNICODE_INITIAL_PUNCTUATION, /* Initial punctuation */
|
||||
ucp_Po = G_UNICODE_OTHER_PUNCTUATION, /* Other punctuation */
|
||||
ucp_Ps = G_UNICODE_OPEN_PUNCTUATION, /* Open punctuation */
|
||||
ucp_Sc = G_UNICODE_CURRENCY_SYMBOL, /* Currency symbol */
|
||||
ucp_Sk = G_UNICODE_MODIFIER_SYMBOL, /* Modifier symbol */
|
||||
ucp_Sm = G_UNICODE_MATH_SYMBOL, /* Mathematical symbol */
|
||||
ucp_So = G_UNICODE_OTHER_SYMBOL, /* Other symbol */
|
||||
ucp_Zl = G_UNICODE_LINE_SEPARATOR, /* Line separator */
|
||||
ucp_Zp = G_UNICODE_PARAGRAPH_SEPARATOR, /* Paragraph separator */
|
||||
ucp_Zs = G_UNICODE_SPACE_SEPARATOR /* Space separator */
|
||||
};
|
||||
|
||||
/* These are the script identifications. */
|
||||
|
||||
enum {
|
||||
ucp_Common = G_UNICODE_SCRIPT_COMMON,
|
||||
ucp_Inherited = G_UNICODE_SCRIPT_INHERITED,
|
||||
|
||||
ucp_Arabic = G_UNICODE_SCRIPT_ARABIC,
|
||||
ucp_Armenian = G_UNICODE_SCRIPT_ARMENIAN,
|
||||
ucp_Bengali = G_UNICODE_SCRIPT_BENGALI,
|
||||
ucp_Bopomofo = G_UNICODE_SCRIPT_BOPOMOFO,
|
||||
ucp_Braille = G_UNICODE_SCRIPT_BRAILLE,
|
||||
ucp_Buginese = G_UNICODE_SCRIPT_BUGINESE,
|
||||
ucp_Buhid = G_UNICODE_SCRIPT_BUHID,
|
||||
ucp_Canadian_Aboriginal = G_UNICODE_SCRIPT_CANADIAN_ABORIGINAL,
|
||||
ucp_Cherokee = G_UNICODE_SCRIPT_CHEROKEE,
|
||||
ucp_Coptic = G_UNICODE_SCRIPT_COPTIC,
|
||||
ucp_Cypriot = G_UNICODE_SCRIPT_CYPRIOT,
|
||||
ucp_Cyrillic = G_UNICODE_SCRIPT_CYRILLIC,
|
||||
ucp_Deseret = G_UNICODE_SCRIPT_DESERET,
|
||||
ucp_Devanagari = G_UNICODE_SCRIPT_DEVANAGARI,
|
||||
ucp_Ethiopic = G_UNICODE_SCRIPT_ETHIOPIC,
|
||||
ucp_Georgian = G_UNICODE_SCRIPT_GEORGIAN,
|
||||
ucp_Glagolitic = G_UNICODE_SCRIPT_GLAGOLITIC,
|
||||
ucp_Gothic = G_UNICODE_SCRIPT_GOTHIC,
|
||||
ucp_Greek = G_UNICODE_SCRIPT_GREEK,
|
||||
ucp_Gujarati = G_UNICODE_SCRIPT_GUJARATI,
|
||||
ucp_Gurmukhi = G_UNICODE_SCRIPT_GURMUKHI,
|
||||
ucp_Han = G_UNICODE_SCRIPT_HAN,
|
||||
ucp_Hangul = G_UNICODE_SCRIPT_HANGUL,
|
||||
ucp_Hanunoo = G_UNICODE_SCRIPT_HANUNOO,
|
||||
ucp_Hebrew = G_UNICODE_SCRIPT_HEBREW,
|
||||
ucp_Hiragana = G_UNICODE_SCRIPT_HIRAGANA,
|
||||
ucp_Kannada = G_UNICODE_SCRIPT_KANNADA,
|
||||
ucp_Katakana = G_UNICODE_SCRIPT_KATAKANA,
|
||||
ucp_Kharoshthi = G_UNICODE_SCRIPT_KHAROSHTHI,
|
||||
ucp_Khmer = G_UNICODE_SCRIPT_KHMER,
|
||||
ucp_Lao = G_UNICODE_SCRIPT_LAO,
|
||||
ucp_Latin = G_UNICODE_SCRIPT_LATIN,
|
||||
ucp_Limbu = G_UNICODE_SCRIPT_LIMBU,
|
||||
ucp_Linear_B = G_UNICODE_SCRIPT_LINEAR_B,
|
||||
ucp_Malayalam = G_UNICODE_SCRIPT_MALAYALAM,
|
||||
ucp_Mongolian = G_UNICODE_SCRIPT_MONGOLIAN,
|
||||
ucp_Myanmar = G_UNICODE_SCRIPT_MYANMAR,
|
||||
ucp_New_Tai_Lue = G_UNICODE_SCRIPT_NEW_TAI_LUE,
|
||||
ucp_Ogham = G_UNICODE_SCRIPT_OGHAM,
|
||||
ucp_Old_Italic = G_UNICODE_SCRIPT_OLD_ITALIC,
|
||||
ucp_Old_Persian = G_UNICODE_SCRIPT_OLD_PERSIAN,
|
||||
ucp_Oriya = G_UNICODE_SCRIPT_ORIYA,
|
||||
ucp_Osmanya = G_UNICODE_SCRIPT_OSMANYA,
|
||||
ucp_Runic = G_UNICODE_SCRIPT_RUNIC,
|
||||
ucp_Shavian = G_UNICODE_SCRIPT_SHAVIAN,
|
||||
ucp_Sinhala = G_UNICODE_SCRIPT_SINHALA,
|
||||
ucp_Syloti_Nagri = G_UNICODE_SCRIPT_SYLOTI_NAGRI,
|
||||
ucp_Syriac = G_UNICODE_SCRIPT_SYRIAC,
|
||||
ucp_Tagalog = G_UNICODE_SCRIPT_TAGALOG,
|
||||
ucp_Tagbanwa = G_UNICODE_SCRIPT_TAGBANWA,
|
||||
ucp_Tai_Le = G_UNICODE_SCRIPT_TAI_LE,
|
||||
ucp_Tamil = G_UNICODE_SCRIPT_TAMIL,
|
||||
ucp_Telugu = G_UNICODE_SCRIPT_TELUGU,
|
||||
ucp_Thaana = G_UNICODE_SCRIPT_THAANA,
|
||||
ucp_Thai = G_UNICODE_SCRIPT_THAI,
|
||||
ucp_Tibetan = G_UNICODE_SCRIPT_TIBETAN,
|
||||
ucp_Tifinagh = G_UNICODE_SCRIPT_TIFINAGH,
|
||||
ucp_Ugaritic = G_UNICODE_SCRIPT_UGARITIC,
|
||||
ucp_Yi = G_UNICODE_SCRIPT_YI,
|
||||
/* New for Unicode 5.0: */
|
||||
ucp_Balinese = G_UNICODE_SCRIPT_BALINESE,
|
||||
ucp_Cuneiform = G_UNICODE_SCRIPT_CUNEIFORM,
|
||||
ucp_Nko = G_UNICODE_SCRIPT_NKO,
|
||||
ucp_Phags_Pa = G_UNICODE_SCRIPT_PHAGS_PA,
|
||||
ucp_Phoenician = G_UNICODE_SCRIPT_PHOENICIAN,
|
||||
/* New for Unicode 5.1: */
|
||||
ucp_Carian = G_UNICODE_SCRIPT_CARIAN,
|
||||
ucp_Cham = G_UNICODE_SCRIPT_CHAM,
|
||||
ucp_Kayah_Li = G_UNICODE_SCRIPT_KAYAH_LI,
|
||||
ucp_Lepcha = G_UNICODE_SCRIPT_LEPCHA,
|
||||
ucp_Lycian = G_UNICODE_SCRIPT_LYCIAN,
|
||||
ucp_Lydian = G_UNICODE_SCRIPT_LYDIAN,
|
||||
ucp_Ol_Chiki = G_UNICODE_SCRIPT_OL_CHIKI,
|
||||
ucp_Rejang = G_UNICODE_SCRIPT_REJANG,
|
||||
ucp_Saurashtra = G_UNICODE_SCRIPT_SAURASHTRA,
|
||||
ucp_Sundanese = G_UNICODE_SCRIPT_SUNDANESE,
|
||||
ucp_Vai = G_UNICODE_SCRIPT_VAI,
|
||||
/* New for Unicode 5.2: */
|
||||
ucp_Avestan = G_UNICODE_SCRIPT_AVESTAN,
|
||||
ucp_Bamum = G_UNICODE_SCRIPT_BAMUM,
|
||||
ucp_Egyptian_Hieroglyphs = G_UNICODE_SCRIPT_EGYPTIAN_HIEROGLYPHS,
|
||||
ucp_Imperial_Aramaic = G_UNICODE_SCRIPT_IMPERIAL_ARAMAIC,
|
||||
ucp_Inscriptional_Pahlavi = G_UNICODE_SCRIPT_INSCRIPTIONAL_PAHLAVI,
|
||||
ucp_Inscriptional_Parthian = G_UNICODE_SCRIPT_INSCRIPTIONAL_PARTHIAN,
|
||||
ucp_Javanese = G_UNICODE_SCRIPT_JAVANESE,
|
||||
ucp_Kaithi = G_UNICODE_SCRIPT_KAITHI,
|
||||
ucp_Lisu = G_UNICODE_SCRIPT_LISU,
|
||||
ucp_Meetei_Mayek = G_UNICODE_SCRIPT_MEETEI_MAYEK,
|
||||
ucp_Old_South_Arabian = G_UNICODE_SCRIPT_OLD_SOUTH_ARABIAN,
|
||||
ucp_Old_Turkic = G_UNICODE_SCRIPT_OLD_TURKIC,
|
||||
ucp_Samaritan = G_UNICODE_SCRIPT_SAMARITAN,
|
||||
ucp_Tai_Tham = G_UNICODE_SCRIPT_TAI_THAM,
|
||||
ucp_Tai_Viet = G_UNICODE_SCRIPT_TAI_VIET,
|
||||
/* New for Unicode 6.0.0: */
|
||||
ucp_Batak = G_UNICODE_SCRIPT_BATAK,
|
||||
ucp_Brahmi = G_UNICODE_SCRIPT_BRAHMI,
|
||||
ucp_Mandaic = G_UNICODE_SCRIPT_MANDAIC,
|
||||
/* New for Unicode 6.1.0: */
|
||||
ucp_Chakma = G_UNICODE_SCRIPT_CHAKMA,
|
||||
ucp_Meroitic_Cursive = G_UNICODE_SCRIPT_MEROITIC_CURSIVE,
|
||||
ucp_Meroitic_Hieroglyphs = G_UNICODE_SCRIPT_MEROITIC_HIEROGLYPHS,
|
||||
ucp_Miao = G_UNICODE_SCRIPT_MIAO,
|
||||
ucp_Sharada = G_UNICODE_SCRIPT_SHARADA,
|
||||
ucp_Sora_Sompeng = G_UNICODE_SCRIPT_SORA_SOMPENG,
|
||||
ucp_Takri = G_UNICODE_SCRIPT_TAKRI,
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
/* End of ucp.h */
|
||||
@@ -2446,7 +2446,7 @@ main (int argc, char *argv[])
|
||||
/* Test that othercasing in our pcre/glib integration is bug-for-bug compatible
|
||||
* with pcre's internal tables. Bug #678273 */
|
||||
TEST_MATCH("[DŽ]", G_REGEX_CASELESS, 0, "DŽ", -1, 0, 0, TRUE);
|
||||
TEST_MATCH("[DŽ]", G_REGEX_CASELESS, 0, "Dž", -1, 0, 0, FALSE);
|
||||
TEST_MATCH("[DŽ]", G_REGEX_CASELESS, 0, "Dž", -1, 0, 0, TRUE);
|
||||
TEST_MATCH("[DŽ]", G_REGEX_CASELESS, 0, "dž", -1, 0, 0, TRUE);
|
||||
|
||||
/* TEST_MATCH_NEXT#(pattern, string, string_len, start_position, ...) */
|
||||
@@ -2628,11 +2628,6 @@ main (int argc, char *argv[])
|
||||
TEST_EXPAND("a", "a", "\\0130", FALSE, "X");
|
||||
TEST_EXPAND("a", "a", "\\\\\\0", FALSE, "\\a");
|
||||
TEST_EXPAND("a(?P<G>.)c", "xabcy", "X\\g<G>X", FALSE, "XbX");
|
||||
#ifndef USE_SYSTEM_PCRE
|
||||
/* PCRE >= 8.34 no longer allows this usage. */
|
||||
TEST_EXPAND("(.)(?P<1>.)", "ab", "\\1", FALSE, "a");
|
||||
TEST_EXPAND("(.)(?P<1>.)", "ab", "\\g<1>", FALSE, "a");
|
||||
#endif
|
||||
TEST_EXPAND(".", EURO, "\\0", FALSE, EURO);
|
||||
TEST_EXPAND("(.)", EURO, "\\1", FALSE, EURO);
|
||||
TEST_EXPAND("(?P<G>.)", EURO, "\\g<G>", FALSE, EURO);
|
||||
|
||||
@@ -1,6 +0,0 @@
|
||||
include $(top_srcdir)/glib.mk
|
||||
|
||||
EXTRA_DIST += \
|
||||
update.sh \
|
||||
digitab.patch \
|
||||
memory.patch
|
||||
@@ -1,94 +0,0 @@
|
||||
From 5238ab10c5f3082a4be38410bd01a47ab176dfde Mon Sep 17 00:00:00 2001
|
||||
From: Christian Persch <chpe@gnome.org>
|
||||
Date: Sun, 12 Feb 2012 19:29:42 +0100
|
||||
Subject: [PATCH] regex: Use g_ascii_is[x]digit
|
||||
|
||||
---
|
||||
glib/pcre/pcre_compile.c | 22 ++++++++++++----------
|
||||
1 files changed, 12 insertions(+), 10 deletions(-)
|
||||
|
||||
diff --git a/glib/pcre/pcre_compile.c b/glib/pcre/pcre_compile.c
|
||||
index 8070f51..eb985df 100644
|
||||
--- a/glib/pcre/pcre_compile.c
|
||||
+++ b/glib/pcre/pcre_compile.c
|
||||
@@ -52,6 +52,7 @@ supporting internal functions that are not used by other modules. */
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
+#include "gstrfuncs.h"
|
||||
|
||||
/* When PCRE_DEBUG is defined, we need the pcre(16)_printint() function, which
|
||||
is also used by pcretest. PCRE_DEBUG is not defined when building a production
|
||||
@@ -513,6 +514,7 @@ into a subtraction and unsigned comparison). */
|
||||
|
||||
#define IS_DIGIT(x) ((x) >= CHAR_0 && (x) <= CHAR_9)
|
||||
|
||||
+#if 0
|
||||
#ifndef EBCDIC
|
||||
|
||||
/* This is the "normal" case, for ASCII systems, and EBCDIC systems running in
|
||||
@@ -626,7 +628,7 @@ static const pcre_uint8 ebcdic_chartab[] = { /* chartable partial dup */
|
||||
0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
|
||||
0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x00};/* 8 -255 */
|
||||
#endif
|
||||
-
|
||||
+#endif /* 0 */
|
||||
|
||||
/* Definition to allow mutual recursion */
|
||||
|
||||
@@ -812,10 +814,10 @@ else
|
||||
{
|
||||
/* In JavaScript, \u must be followed by four hexadecimal numbers.
|
||||
Otherwise it is a lowercase u letter. */
|
||||
- if (MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0
|
||||
- && MAX_255(ptr[2]) && (digitab[ptr[2]] & ctype_xdigit) != 0
|
||||
- && MAX_255(ptr[3]) && (digitab[ptr[3]] & ctype_xdigit) != 0
|
||||
- && MAX_255(ptr[4]) && (digitab[ptr[4]] & ctype_xdigit) != 0)
|
||||
+ if (MAX_255(ptr[1]) && g_ascii_isxdigit(ptr[1]) != 0
|
||||
+ && MAX_255(ptr[2]) && g_ascii_isxdigit(ptr[2]) != 0
|
||||
+ && MAX_255(ptr[3]) && g_ascii_isxdigit(ptr[3]) != 0
|
||||
+ && MAX_255(ptr[4]) && g_ascii_isxdigit(ptr[4]) != 0)
|
||||
{
|
||||
c = 0;
|
||||
for (i = 0; i < 4; ++i)
|
||||
@@ -1012,8 +1014,8 @@ else
|
||||
{
|
||||
/* In JavaScript, \x must be followed by two hexadecimal numbers.
|
||||
Otherwise it is a lowercase x letter. */
|
||||
- if (MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0
|
||||
- && MAX_255(ptr[2]) && (digitab[ptr[2]] & ctype_xdigit) != 0)
|
||||
+ if (MAX_255(ptr[1]) && g_ascii_isxdigit(ptr[1]) != 0
|
||||
+ && MAX_255(ptr[2]) && g_ascii_isxdigit(ptr[2]) != 0)
|
||||
{
|
||||
c = 0;
|
||||
for (i = 0; i < 2; ++i)
|
||||
@@ -1036,7 +1038,7 @@ else
|
||||
const pcre_uchar *pt = ptr + 2;
|
||||
|
||||
c = 0;
|
||||
- while (MAX_255(*pt) && (digitab[*pt] & ctype_xdigit) != 0)
|
||||
+ while (MAX_255(*pt) && g_ascii_isxdigit(*pt) != 0)
|
||||
{
|
||||
register int cc = *pt++;
|
||||
if (c == 0 && cc == CHAR_0) continue; /* Leading zeroes */
|
||||
@@ -1060,7 +1062,7 @@ else
|
||||
|
||||
if (c < 0)
|
||||
{
|
||||
- while (MAX_255(*pt) && (digitab[*pt] & ctype_xdigit) != 0) pt++;
|
||||
+ while (MAX_255(*pt) && g_ascii_isxdigit(*pt) != 0) pt++;
|
||||
*errorcodeptr = ERR34;
|
||||
}
|
||||
|
||||
@@ -1078,7 +1080,7 @@ else
|
||||
/* Read just a single-byte hex-defined char */
|
||||
|
||||
c = 0;
|
||||
- while (i++ < 2 && MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0)
|
||||
+ while (i++ < 2 && MAX_255(ptr[1]) && g_ascii_isxdigit(ptr[1]) != 0)
|
||||
{
|
||||
int cc; /* Some compilers don't like */
|
||||
cc = *(++ptr); /* ++ in initializers */
|
||||
--
|
||||
1.7.5.1.217.g4e3aa.dirty
|
||||
|
||||
@@ -1,40 +0,0 @@
|
||||
From acf401f1353a37b6edff9577ff07d055c625e4ca Mon Sep 17 00:00:00 2001
|
||||
From: Christian Persch <chpe@gnome.org>
|
||||
Date: Sun, 12 Feb 2012 19:40:48 +0100
|
||||
Subject: [PATCH] regex: Use glib memory allocator
|
||||
|
||||
---
|
||||
glib/pcre/pcre_globals.c | 10 ++++++----
|
||||
1 files changed, 6 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/glib/pcre/pcre_globals.c b/glib/pcre/pcre_globals.c
|
||||
index 36e6ddb..93d3af5 100644
|
||||
--- a/glib/pcre/pcre_globals.c
|
||||
+++ b/glib/pcre/pcre_globals.c
|
||||
@@ -58,6 +58,8 @@ global variables are not used. */
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
+#include "gmem.h"
|
||||
+
|
||||
#if defined _MSC_VER || defined __SYMBIAN32__
|
||||
static void* LocalPcreMalloc(size_t aSize)
|
||||
{
|
||||
@@ -74,10 +76,10 @@ PCRE_EXP_DATA_DEFN void (*PUBL(stack_free))(void *) = LocalPcreFree;
|
||||
PCRE_EXP_DATA_DEFN int (*PUBL(callout))(PUBL(callout_block) *) = NULL;
|
||||
|
||||
#elif !defined VPCOMPAT
|
||||
-PCRE_EXP_DATA_DEFN void *(*PUBL(malloc))(size_t) = malloc;
|
||||
-PCRE_EXP_DATA_DEFN void (*PUBL(free))(void *) = free;
|
||||
-PCRE_EXP_DATA_DEFN void *(*PUBL(stack_malloc))(size_t) = malloc;
|
||||
-PCRE_EXP_DATA_DEFN void (*PUBL(stack_free))(void *) = free;
|
||||
+PCRE_EXP_DATA_DEFN void *(*PUBL(malloc))(size_t) = g_try_malloc;
|
||||
+PCRE_EXP_DATA_DEFN void (*PUBL(free))(void *) = g_free;
|
||||
+PCRE_EXP_DATA_DEFN void *(*PUBL(stack_malloc))(size_t) = g_try_malloc;
|
||||
+PCRE_EXP_DATA_DEFN void (*PUBL(stack_free))(void *) = g_free;
|
||||
PCRE_EXP_DATA_DEFN int (*PUBL(callout))(PUBL(callout_block) *) = NULL;
|
||||
#endif
|
||||
|
||||
--
|
||||
1.7.5.1.217.g4e3aa.dirty
|
||||
|
||||
@@ -1,834 +0,0 @@
|
||||
From 23d48c5fc7aa889dc7798f9c64acd43d9cb34683 Mon Sep 17 00:00:00 2001
|
||||
From: Christian Persch <chpe@gnome.org>
|
||||
Date: Sun, 12 Feb 2012 21:20:33 +0100
|
||||
Subject: [PATCH] regex: Use glib for unicode data
|
||||
|
||||
Use g_unichar_type() and g_unichar_get_script() instead of pcre tables.
|
||||
---
|
||||
glib/pcre/pcre_compile.c | 26 +++---
|
||||
glib/pcre/pcre_dfa_exec.c | 96 ++++++++--------
|
||||
glib/pcre/pcre_exec.c | 26 +++---
|
||||
glib/pcre/pcre_internal.h | 11 +--
|
||||
glib/pcre/pcre_tables.c | 16 +++
|
||||
glib/pcre/pcre_xclass.c | 24 ++--
|
||||
glib/pcre/ucp.h | 265 +++++++++++++++++++++++----------------------
|
||||
7 files changed, 239 insertions(+), 225 deletions(-)
|
||||
|
||||
diff --git a/glib/pcre/pcre_compile.c b/glib/pcre/pcre_compile.c
|
||||
index 21bef80..a6c84e1 100644
|
||||
--- a/glib/pcre/pcre_compile.c
|
||||
+++ b/glib/pcre/pcre_compile.c
|
||||
@@ -2920,43 +2920,43 @@ Returns: TRUE if auto-possessifying is OK
|
||||
static BOOL
|
||||
check_char_prop(int c, int ptype, int pdata, BOOL negated)
|
||||
{
|
||||
-const ucd_record *prop = GET_UCD(c);
|
||||
+const pcre_uint8 chartype = UCD_CHARTYPE(c);
|
||||
switch(ptype)
|
||||
{
|
||||
case PT_LAMP:
|
||||
- return (prop->chartype == ucp_Lu ||
|
||||
- prop->chartype == ucp_Ll ||
|
||||
- prop->chartype == ucp_Lt) == negated;
|
||||
+ return (chartype == ucp_Lu ||
|
||||
+ chartype == ucp_Ll ||
|
||||
+ chartype == ucp_Lt) == negated;
|
||||
|
||||
case PT_GC:
|
||||
- return (pdata == PRIV(ucp_gentype)[prop->chartype]) == negated;
|
||||
+ return (pdata == PRIV(ucp_gentype)[chartype]) == negated;
|
||||
|
||||
case PT_PC:
|
||||
- return (pdata == prop->chartype) == negated;
|
||||
+ return (pdata == chartype) == negated;
|
||||
|
||||
case PT_SC:
|
||||
- return (pdata == prop->script) == negated;
|
||||
+ return (pdata == UCD_SCRIPT(c)) == negated;
|
||||
|
||||
/* These are specials */
|
||||
|
||||
case PT_ALNUM:
|
||||
- return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
|
||||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N) == negated;
|
||||
+ return (PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||
+ PRIV(ucp_gentype)[chartype] == ucp_N) == negated;
|
||||
|
||||
case PT_SPACE: /* Perl space */
|
||||
- return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
|
||||
+ return (PRIV(ucp_gentype)[chartype] == ucp_Z ||
|
||||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
|
||||
== negated;
|
||||
|
||||
case PT_PXSPACE: /* POSIX space */
|
||||
- return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
|
||||
+ return (PRIV(ucp_gentype)[chartype] == ucp_Z ||
|
||||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
|
||||
c == CHAR_FF || c == CHAR_CR)
|
||||
== negated;
|
||||
|
||||
case PT_WORD:
|
||||
- return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
|
||||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
|
||||
+ return (PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||
+ PRIV(ucp_gentype)[chartype] == ucp_N ||
|
||||
c == CHAR_UNDERSCORE) == negated;
|
||||
}
|
||||
return FALSE;
|
||||
diff --git a/glib/pcre/pcre_dfa_exec.c b/glib/pcre/pcre_dfa_exec.c
|
||||
index 9565d46..3f913ce 100644
|
||||
--- a/glib/pcre/pcre_dfa_exec.c
|
||||
+++ b/glib/pcre/pcre_dfa_exec.c
|
||||
@@ -1060,7 +1060,7 @@ for (;;)
|
||||
if (clen > 0)
|
||||
{
|
||||
BOOL OK;
|
||||
- const ucd_record * prop = GET_UCD(c);
|
||||
+ const pcre_uint8 chartype = UCD_CHARTYPE(c);
|
||||
switch(code[1])
|
||||
{
|
||||
case PT_ANY:
|
||||
@@ -1068,43 +1068,43 @@ for (;;)
|
||||
break;
|
||||
|
||||
case PT_LAMP:
|
||||
- OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
|
||||
- prop->chartype == ucp_Lt;
|
||||
+ OK = chartype == ucp_Lu || chartype == ucp_Ll ||
|
||||
+ chartype == ucp_Lt;
|
||||
break;
|
||||
|
||||
case PT_GC:
|
||||
- OK = PRIV(ucp_gentype)[prop->chartype] == code[2];
|
||||
+ OK = PRIV(ucp_gentype)[chartype] == code[2];
|
||||
break;
|
||||
|
||||
case PT_PC:
|
||||
- OK = prop->chartype == code[2];
|
||||
+ OK = chartype == code[2];
|
||||
break;
|
||||
|
||||
case PT_SC:
|
||||
- OK = prop->script == code[2];
|
||||
+ OK = UCD_SCRIPT(c) == code[2];
|
||||
break;
|
||||
|
||||
/* These are specials for combination cases. */
|
||||
|
||||
case PT_ALNUM:
|
||||
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
|
||||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N;
|
||||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||
+ PRIV(ucp_gentype)[chartype] == ucp_N;
|
||||
break;
|
||||
|
||||
case PT_SPACE: /* Perl space */
|
||||
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
|
||||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
|
||||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
|
||||
break;
|
||||
|
||||
case PT_PXSPACE: /* POSIX space */
|
||||
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
|
||||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
|
||||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
|
||||
c == CHAR_FF || c == CHAR_CR;
|
||||
break;
|
||||
|
||||
case PT_WORD:
|
||||
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
|
||||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
|
||||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||
+ PRIV(ucp_gentype)[chartype] == ucp_N ||
|
||||
c == CHAR_UNDERSCORE;
|
||||
break;
|
||||
|
||||
@@ -1294,7 +1294,7 @@ for (;;)
|
||||
if (clen > 0)
|
||||
{
|
||||
BOOL OK;
|
||||
- const ucd_record * prop = GET_UCD(c);
|
||||
+ const pcre_uint8 chartype = UCD_CHARTYPE(c);
|
||||
switch(code[2])
|
||||
{
|
||||
case PT_ANY:
|
||||
@@ -1302,43 +1302,43 @@ for (;;)
|
||||
break;
|
||||
|
||||
case PT_LAMP:
|
||||
- OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
|
||||
- prop->chartype == ucp_Lt;
|
||||
+ OK = chartype == ucp_Lu || chartype == ucp_Ll ||
|
||||
+ chartype == ucp_Lt;
|
||||
break;
|
||||
|
||||
case PT_GC:
|
||||
- OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
|
||||
+ OK = PRIV(ucp_gentype)[chartype] == code[3];
|
||||
break;
|
||||
|
||||
case PT_PC:
|
||||
- OK = prop->chartype == code[3];
|
||||
+ OK = chartype == code[3];
|
||||
break;
|
||||
|
||||
case PT_SC:
|
||||
- OK = prop->script == code[3];
|
||||
+ OK = UCD_SCRIPT(c) == code[3];
|
||||
break;
|
||||
|
||||
/* These are specials for combination cases. */
|
||||
|
||||
case PT_ALNUM:
|
||||
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
|
||||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N;
|
||||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||
+ PRIV(ucp_gentype)[chartype] == ucp_N;
|
||||
break;
|
||||
|
||||
case PT_SPACE: /* Perl space */
|
||||
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
|
||||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
|
||||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
|
||||
break;
|
||||
|
||||
case PT_PXSPACE: /* POSIX space */
|
||||
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
|
||||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
|
||||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
|
||||
c == CHAR_FF || c == CHAR_CR;
|
||||
break;
|
||||
|
||||
case PT_WORD:
|
||||
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
|
||||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
|
||||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||
+ PRIV(ucp_gentype)[chartype] == ucp_N ||
|
||||
c == CHAR_UNDERSCORE;
|
||||
break;
|
||||
|
||||
@@ -1541,7 +1541,7 @@ for (;;)
|
||||
if (clen > 0)
|
||||
{
|
||||
BOOL OK;
|
||||
- const ucd_record * prop = GET_UCD(c);
|
||||
+ const pcre_uint8 chartype = UCD_CHARTYPE(c);
|
||||
switch(code[2])
|
||||
{
|
||||
case PT_ANY:
|
||||
@@ -1549,43 +1549,43 @@ for (;;)
|
||||
break;
|
||||
|
||||
case PT_LAMP:
|
||||
- OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
|
||||
- prop->chartype == ucp_Lt;
|
||||
+ OK = chartype == ucp_Lu || chartype == ucp_Ll ||
|
||||
+ chartype == ucp_Lt;
|
||||
break;
|
||||
|
||||
case PT_GC:
|
||||
- OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
|
||||
+ OK = PRIV(ucp_gentype)[chartype] == code[3];
|
||||
break;
|
||||
|
||||
case PT_PC:
|
||||
- OK = prop->chartype == code[3];
|
||||
+ OK = chartype == code[3];
|
||||
break;
|
||||
|
||||
case PT_SC:
|
||||
- OK = prop->script == code[3];
|
||||
+ OK = UCD_SCRIPT(c) == code[3];
|
||||
break;
|
||||
|
||||
/* These are specials for combination cases. */
|
||||
|
||||
case PT_ALNUM:
|
||||
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
|
||||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N;
|
||||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||
+ PRIV(ucp_gentype)[chartype] == ucp_N;
|
||||
break;
|
||||
|
||||
case PT_SPACE: /* Perl space */
|
||||
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
|
||||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
|
||||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
|
||||
break;
|
||||
|
||||
case PT_PXSPACE: /* POSIX space */
|
||||
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
|
||||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
|
||||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
|
||||
c == CHAR_FF || c == CHAR_CR;
|
||||
break;
|
||||
|
||||
case PT_WORD:
|
||||
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
|
||||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
|
||||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||
+ PRIV(ucp_gentype)[chartype] == ucp_N ||
|
||||
c == CHAR_UNDERSCORE;
|
||||
break;
|
||||
|
||||
@@ -1813,7 +1813,7 @@ for (;;)
|
||||
if (clen > 0)
|
||||
{
|
||||
BOOL OK;
|
||||
- const ucd_record * prop = GET_UCD(c);
|
||||
+ const pcre_uint8 chartype = UCD_CHARTYPE(c);
|
||||
switch(code[1 + IMM2_SIZE + 1])
|
||||
{
|
||||
case PT_ANY:
|
||||
@@ -1821,43 +1821,43 @@ for (;;)
|
||||
break;
|
||||
|
||||
case PT_LAMP:
|
||||
- OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
|
||||
- prop->chartype == ucp_Lt;
|
||||
+ OK = chartype == ucp_Lu || chartype == ucp_Ll ||
|
||||
+ chartype == ucp_Lt;
|
||||
break;
|
||||
|
||||
case PT_GC:
|
||||
- OK = PRIV(ucp_gentype)[prop->chartype] == code[1 + IMM2_SIZE + 2];
|
||||
+ OK = PRIV(ucp_gentype)[chartype] == code[1 + IMM2_SIZE + 2];
|
||||
break;
|
||||
|
||||
case PT_PC:
|
||||
- OK = prop->chartype == code[1 + IMM2_SIZE + 2];
|
||||
+ OK = chartype == code[1 + IMM2_SIZE + 2];
|
||||
break;
|
||||
|
||||
case PT_SC:
|
||||
- OK = prop->script == code[1 + IMM2_SIZE + 2];
|
||||
+ OK = UCD_SCRIPT(c) == code[1 + IMM2_SIZE + 2];
|
||||
break;
|
||||
|
||||
/* These are specials for combination cases. */
|
||||
|
||||
case PT_ALNUM:
|
||||
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
|
||||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N;
|
||||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||
+ PRIV(ucp_gentype)[chartype] == ucp_N;
|
||||
break;
|
||||
|
||||
case PT_SPACE: /* Perl space */
|
||||
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
|
||||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
|
||||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
|
||||
break;
|
||||
|
||||
case PT_PXSPACE: /* POSIX space */
|
||||
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
|
||||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
|
||||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
|
||||
c == CHAR_FF || c == CHAR_CR;
|
||||
break;
|
||||
|
||||
case PT_WORD:
|
||||
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
|
||||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
|
||||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||
+ PRIV(ucp_gentype)[chartype] == ucp_N ||
|
||||
c == CHAR_UNDERSCORE;
|
||||
break;
|
||||
|
||||
diff --git a/glib/pcre/pcre_exec.c b/glib/pcre/pcre_exec.c
|
||||
index 830b8b5..c89a3f9 100644
|
||||
--- a/glib/pcre/pcre_exec.c
|
||||
+++ b/glib/pcre/pcre_exec.c
|
||||
@@ -2565,7 +2565,7 @@ for (;;)
|
||||
}
|
||||
GETCHARINCTEST(c, eptr);
|
||||
{
|
||||
- const ucd_record *prop = GET_UCD(c);
|
||||
+ const pcre_uint8 chartype = UCD_CHARTYPE(c);
|
||||
|
||||
switch(ecode[1])
|
||||
{
|
||||
@@ -2574,44 +2574,44 @@ for (;;)
|
||||
break;
|
||||
|
||||
case PT_LAMP:
|
||||
- if ((prop->chartype == ucp_Lu ||
|
||||
- prop->chartype == ucp_Ll ||
|
||||
- prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
|
||||
+ if ((chartype == ucp_Lu ||
|
||||
+ chartype == ucp_Ll ||
|
||||
+ chartype == ucp_Lt) == (op == OP_NOTPROP))
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
break;
|
||||
|
||||
case PT_GC:
|
||||
- if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP))
|
||||
+ if ((ecode[2] != PRIV(ucp_gentype)[chartype]) == (op == OP_PROP))
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
break;
|
||||
|
||||
case PT_PC:
|
||||
- if ((ecode[2] != prop->chartype) == (op == OP_PROP))
|
||||
+ if ((ecode[2] != chartype) == (op == OP_PROP))
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
break;
|
||||
|
||||
case PT_SC:
|
||||
- if ((ecode[2] != prop->script) == (op == OP_PROP))
|
||||
+ if ((ecode[2] != UCD_SCRIPT(c)) == (op == OP_PROP))
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
break;
|
||||
|
||||
/* These are specials */
|
||||
|
||||
case PT_ALNUM:
|
||||
- if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
|
||||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
|
||||
+ if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||
+ PRIV(ucp_gentype)[chartype] == ucp_N) == (op == OP_NOTPROP))
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
break;
|
||||
|
||||
case PT_SPACE: /* Perl space */
|
||||
- if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
|
||||
+ if ((PRIV(ucp_gentype)[chartype] == ucp_Z ||
|
||||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
|
||||
== (op == OP_NOTPROP))
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
break;
|
||||
|
||||
case PT_PXSPACE: /* POSIX space */
|
||||
- if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
|
||||
+ if ((PRIV(ucp_gentype)[chartype] == ucp_Z ||
|
||||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
|
||||
c == CHAR_FF || c == CHAR_CR)
|
||||
== (op == OP_NOTPROP))
|
||||
@@ -2619,8 +2619,8 @@ for (;;)
|
||||
break;
|
||||
|
||||
case PT_WORD:
|
||||
- if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
|
||||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
|
||||
+ if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||
+ PRIV(ucp_gentype)[chartype] == ucp_N ||
|
||||
c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
break;
|
||||
diff --git a/glib/pcre/pcre_internal.h b/glib/pcre/pcre_internal.h
|
||||
index 181c312..234af1b 100644
|
||||
--- a/glib/pcre/pcre_internal.h
|
||||
+++ b/glib/pcre/pcre_internal.h
|
||||
@@ -2329,15 +2329,12 @@ extern const int PRIV(ucp_typerange)[];
|
||||
#ifdef SUPPORT_UCP
|
||||
/* UCD access macros */
|
||||
|
||||
-#define UCD_BLOCK_SIZE 128
|
||||
-#define GET_UCD(ch) (PRIV(ucd_records) + \
|
||||
- PRIV(ucd_stage2)[PRIV(ucd_stage1)[(ch) / UCD_BLOCK_SIZE] * \
|
||||
- UCD_BLOCK_SIZE + (ch) % UCD_BLOCK_SIZE])
|
||||
+unsigned int _pcre_ucp_othercase(const unsigned int c);
|
||||
|
||||
-#define UCD_CHARTYPE(ch) GET_UCD(ch)->chartype
|
||||
-#define UCD_SCRIPT(ch) GET_UCD(ch)->script
|
||||
+#define UCD_CHARTYPE(ch) (pcre_uint8)g_unichar_type((gunichar)(ch))
|
||||
+#define UCD_SCRIPT(ch) (pcre_uint8)g_unichar_get_script((gunichar)(ch))
|
||||
#define UCD_CATEGORY(ch) PRIV(ucp_gentype)[UCD_CHARTYPE(ch)]
|
||||
-#define UCD_OTHERCASE(ch) (ch + GET_UCD(ch)->other_case)
|
||||
+#define UCD_OTHERCASE(ch) (_pcre_ucp_othercase(ch))
|
||||
|
||||
#endif /* SUPPORT_UCP */
|
||||
|
||||
diff --git a/glib/pcre/pcre_tables.c b/glib/pcre/pcre_tables.c
|
||||
index 7ac2d89..e401974 100644
|
||||
--- a/glib/pcre/pcre_tables.c
|
||||
+++ b/glib/pcre/pcre_tables.c
|
||||
@@ -584,6 +584,22 @@ const ucp_type_table PRIV(utt)[] = {
|
||||
|
||||
const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
|
||||
|
||||
+unsigned int
|
||||
+_pcre_ucp_othercase(const unsigned int c)
|
||||
+{
|
||||
+ int other_case = NOTACHAR;
|
||||
+
|
||||
+ if (g_unichar_islower(c))
|
||||
+ other_case = g_unichar_toupper(c);
|
||||
+ else if (g_unichar_isupper(c))
|
||||
+ other_case = g_unichar_tolower(c);
|
||||
+
|
||||
+ if (other_case == c)
|
||||
+ other_case = NOTACHAR;
|
||||
+
|
||||
+ return other_case;
|
||||
+}
|
||||
+
|
||||
#endif /* SUPPORT_UTF */
|
||||
|
||||
/* End of pcre_tables.c */
|
||||
diff --git a/glib/pcre/pcre_xclass.c b/glib/pcre/pcre_xclass.c
|
||||
index dca7a39..e5a55d7 100644
|
||||
--- a/glib/pcre/pcre_xclass.c
|
||||
+++ b/glib/pcre/pcre_xclass.c
|
||||
@@ -127,7 +127,7 @@ while ((t = *data++) != XCL_END)
|
||||
#ifdef SUPPORT_UCP
|
||||
else /* XCL_PROP & XCL_NOTPROP */
|
||||
{
|
||||
- const ucd_record *prop = GET_UCD(c);
|
||||
+ const pcre_uint8 chartype = UCD_CHARTYPE(c);
|
||||
|
||||
switch(*data)
|
||||
{
|
||||
@@ -136,46 +136,46 @@ while ((t = *data++) != XCL_END)
|
||||
break;
|
||||
|
||||
case PT_LAMP:
|
||||
- if ((prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
|
||||
- prop->chartype == ucp_Lt) == (t == XCL_PROP)) return !negated;
|
||||
+ if ((chartype == ucp_Lu || chartype == ucp_Ll ||
|
||||
+ chartype == ucp_Lt) == (t == XCL_PROP)) return !negated;
|
||||
break;
|
||||
|
||||
case PT_GC:
|
||||
- if ((data[1] == PRIV(ucp_gentype)[prop->chartype]) == (t == XCL_PROP))
|
||||
+ if ((data[1] == PRIV(ucp_gentype)[chartype]) == (t == XCL_PROP))
|
||||
return !negated;
|
||||
break;
|
||||
|
||||
case PT_PC:
|
||||
- if ((data[1] == prop->chartype) == (t == XCL_PROP)) return !negated;
|
||||
+ if ((data[1] == chartype) == (t == XCL_PROP)) return !negated;
|
||||
break;
|
||||
|
||||
case PT_SC:
|
||||
- if ((data[1] == prop->script) == (t == XCL_PROP)) return !negated;
|
||||
+ if ((data[1] == UCD_SCRIPT(c)) == (t == XCL_PROP)) return !negated;
|
||||
break;
|
||||
|
||||
case PT_ALNUM:
|
||||
- if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
|
||||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (t == XCL_PROP))
|
||||
+ if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||
+ PRIV(ucp_gentype)[chartype] == ucp_N) == (t == XCL_PROP))
|
||||
return !negated;
|
||||
break;
|
||||
|
||||
case PT_SPACE: /* Perl space */
|
||||
- if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
|
||||
+ if ((PRIV(ucp_gentype)[chartype] == ucp_Z ||
|
||||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
|
||||
== (t == XCL_PROP))
|
||||
return !negated;
|
||||
break;
|
||||
|
||||
case PT_PXSPACE: /* POSIX space */
|
||||
- if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
|
||||
+ if ((PRIV(ucp_gentype)[chartype] == ucp_Z ||
|
||||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
|
||||
c == CHAR_FF || c == CHAR_CR) == (t == XCL_PROP))
|
||||
return !negated;
|
||||
break;
|
||||
|
||||
case PT_WORD:
|
||||
- if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
|
||||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N || c == CHAR_UNDERSCORE)
|
||||
+ if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||
+ PRIV(ucp_gentype)[chartype] == ucp_N || c == CHAR_UNDERSCORE)
|
||||
== (t == XCL_PROP))
|
||||
return !negated;
|
||||
break;
|
||||
diff --git a/glib/pcre/ucp.h b/glib/pcre/ucp.h
|
||||
index 59c3bec..53a48c9 100644
|
||||
--- a/glib/pcre/ucp.h
|
||||
+++ b/glib/pcre/ucp.h
|
||||
@@ -10,6 +10,7 @@ the UCD access macros. New values that are added for new releases of Unicode
|
||||
should always be at the end of each enum, for backwards compatibility. */
|
||||
|
||||
/* These are the general character categories. */
|
||||
+#include "gunicode.h"
|
||||
|
||||
enum {
|
||||
ucp_C, /* Other */
|
||||
@@ -24,148 +25,148 @@ enum {
|
||||
/* These are the particular character types. */
|
||||
|
||||
enum {
|
||||
- ucp_Cc, /* Control */
|
||||
- ucp_Cf, /* Format */
|
||||
- ucp_Cn, /* Unassigned */
|
||||
- ucp_Co, /* Private use */
|
||||
- ucp_Cs, /* Surrogate */
|
||||
- ucp_Ll, /* Lower case letter */
|
||||
- ucp_Lm, /* Modifier letter */
|
||||
- ucp_Lo, /* Other letter */
|
||||
- ucp_Lt, /* Title case letter */
|
||||
- ucp_Lu, /* Upper case letter */
|
||||
- ucp_Mc, /* Spacing mark */
|
||||
- ucp_Me, /* Enclosing mark */
|
||||
- ucp_Mn, /* Non-spacing mark */
|
||||
- ucp_Nd, /* Decimal number */
|
||||
- ucp_Nl, /* Letter number */
|
||||
- ucp_No, /* Other number */
|
||||
- ucp_Pc, /* Connector punctuation */
|
||||
- ucp_Pd, /* Dash punctuation */
|
||||
- ucp_Pe, /* Close punctuation */
|
||||
- ucp_Pf, /* Final punctuation */
|
||||
- ucp_Pi, /* Initial punctuation */
|
||||
- ucp_Po, /* Other punctuation */
|
||||
- ucp_Ps, /* Open punctuation */
|
||||
- ucp_Sc, /* Currency symbol */
|
||||
- ucp_Sk, /* Modifier symbol */
|
||||
- ucp_Sm, /* Mathematical symbol */
|
||||
- ucp_So, /* Other symbol */
|
||||
- ucp_Zl, /* Line separator */
|
||||
- ucp_Zp, /* Paragraph separator */
|
||||
- ucp_Zs /* Space separator */
|
||||
+ ucp_Cc = G_UNICODE_CONTROL, /* Control */
|
||||
+ ucp_Cf = G_UNICODE_FORMAT, /* Format */
|
||||
+ ucp_Cn = G_UNICODE_UNASSIGNED, /* Unassigned */
|
||||
+ ucp_Co = G_UNICODE_PRIVATE_USE, /* Private use */
|
||||
+ ucp_Cs = G_UNICODE_SURROGATE, /* Surrogate */
|
||||
+ ucp_Ll = G_UNICODE_LOWERCASE_LETTER, /* Lower case letter */
|
||||
+ ucp_Lm = G_UNICODE_MODIFIER_LETTER, /* Modifier letter */
|
||||
+ ucp_Lo = G_UNICODE_OTHER_LETTER, /* Other letter */
|
||||
+ ucp_Lt = G_UNICODE_TITLECASE_LETTER, /* Title case letter */
|
||||
+ ucp_Lu = G_UNICODE_UPPERCASE_LETTER, /* Upper case letter */
|
||||
+ ucp_Mc = G_UNICODE_SPACING_MARK, /* Spacing mark */
|
||||
+ ucp_Me = G_UNICODE_ENCLOSING_MARK, /* Enclosing mark */
|
||||
+ ucp_Mn = G_UNICODE_NON_SPACING_MARK, /* Non-spacing mark */
|
||||
+ ucp_Nd = G_UNICODE_DECIMAL_NUMBER, /* Decimal number */
|
||||
+ ucp_Nl = G_UNICODE_LETTER_NUMBER, /* Letter number */
|
||||
+ ucp_No = G_UNICODE_OTHER_NUMBER, /* Other number */
|
||||
+ ucp_Pc = G_UNICODE_CONNECT_PUNCTUATION, /* Connector punctuation */
|
||||
+ ucp_Pd = G_UNICODE_DASH_PUNCTUATION, /* Dash punctuation */
|
||||
+ ucp_Pe = G_UNICODE_CLOSE_PUNCTUATION, /* Close punctuation */
|
||||
+ ucp_Pf = G_UNICODE_FINAL_PUNCTUATION, /* Final punctuation */
|
||||
+ ucp_Pi = G_UNICODE_INITIAL_PUNCTUATION, /* Initial punctuation */
|
||||
+ ucp_Po = G_UNICODE_OTHER_PUNCTUATION, /* Other punctuation */
|
||||
+ ucp_Ps = G_UNICODE_OPEN_PUNCTUATION, /* Open punctuation */
|
||||
+ ucp_Sc = G_UNICODE_CURRENCY_SYMBOL, /* Currency symbol */
|
||||
+ ucp_Sk = G_UNICODE_MODIFIER_SYMBOL, /* Modifier symbol */
|
||||
+ ucp_Sm = G_UNICODE_MATH_SYMBOL, /* Mathematical symbol */
|
||||
+ ucp_So = G_UNICODE_OTHER_SYMBOL, /* Other symbol */
|
||||
+ ucp_Zl = G_UNICODE_LINE_SEPARATOR, /* Line separator */
|
||||
+ ucp_Zp = G_UNICODE_PARAGRAPH_SEPARATOR, /* Paragraph separator */
|
||||
+ ucp_Zs = G_UNICODE_SPACE_SEPARATOR /* Space separator */
|
||||
};
|
||||
|
||||
/* These are the script identifications. */
|
||||
|
||||
enum {
|
||||
- ucp_Arabic,
|
||||
- ucp_Armenian,
|
||||
- ucp_Bengali,
|
||||
- ucp_Bopomofo,
|
||||
- ucp_Braille,
|
||||
- ucp_Buginese,
|
||||
- ucp_Buhid,
|
||||
- ucp_Canadian_Aboriginal,
|
||||
- ucp_Cherokee,
|
||||
- ucp_Common,
|
||||
- ucp_Coptic,
|
||||
- ucp_Cypriot,
|
||||
- ucp_Cyrillic,
|
||||
- ucp_Deseret,
|
||||
- ucp_Devanagari,
|
||||
- ucp_Ethiopic,
|
||||
- ucp_Georgian,
|
||||
- ucp_Glagolitic,
|
||||
- ucp_Gothic,
|
||||
- ucp_Greek,
|
||||
- ucp_Gujarati,
|
||||
- ucp_Gurmukhi,
|
||||
- ucp_Han,
|
||||
- ucp_Hangul,
|
||||
- ucp_Hanunoo,
|
||||
- ucp_Hebrew,
|
||||
- ucp_Hiragana,
|
||||
- ucp_Inherited,
|
||||
- ucp_Kannada,
|
||||
- ucp_Katakana,
|
||||
- ucp_Kharoshthi,
|
||||
- ucp_Khmer,
|
||||
- ucp_Lao,
|
||||
- ucp_Latin,
|
||||
- ucp_Limbu,
|
||||
- ucp_Linear_B,
|
||||
- ucp_Malayalam,
|
||||
- ucp_Mongolian,
|
||||
- ucp_Myanmar,
|
||||
- ucp_New_Tai_Lue,
|
||||
- ucp_Ogham,
|
||||
- ucp_Old_Italic,
|
||||
- ucp_Old_Persian,
|
||||
- ucp_Oriya,
|
||||
- ucp_Osmanya,
|
||||
- ucp_Runic,
|
||||
- ucp_Shavian,
|
||||
- ucp_Sinhala,
|
||||
- ucp_Syloti_Nagri,
|
||||
- ucp_Syriac,
|
||||
- ucp_Tagalog,
|
||||
- ucp_Tagbanwa,
|
||||
- ucp_Tai_Le,
|
||||
- ucp_Tamil,
|
||||
- ucp_Telugu,
|
||||
- ucp_Thaana,
|
||||
- ucp_Thai,
|
||||
- ucp_Tibetan,
|
||||
- ucp_Tifinagh,
|
||||
- ucp_Ugaritic,
|
||||
- ucp_Yi,
|
||||
+ ucp_Arabic = G_UNICODE_SCRIPT_ARABIC,
|
||||
+ ucp_Armenian = G_UNICODE_SCRIPT_ARMENIAN,
|
||||
+ ucp_Bengali = G_UNICODE_SCRIPT_BENGALI,
|
||||
+ ucp_Bopomofo = G_UNICODE_SCRIPT_BOPOMOFO,
|
||||
+ ucp_Braille = G_UNICODE_SCRIPT_BRAILLE,
|
||||
+ ucp_Buginese = G_UNICODE_SCRIPT_BUGINESE,
|
||||
+ ucp_Buhid = G_UNICODE_SCRIPT_BUHID,
|
||||
+ ucp_Canadian_Aboriginal = G_UNICODE_SCRIPT_CANADIAN_ABORIGINAL,
|
||||
+ ucp_Cherokee = G_UNICODE_SCRIPT_CHEROKEE,
|
||||
+ ucp_Common = G_UNICODE_SCRIPT_COMMON,
|
||||
+ ucp_Coptic = G_UNICODE_SCRIPT_COPTIC,
|
||||
+ ucp_Cypriot = G_UNICODE_SCRIPT_CYPRIOT,
|
||||
+ ucp_Cyrillic = G_UNICODE_SCRIPT_CYRILLIC,
|
||||
+ ucp_Deseret = G_UNICODE_SCRIPT_DESERET,
|
||||
+ ucp_Devanagari = G_UNICODE_SCRIPT_DEVANAGARI,
|
||||
+ ucp_Ethiopic = G_UNICODE_SCRIPT_ETHIOPIC,
|
||||
+ ucp_Georgian = G_UNICODE_SCRIPT_GEORGIAN,
|
||||
+ ucp_Glagolitic = G_UNICODE_SCRIPT_GLAGOLITIC,
|
||||
+ ucp_Gothic = G_UNICODE_SCRIPT_GOTHIC,
|
||||
+ ucp_Greek = G_UNICODE_SCRIPT_GREEK,
|
||||
+ ucp_Gujarati = G_UNICODE_SCRIPT_GUJARATI,
|
||||
+ ucp_Gurmukhi = G_UNICODE_SCRIPT_GURMUKHI,
|
||||
+ ucp_Han = G_UNICODE_SCRIPT_HAN,
|
||||
+ ucp_Hangul = G_UNICODE_SCRIPT_HANGUL,
|
||||
+ ucp_Hanunoo = G_UNICODE_SCRIPT_HANUNOO,
|
||||
+ ucp_Hebrew = G_UNICODE_SCRIPT_HEBREW,
|
||||
+ ucp_Hiragana = G_UNICODE_SCRIPT_HIRAGANA,
|
||||
+ ucp_Inherited = G_UNICODE_SCRIPT_INHERITED,
|
||||
+ ucp_Kannada = G_UNICODE_SCRIPT_KANNADA,
|
||||
+ ucp_Katakana = G_UNICODE_SCRIPT_KATAKANA,
|
||||
+ ucp_Kharoshthi = G_UNICODE_SCRIPT_KHAROSHTHI,
|
||||
+ ucp_Khmer = G_UNICODE_SCRIPT_KHMER,
|
||||
+ ucp_Lao = G_UNICODE_SCRIPT_LAO,
|
||||
+ ucp_Latin = G_UNICODE_SCRIPT_LATIN,
|
||||
+ ucp_Limbu = G_UNICODE_SCRIPT_LIMBU,
|
||||
+ ucp_Linear_B = G_UNICODE_SCRIPT_LINEAR_B,
|
||||
+ ucp_Malayalam = G_UNICODE_SCRIPT_MALAYALAM,
|
||||
+ ucp_Mongolian = G_UNICODE_SCRIPT_MONGOLIAN,
|
||||
+ ucp_Myanmar = G_UNICODE_SCRIPT_MYANMAR,
|
||||
+ ucp_New_Tai_Lue = G_UNICODE_SCRIPT_NEW_TAI_LUE,
|
||||
+ ucp_Ogham = G_UNICODE_SCRIPT_OGHAM,
|
||||
+ ucp_Old_Italic = G_UNICODE_SCRIPT_OLD_ITALIC,
|
||||
+ ucp_Old_Persian = G_UNICODE_SCRIPT_OLD_PERSIAN,
|
||||
+ ucp_Oriya = G_UNICODE_SCRIPT_ORIYA,
|
||||
+ ucp_Osmanya = G_UNICODE_SCRIPT_OSMANYA,
|
||||
+ ucp_Runic = G_UNICODE_SCRIPT_RUNIC,
|
||||
+ ucp_Shavian = G_UNICODE_SCRIPT_SHAVIAN,
|
||||
+ ucp_Sinhala = G_UNICODE_SCRIPT_SINHALA,
|
||||
+ ucp_Syloti_Nagri = G_UNICODE_SCRIPT_SYLOTI_NAGRI,
|
||||
+ ucp_Syriac = G_UNICODE_SCRIPT_SYRIAC,
|
||||
+ ucp_Tagalog = G_UNICODE_SCRIPT_TAGALOG,
|
||||
+ ucp_Tagbanwa = G_UNICODE_SCRIPT_TAGBANWA,
|
||||
+ ucp_Tai_Le = G_UNICODE_SCRIPT_TAI_LE,
|
||||
+ ucp_Tamil = G_UNICODE_SCRIPT_TAMIL,
|
||||
+ ucp_Telugu = G_UNICODE_SCRIPT_TELUGU,
|
||||
+ ucp_Thaana = G_UNICODE_SCRIPT_THAANA,
|
||||
+ ucp_Thai = G_UNICODE_SCRIPT_THAI,
|
||||
+ ucp_Tibetan = G_UNICODE_SCRIPT_TIBETAN,
|
||||
+ ucp_Tifinagh = G_UNICODE_SCRIPT_TIFINAGH,
|
||||
+ ucp_Ugaritic = G_UNICODE_SCRIPT_UGARITIC,
|
||||
+ ucp_Yi = G_UNICODE_SCRIPT_YI,
|
||||
/* New for Unicode 5.0: */
|
||||
- ucp_Balinese,
|
||||
- ucp_Cuneiform,
|
||||
- ucp_Nko,
|
||||
- ucp_Phags_Pa,
|
||||
- ucp_Phoenician,
|
||||
+ ucp_Balinese = G_UNICODE_SCRIPT_BALINESE,
|
||||
+ ucp_Cuneiform = G_UNICODE_SCRIPT_CUNEIFORM,
|
||||
+ ucp_Nko = G_UNICODE_SCRIPT_NKO,
|
||||
+ ucp_Phags_Pa = G_UNICODE_SCRIPT_PHAGS_PA,
|
||||
+ ucp_Phoenician = G_UNICODE_SCRIPT_PHOENICIAN,
|
||||
/* New for Unicode 5.1: */
|
||||
- ucp_Carian,
|
||||
- ucp_Cham,
|
||||
- ucp_Kayah_Li,
|
||||
- ucp_Lepcha,
|
||||
- ucp_Lycian,
|
||||
- ucp_Lydian,
|
||||
- ucp_Ol_Chiki,
|
||||
- ucp_Rejang,
|
||||
- ucp_Saurashtra,
|
||||
- ucp_Sundanese,
|
||||
- ucp_Vai,
|
||||
+ ucp_Carian = G_UNICODE_SCRIPT_CARIAN,
|
||||
+ ucp_Cham = G_UNICODE_SCRIPT_CHAM,
|
||||
+ ucp_Kayah_Li = G_UNICODE_SCRIPT_KAYAH_LI,
|
||||
+ ucp_Lepcha = G_UNICODE_SCRIPT_LEPCHA,
|
||||
+ ucp_Lycian = G_UNICODE_SCRIPT_LYCIAN,
|
||||
+ ucp_Lydian = G_UNICODE_SCRIPT_LYDIAN,
|
||||
+ ucp_Ol_Chiki = G_UNICODE_SCRIPT_OL_CHIKI,
|
||||
+ ucp_Rejang = G_UNICODE_SCRIPT_REJANG,
|
||||
+ ucp_Saurashtra = G_UNICODE_SCRIPT_SAURASHTRA,
|
||||
+ ucp_Sundanese = G_UNICODE_SCRIPT_SUNDANESE,
|
||||
+ ucp_Vai = G_UNICODE_SCRIPT_VAI,
|
||||
/* New for Unicode 5.2: */
|
||||
- ucp_Avestan,
|
||||
- ucp_Bamum,
|
||||
- ucp_Egyptian_Hieroglyphs,
|
||||
- ucp_Imperial_Aramaic,
|
||||
- ucp_Inscriptional_Pahlavi,
|
||||
- ucp_Inscriptional_Parthian,
|
||||
- ucp_Javanese,
|
||||
- ucp_Kaithi,
|
||||
- ucp_Lisu,
|
||||
- ucp_Meetei_Mayek,
|
||||
- ucp_Old_South_Arabian,
|
||||
- ucp_Old_Turkic,
|
||||
- ucp_Samaritan,
|
||||
- ucp_Tai_Tham,
|
||||
- ucp_Tai_Viet,
|
||||
+ ucp_Avestan = G_UNICODE_SCRIPT_AVESTAN,
|
||||
+ ucp_Bamum = G_UNICODE_SCRIPT_BAMUM,
|
||||
+ ucp_Egyptian_Hieroglyphs = G_UNICODE_SCRIPT_EGYPTIAN_HIEROGLYPHS,
|
||||
+ ucp_Imperial_Aramaic = G_UNICODE_SCRIPT_IMPERIAL_ARAMAIC,
|
||||
+ ucp_Inscriptional_Pahlavi = G_UNICODE_SCRIPT_INSCRIPTIONAL_PAHLAVI,
|
||||
+ ucp_Inscriptional_Parthian = G_UNICODE_SCRIPT_INSCRIPTIONAL_PARTHIAN,
|
||||
+ ucp_Javanese = G_UNICODE_SCRIPT_JAVANESE,
|
||||
+ ucp_Kaithi = G_UNICODE_SCRIPT_KAITHI,
|
||||
+ ucp_Lisu = G_UNICODE_SCRIPT_LISU,
|
||||
+ ucp_Meetei_Mayek = G_UNICODE_SCRIPT_MEETEI_MAYEK,
|
||||
+ ucp_Old_South_Arabian = G_UNICODE_SCRIPT_OLD_SOUTH_ARABIAN,
|
||||
+ ucp_Old_Turkic = G_UNICODE_SCRIPT_OLD_TURKIC,
|
||||
+ ucp_Samaritan = G_UNICODE_SCRIPT_SAMARITAN,
|
||||
+ ucp_Tai_Tham = G_UNICODE_SCRIPT_TAI_THAM,
|
||||
+ ucp_Tai_Viet = G_UNICODE_SCRIPT_TAI_VIET,
|
||||
/* New for Unicode 6.0.0: */
|
||||
- ucp_Batak,
|
||||
- ucp_Brahmi,
|
||||
- ucp_Mandaic,
|
||||
+ ucp_Batak = G_UNICODE_SCRIPT_BATAK,
|
||||
+ ucp_Brahmi = G_UNICODE_SCRIPT_BRAHMI,
|
||||
+ ucp_Mandaic = G_UNICODE_SCRIPT_MANDAIC,
|
||||
/* New for Unicode 6.1.0: */
|
||||
- ucp_Chakma,
|
||||
- ucp_Meroitic_Cursive,
|
||||
- ucp_Meroitic_Hieroglyphs,
|
||||
- ucp_Miao,
|
||||
- ucp_Sharada,
|
||||
- ucp_Sora_Sompeng,
|
||||
- ucp_Takri
|
||||
+ ucp_Chakma = G_UNICODE_SCRIPT_CHAKMA,
|
||||
+ ucp_Meroitic_Cursive = G_UNICODE_SCRIPT_MEROITIC_CURSIVE,
|
||||
+ ucp_Meroitic_Hieroglyphs = G_UNICODE_SCRIPT_MEROITIC_HIEROGLYPHS,
|
||||
+ ucp_Miao = G_UNICODE_SCRIPT_MIAO,
|
||||
+ ucp_Sharada = G_UNICODE_SCRIPT_SHARADA,
|
||||
+ ucp_Sora_Sompeng = G_UNICODE_SCRIPT_SORA_SOMPENG,
|
||||
+ ucp_Takri = G_UNICODE_SCRIPT_TAKRI,
|
||||
};
|
||||
|
||||
#endif
|
||||
--
|
||||
1.7.5.1.217.g4e3aa.dirty
|
||||
|
||||
@@ -1,159 +0,0 @@
|
||||
#! /bin/sh
|
||||
|
||||
IN="../update-pcre"
|
||||
PCRE=$1
|
||||
|
||||
if [ "x$PCRE" = x -o "x$PCRE" = x--help -o "x$PCRE" = x-h ]; then
|
||||
cat >&2 << EOF
|
||||
|
||||
$0 PCRE-DIR
|
||||
|
||||
Updates the local PCRE copy with a different version of the library,
|
||||
contained in the directory PCRE-DIR.
|
||||
|
||||
This will delete the content of the local pcre directory, copy the
|
||||
necessary files from PCRE-DIR, and generate other needed files, such
|
||||
as Makefile.am
|
||||
EOF
|
||||
exit
|
||||
fi
|
||||
|
||||
if [ ! -f gregex.h ]; then
|
||||
echo "This script should be executed from the directory containing gregex.c." 2> /dev/null
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ ! -f $PCRE/Makefile.in -o ! -f $PCRE/pcre_compile.c ]; then
|
||||
echo "'$PCRE' does not contain a valid PCRE version." 2> /dev/null
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
||||
echo "Deleting old PCRE library"
|
||||
mv pcre/.svn tmp-pcre-svn
|
||||
rm -R pcre 2> /dev/null
|
||||
mkdir pcre
|
||||
cd pcre
|
||||
|
||||
# pcre_chartables.c is generated by dftables.
|
||||
# We do not want to compile and execute dftables.c every time, because
|
||||
# this could be a problem (e.g. when cross-compiling), so now generate
|
||||
# the file and then distribute it with GRegex.
|
||||
echo "Generating pcre_chartables.c"
|
||||
cp -R $PCRE tmp-build
|
||||
cd tmp-build
|
||||
./configure --enable-utf8 --enable-unicode-properties --disable-cpp > /dev/null
|
||||
make pcre_chartables.c > /dev/null
|
||||
cat > ../pcre_chartables.c << \EOF
|
||||
/* This file is autogenerated by ../update-pcre/update.sh during
|
||||
* the update of the local copy of PCRE.
|
||||
*/
|
||||
EOF
|
||||
cat pcre_chartables.c >> ../pcre_chartables.c
|
||||
cd ..
|
||||
rm -R tmp-build
|
||||
|
||||
# Compiled C files.
|
||||
echo "Generating makefiles"
|
||||
all_files=`awk '/^OBJ = /, /^\\s*$/ \
|
||||
{ \
|
||||
sub("^OBJ = ", ""); \
|
||||
sub(".@OBJEXT@[[:blank:]]*\\\\\\\\", ""); \
|
||||
sub("\\\\$\\\\(POSIX_OBJ\\\\)", ""); \
|
||||
print; \
|
||||
}' \
|
||||
$PCRE/Makefile.in`
|
||||
|
||||
# Headers.
|
||||
included_files="pcre.h pcre_internal.h ucp.h ucpinternal.h"
|
||||
|
||||
# Generate Makefile.am.
|
||||
cat $IN/Makefile.am-1 > Makefile.am
|
||||
for name in $all_files; do
|
||||
echo " $name.c \\" >> Makefile.am
|
||||
if [ $name != pcre_chartables ]; then
|
||||
# pcre_chartables.c is a generated file.
|
||||
cp $PCRE/$name.c .
|
||||
fi
|
||||
done
|
||||
for f in $included_files; do
|
||||
echo " $f \\" >> Makefile.am
|
||||
cp $PCRE/$f .
|
||||
done
|
||||
cat $IN/Makefile.am-2 >> Makefile.am
|
||||
|
||||
# Generate makefile.msc
|
||||
cat > makefile.msc << EOF
|
||||
TOP = ..\..\..
|
||||
!INCLUDE ..\..\build\win32\make.msc
|
||||
|
||||
INCLUDES = \\
|
||||
-I ..\.. \\
|
||||
-I ..
|
||||
|
||||
DEFINES = \\
|
||||
-DPCRE_STATIC \\
|
||||
-DHAVE_CONFIG_H \\
|
||||
-DHAVE_LONG_LONG_FORMAT \\
|
||||
-DSUPPORT_UCP \\
|
||||
-DSUPPORT_UTF8 \\
|
||||
-DNEWLINE=-1 \\
|
||||
-DMATCH_LIMIT=10000000 \\
|
||||
-DMATCH_LIMIT_RECURSION=10000000 \\
|
||||
-DMAX_NAME_SIZE=32 \\
|
||||
-DMAX_NAME_COUNT=10000 \\
|
||||
-DMAX_DUPLENGTH=30000 \\
|
||||
-DLINK_SIZE=2 \\
|
||||
-DEBCDIC=0 \\
|
||||
-DPOSIX_MALLOC_THRESHOLD=10
|
||||
|
||||
OBJECTS = \\
|
||||
`
|
||||
for f in $all_files; do
|
||||
echo " $f.obj \\\\"
|
||||
done
|
||||
`
|
||||
|
||||
all : pcre.lib
|
||||
|
||||
pcre.lib : \$(OBJECTS)
|
||||
lib -out:pcre.lib \$(OBJECTS)
|
||||
EOF
|
||||
|
||||
echo "Patching PCRE"
|
||||
|
||||
# Copy the license.
|
||||
cp $PCRE/COPYING .
|
||||
|
||||
# Use glib for memory allocation.
|
||||
patch > /dev/null < $IN/memory.patch
|
||||
|
||||
# Copy the modified version of pcre_valid_utf8.c.
|
||||
cp $IN/pcre_valid_utf8.c .
|
||||
|
||||
# Copy the modified version of pcre_ucp_searchfuncs.c that uses glib
|
||||
# for Unicode properties.
|
||||
cp $IN/pcre_ucp_searchfuncs.c .
|
||||
patch > /dev/null < $IN/ucp.patch
|
||||
|
||||
# Remove the digitab array in pcre_compile.c.
|
||||
patch > /dev/null < $IN/digitab.patch
|
||||
sed -i -e 's/(digitab\[\(.*\)\] & ctype_digit)/g_ascii_isdigit(\1)/' pcre_compile.c
|
||||
sed -i -e 's/(digitab\[\(.*\)\] & ctype_xdigit)/g_ascii_isxdigit(\1)/' pcre_compile.c
|
||||
|
||||
# Reduce the number of relocations.
|
||||
python $IN/make_utt.py
|
||||
patch > /dev/null < $IN/utt.patch
|
||||
patch > /dev/null < $IN/table-reduction.patch
|
||||
|
||||
# Copy back the old SVN directory.
|
||||
mv ../tmp-pcre-svn .svn
|
||||
|
||||
|
||||
cat << EOF
|
||||
|
||||
Update completed. You now should check that everything is working.
|
||||
Remember to update the regex syntax doc with the new features
|
||||
(docs/reference/glib/regex-syntax.sgml) and to run the tests.
|
||||
EOF
|
||||
|
||||
Reference in New Issue
Block a user