mirror of
https://gitlab.gnome.org/GNOME/glib.git
synced 2025-01-26 05:56:14 +01:00
Merge branch 'wip/pwithnall/962-drop-embedded-pcre' into 'main'
pcre: Drop internal libpcre copy Closes #962 and #642 See merge request GNOME/glib!2144
This commit is contained in:
commit
74595ab64a
@ -10,11 +10,11 @@ cache:
|
||||
- _ccache/
|
||||
|
||||
variables:
|
||||
FEDORA_IMAGE: "registry.gitlab.gnome.org/gnome/glib/fedora:v10"
|
||||
FEDORA_IMAGE: "registry.gitlab.gnome.org/gnome/glib/fedora:v11"
|
||||
COVERITY_IMAGE: "registry.gitlab.gnome.org/gnome/glib/coverity:v1"
|
||||
DEBIAN_IMAGE: "registry.gitlab.gnome.org/gnome/glib/debian-stable:v7"
|
||||
ANDROID_IMAGE: "registry.gitlab.gnome.org/gnome/glib/android-ndk:v3"
|
||||
MINGW_IMAGE: "registry.gitlab.gnome.org/gnome/glib/mingw:v2"
|
||||
DEBIAN_IMAGE: "registry.gitlab.gnome.org/gnome/glib/debian-stable:v8"
|
||||
ANDROID_IMAGE: "registry.gitlab.gnome.org/gnome/glib/android-ndk:v4"
|
||||
MINGW_IMAGE: "registry.gitlab.gnome.org/gnome/glib/mingw:v3"
|
||||
MESON_TEST_TIMEOUT_MULTIPLIER: 2
|
||||
G_MESSAGES_DEBUG: all
|
||||
MESON_COMMON_OPTIONS_NO_WARNING: "--buildtype debug --wrap-mode=nodownload"
|
||||
@ -270,7 +270,10 @@ cross-android_api21_arm64:
|
||||
# FIXME: add --werror
|
||||
# We use -Diconv=auto to test that we successfully detect that iconv is not
|
||||
# provided by android api 21, and detect the external iconv instead.
|
||||
- meson ${MESON_COMMON_OPTIONS} --cross-file=/opt/cross_file_android_arm64_21.txt -Diconv=auto -Dinternal_pcre=true _build
|
||||
# FIXME: Work around a bug in Meson 0.49 where --wrap-mode=nodownload also
|
||||
# disables fallback subprojects, by passing --wrap-mode=default. Fixed in
|
||||
# Meson commit 47b9c1a564756ac48a55da9a7c4d91787399c645
|
||||
- meson ${MESON_COMMON_OPTIONS} --cross-file=/opt/cross_file_android_arm64_21.txt -Diconv=auto --wrap-mode=default _build
|
||||
- ninja -C _build
|
||||
|
||||
cross-android_api28_arm64:
|
||||
@ -278,7 +281,10 @@ cross-android_api28_arm64:
|
||||
image: $ANDROID_IMAGE
|
||||
script:
|
||||
# FIXME: add --werror
|
||||
- meson ${MESON_COMMON_OPTIONS} --cross-file=/opt/cross_file_android_arm64_28.txt -Dinternal_pcre=true _build
|
||||
# FIXME: Work around a bug in Meson 0.49 where --wrap-mode=nodownload also
|
||||
# disables fallback subprojects, by passing --wrap-mode=default. Fixed in
|
||||
# Meson commit 47b9c1a564756ac48a55da9a7c4d91787399c645
|
||||
- meson ${MESON_COMMON_OPTIONS} --cross-file=/opt/cross_file_android_arm64_28.txt --wrap-mode=default _build
|
||||
- ninja -C _build
|
||||
|
||||
cross-mingw64:
|
||||
@ -286,7 +292,10 @@ cross-mingw64:
|
||||
image: $MINGW_IMAGE
|
||||
script:
|
||||
# FIXME: Add --werror
|
||||
- meson ${MESON_COMMON_OPTIONS} --cross-file=/opt/cross_file_mingw64.txt _build
|
||||
# FIXME: Work around a bug in Meson 0.49 where --wrap-mode=nodownload also
|
||||
# disables fallback subprojects, by passing --wrap-mode=default. Fixed in
|
||||
# Meson commit 47b9c1a564756ac48a55da9a7c4d91787399c645
|
||||
- meson ${MESON_COMMON_OPTIONS} --cross-file=/opt/cross_file_mingw64.txt --wrap-mode=default _build
|
||||
- ninja -C _build
|
||||
|
||||
msys2-mingw32:
|
||||
|
@ -1,4 +1,4 @@
|
||||
FROM fedora:28
|
||||
FROM fedora:31
|
||||
|
||||
RUN dnf -y install \
|
||||
autoconf \
|
||||
@ -44,6 +44,7 @@ RUN dnf -y install \
|
||||
ncurses-compat-libs \
|
||||
ninja-build \
|
||||
pcre-devel \
|
||||
python-unversioned-command \
|
||||
python3 \
|
||||
python3-pip \
|
||||
python3-wheel \
|
||||
|
@ -1,4 +1,4 @@
|
||||
FROM fedora:29
|
||||
FROM fedora:31
|
||||
|
||||
RUN dnf -y install \
|
||||
bindfs \
|
||||
|
@ -153,12 +153,9 @@
|
||||
<listitem>
|
||||
<para>
|
||||
GRegex uses the <ulink url="http://www.pcre.org/">PCRE library</ulink>
|
||||
for regular expression matching. The default is to use the system
|
||||
version of PCRE, to reduce the chances of security fixes going out
|
||||
of sync. GLib additionally provides an internal copy of PCRE in case
|
||||
the system version is too old, or does not support UTF-8; the internal
|
||||
copy is patched to use GLib for memory management and to share the
|
||||
same Unicode tables.
|
||||
for regular expression matching. The system version of PCRE is used,
|
||||
unless not available (which is the case on Android), in which case a
|
||||
fallback subproject is used.
|
||||
</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
@ -235,45 +232,6 @@
|
||||
</para>
|
||||
</formalpara>
|
||||
|
||||
<formalpara>
|
||||
<title><option>-Dinternal_pcre=true</option></title>
|
||||
|
||||
<para>
|
||||
Normally, GLib will be configured to use the system-supplied PCRE
|
||||
library if it is suitable, falling back to an internal version
|
||||
otherwise. If this option is specified, the internal version will always
|
||||
be used.
|
||||
</para>
|
||||
<para>
|
||||
Using the internal PCRE is the preferred solution if:
|
||||
<itemizedlist>
|
||||
<listitem>
|
||||
<para>
|
||||
your system has strict resource constraints; the system-supplied
|
||||
PCRE has a separated copy of the tables used for Unicode
|
||||
handling, whereas the internal copy shares the Unicode tables
|
||||
used by GLib.
|
||||
</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>
|
||||
your system has PCRE built without some needed features,
|
||||
such as UTF-8 and Unicode support.
|
||||
</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>
|
||||
you are planning to use both GRegex and PCRE API at the same
|
||||
time, either directly or indirectly through a dependency; PCRE
|
||||
uses some global variables for memory management and
|
||||
other features, and if both GLib and PCRE try to access them
|
||||
at the same time, this could lead to undefined behavior.
|
||||
</para>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
</formalpara>
|
||||
|
||||
<formalpara>
|
||||
<title><option>-Dbsymbolic_functions=false</option> and
|
||||
<option>-Dbsymbolic_functions=true</option></title>
|
||||
|
@ -25,8 +25,6 @@ if get_option('gtk_doc')
|
||||
'gtrace-private.h',
|
||||
'glib-mirroring-tab',
|
||||
'gnulib',
|
||||
'pcre',
|
||||
'update-pcre',
|
||||
'gbytesprivate.h',
|
||||
'gvariant-internal.h',
|
||||
'gvariant-serialiser.h',
|
||||
|
@ -22,11 +22,7 @@
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#ifdef USE_SYSTEM_PCRE
|
||||
#include <pcre.h>
|
||||
#else
|
||||
#include "pcre/pcre.h"
|
||||
#endif
|
||||
|
||||
#include "gtypes.h"
|
||||
#include "gregex.h"
|
||||
|
@ -3,9 +3,6 @@ configure_file(input : 'glibconfig.h.in', output : 'glibconfig.h',
|
||||
configuration : glibconfig_conf)
|
||||
|
||||
subdir('libcharset')
|
||||
if not use_system_pcre
|
||||
subdir('pcre')
|
||||
endif
|
||||
|
||||
# libsysprof-capture support
|
||||
libsysprof_capture_dep = dependency('sysprof-capture-4', version: '>= 3.38.0',
|
||||
@ -23,7 +20,7 @@ libsysprof_capture_dep = dependency('sysprof-capture-4', version: '>= 3.38.0',
|
||||
)
|
||||
glib_conf.set('HAVE_SYSPROF', libsysprof_capture_dep.found())
|
||||
|
||||
# TODO: gnulib_objects, pcre_objects and pcre_deps are a workaround for
|
||||
# TODO: gnulib_objects is a workaround for
|
||||
# <https://github.com/mesonbuild/meson/issues/3934> and
|
||||
# <https://github.com/mesonbuild/meson/issues/3937>. When we can depend
|
||||
# on a meson version where those are fixed, revert the commit that
|
||||
@ -358,19 +355,11 @@ if use_pcre_static_flag
|
||||
pcre_static_args = ['-DPCRE_STATIC']
|
||||
endif
|
||||
|
||||
if use_system_pcre
|
||||
pcre_deps = [pcre]
|
||||
pcre_objects = []
|
||||
else
|
||||
pcre_deps = []
|
||||
pcre_objects = [libpcre.extract_all_objects()]
|
||||
endif
|
||||
|
||||
glib_c_args = ['-DG_LOG_DOMAIN="GLib"', '-DGLIB_COMPILATION'] + pcre_static_args + glib_hidden_visibility_args
|
||||
libglib = library('glib-2.0',
|
||||
glib_dtrace_obj, glib_dtrace_hdr,
|
||||
sources : [deprecated_sources, glib_sources],
|
||||
objects : [charset_lib.extract_all_objects()] + gnulib_objects + pcre_objects,
|
||||
objects : [charset_lib.extract_all_objects()] + gnulib_objects,
|
||||
version : library_version,
|
||||
soversion : soversion,
|
||||
darwin_versions : darwin_versions,
|
||||
@ -378,7 +367,7 @@ libglib = library('glib-2.0',
|
||||
# intl.lib is not compatible with SAFESEH
|
||||
link_args : [noseh_link_args, glib_link_flags, win32_ldflags],
|
||||
include_directories : configinc,
|
||||
dependencies : pcre_deps + [thread_dep, librt] + libintl_deps + libiconv + platform_deps + [gnulib_libm_dependency, libm] + [libsysprof_capture_dep],
|
||||
dependencies : [pcre, thread_dep, librt] + libintl_deps + libiconv + platform_deps + [gnulib_libm_dependency, libm] + [libsysprof_capture_dep],
|
||||
c_args : glib_c_args,
|
||||
objc_args : glib_c_args,
|
||||
)
|
||||
|
@ -1,5 +0,0 @@
|
||||
PCRE LICENCE
|
||||
|
||||
Please see the file LICENCE in the PCRE distribution for licensing details.
|
||||
|
||||
End
|
@ -1,50 +0,0 @@
|
||||
pcre_sources = [
|
||||
'pcre_byte_order.c',
|
||||
'pcre_chartables.c',
|
||||
'pcre_compile.c',
|
||||
'pcre_config.c',
|
||||
'pcre_dfa_exec.c',
|
||||
'pcre_exec.c',
|
||||
'pcre_fullinfo.c',
|
||||
'pcre_get.c',
|
||||
'pcre_globals.c',
|
||||
'pcre_jit_compile.c',
|
||||
'pcre_newline.c',
|
||||
'pcre_ord2utf8.c',
|
||||
'pcre_string_utils.c',
|
||||
'pcre_study.c',
|
||||
'pcre_tables.c',
|
||||
'pcre_valid_utf8.c',
|
||||
'pcre_version.c',
|
||||
'pcre_xclass.c',
|
||||
'pcre.h',
|
||||
'pcre_internal.h',
|
||||
'ucp.h',
|
||||
]
|
||||
|
||||
libpcre = static_library('pcre',
|
||||
sources : [pcre_sources],
|
||||
include_directories : [configinc, glibinc],
|
||||
pic : true,
|
||||
c_args : [
|
||||
'-DG_LOG_DOMAIN="GLib-GRegex"',
|
||||
'-DHAVE_MEMMOVE',
|
||||
'-DSUPPORT_UCP',
|
||||
'-DSUPPORT_UTF',
|
||||
'-DSUPPORT_UTF8',
|
||||
'-DNEWLINE=-1',
|
||||
'-DMATCH_LIMIT=10000000',
|
||||
'-DMATCH_LIMIT_RECURSION=8192',
|
||||
'-DMAX_NAME_SIZE=32',
|
||||
'-DMAX_NAME_COUNT=10000',
|
||||
'-DMAX_DUPLENGTH=30000',
|
||||
'-DLINK_SIZE=2',
|
||||
'-DPOSIX_MALLOC_THRESHOLD=10',
|
||||
'-DPCRE_STATIC',
|
||||
'-UBSR_ANYCRLF',
|
||||
'-UEBCDIC',
|
||||
'-DGLIB_COMPILATION'
|
||||
] + glib_hidden_visibility_args
|
||||
)
|
||||
|
||||
pcre = declare_dependency(link_with : libpcre)
|
507
glib/pcre/pcre.h
507
glib/pcre/pcre.h
@ -1,507 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* This is the public header file for the PCRE library, to be #included by
|
||||
applications that call the PCRE functions.
|
||||
|
||||
Copyright (c) 1997-2012 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef _PCRE_H
|
||||
#define _PCRE_H
|
||||
|
||||
/* The current PCRE version information. */
|
||||
|
||||
#define PCRE_MAJOR 8
|
||||
#define PCRE_MINOR 31
|
||||
#define PCRE_PRERELEASE
|
||||
#define PCRE_DATE 2012-07-06
|
||||
|
||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||
imported have to be identified as such. When building PCRE, the appropriate
|
||||
export setting is defined in pcre_internal.h, which includes this file. So we
|
||||
don't change existing definitions of PCRE_EXP_DECL and PCRECPP_EXP_DECL. */
|
||||
|
||||
#if defined(_WIN32) && !defined(PCRE_STATIC)
|
||||
# ifndef PCRE_EXP_DECL
|
||||
# define PCRE_EXP_DECL extern __declspec(dllimport)
|
||||
# endif
|
||||
# ifdef __cplusplus
|
||||
# ifndef PCRECPP_EXP_DECL
|
||||
# define PCRECPP_EXP_DECL extern __declspec(dllimport)
|
||||
# endif
|
||||
# ifndef PCRECPP_EXP_DEFN
|
||||
# define PCRECPP_EXP_DEFN __declspec(dllimport)
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* By default, we use the standard "extern" declarations. */
|
||||
|
||||
#ifndef PCRE_EXP_DECL
|
||||
# ifdef __cplusplus
|
||||
# define PCRE_EXP_DECL extern "C"
|
||||
# else
|
||||
# define PCRE_EXP_DECL extern
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
# ifndef PCRECPP_EXP_DECL
|
||||
# define PCRECPP_EXP_DECL extern
|
||||
# endif
|
||||
# ifndef PCRECPP_EXP_DEFN
|
||||
# define PCRECPP_EXP_DEFN
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* Have to include stdlib.h in order to ensure that size_t is defined;
|
||||
it is needed here for malloc. */
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
/* Allow for C++ users */
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Options. Some are compile-time only, some are run-time only, and some are
|
||||
both, so we keep them all distinct. However, almost all the bits in the options
|
||||
word are now used. In the long run, we may have to re-use some of the
|
||||
compile-time only bits for runtime options, or vice versa. In the comments
|
||||
below, "compile", "exec", and "DFA exec" mean that the option is permitted to
|
||||
be set for those functions; "used in" means that an option may be set only for
|
||||
compile, but is subsequently referenced in exec and/or DFA exec. Any of the
|
||||
compile-time options may be inspected during studying (and therefore JIT
|
||||
compiling). */
|
||||
|
||||
#define PCRE_CASELESS 0x00000001 /* Compile */
|
||||
#define PCRE_MULTILINE 0x00000002 /* Compile */
|
||||
#define PCRE_DOTALL 0x00000004 /* Compile */
|
||||
#define PCRE_EXTENDED 0x00000008 /* Compile */
|
||||
#define PCRE_ANCHORED 0x00000010 /* Compile, exec, DFA exec */
|
||||
#define PCRE_DOLLAR_ENDONLY 0x00000020 /* Compile, used in exec, DFA exec */
|
||||
#define PCRE_EXTRA 0x00000040 /* Compile */
|
||||
#define PCRE_NOTBOL 0x00000080 /* Exec, DFA exec */
|
||||
#define PCRE_NOTEOL 0x00000100 /* Exec, DFA exec */
|
||||
#define PCRE_UNGREEDY 0x00000200 /* Compile */
|
||||
#define PCRE_NOTEMPTY 0x00000400 /* Exec, DFA exec */
|
||||
/* The next two are also used in exec and DFA exec */
|
||||
#define PCRE_UTF8 0x00000800 /* Compile (same as PCRE_UTF16) */
|
||||
#define PCRE_UTF16 0x00000800 /* Compile (same as PCRE_UTF8) */
|
||||
#define PCRE_NO_AUTO_CAPTURE 0x00001000 /* Compile */
|
||||
/* The next two are also used in exec and DFA exec */
|
||||
#define PCRE_NO_UTF8_CHECK 0x00002000 /* Compile (same as PCRE_NO_UTF16_CHECK) */
|
||||
#define PCRE_NO_UTF16_CHECK 0x00002000 /* Compile (same as PCRE_NO_UTF8_CHECK) */
|
||||
#define PCRE_AUTO_CALLOUT 0x00004000 /* Compile */
|
||||
#define PCRE_PARTIAL_SOFT 0x00008000 /* Exec, DFA exec */
|
||||
#define PCRE_PARTIAL 0x00008000 /* Backwards compatible synonym */
|
||||
#define PCRE_DFA_SHORTEST 0x00010000 /* DFA exec */
|
||||
#define PCRE_DFA_RESTART 0x00020000 /* DFA exec */
|
||||
#define PCRE_FIRSTLINE 0x00040000 /* Compile, used in exec, DFA exec */
|
||||
#define PCRE_DUPNAMES 0x00080000 /* Compile */
|
||||
#define PCRE_NEWLINE_CR 0x00100000 /* Compile, exec, DFA exec */
|
||||
#define PCRE_NEWLINE_LF 0x00200000 /* Compile, exec, DFA exec */
|
||||
#define PCRE_NEWLINE_CRLF 0x00300000 /* Compile, exec, DFA exec */
|
||||
#define PCRE_NEWLINE_ANY 0x00400000 /* Compile, exec, DFA exec */
|
||||
#define PCRE_NEWLINE_ANYCRLF 0x00500000 /* Compile, exec, DFA exec */
|
||||
#define PCRE_BSR_ANYCRLF 0x00800000 /* Compile, exec, DFA exec */
|
||||
#define PCRE_BSR_UNICODE 0x01000000 /* Compile, exec, DFA exec */
|
||||
#define PCRE_JAVASCRIPT_COMPAT 0x02000000 /* Compile, used in exec */
|
||||
#define PCRE_NO_START_OPTIMIZE 0x04000000 /* Compile, exec, DFA exec */
|
||||
#define PCRE_NO_START_OPTIMISE 0x04000000 /* Synonym */
|
||||
#define PCRE_PARTIAL_HARD 0x08000000 /* Exec, DFA exec */
|
||||
#define PCRE_NOTEMPTY_ATSTART 0x10000000 /* Exec, DFA exec */
|
||||
#define PCRE_UCP 0x20000000 /* Compile, used in exec, DFA exec */
|
||||
|
||||
/* Exec-time and get/set-time error codes */
|
||||
|
||||
#define PCRE_ERROR_NOMATCH (-1)
|
||||
#define PCRE_ERROR_NULL (-2)
|
||||
#define PCRE_ERROR_BADOPTION (-3)
|
||||
#define PCRE_ERROR_BADMAGIC (-4)
|
||||
#define PCRE_ERROR_UNKNOWN_OPCODE (-5)
|
||||
#define PCRE_ERROR_UNKNOWN_NODE (-5) /* For backward compatibility */
|
||||
#define PCRE_ERROR_NOMEMORY (-6)
|
||||
#define PCRE_ERROR_NOSUBSTRING (-7)
|
||||
#define PCRE_ERROR_MATCHLIMIT (-8)
|
||||
#define PCRE_ERROR_CALLOUT (-9) /* Never used by PCRE itself */
|
||||
#define PCRE_ERROR_BADUTF8 (-10) /* Same for 8/16 */
|
||||
#define PCRE_ERROR_BADUTF16 (-10) /* Same for 8/16 */
|
||||
#define PCRE_ERROR_BADUTF8_OFFSET (-11) /* Same for 8/16 */
|
||||
#define PCRE_ERROR_BADUTF16_OFFSET (-11) /* Same for 8/16 */
|
||||
#define PCRE_ERROR_PARTIAL (-12)
|
||||
#define PCRE_ERROR_BADPARTIAL (-13)
|
||||
#define PCRE_ERROR_INTERNAL (-14)
|
||||
#define PCRE_ERROR_BADCOUNT (-15)
|
||||
#define PCRE_ERROR_DFA_UITEM (-16)
|
||||
#define PCRE_ERROR_DFA_UCOND (-17)
|
||||
#define PCRE_ERROR_DFA_UMLIMIT (-18)
|
||||
#define PCRE_ERROR_DFA_WSSIZE (-19)
|
||||
#define PCRE_ERROR_DFA_RECURSE (-20)
|
||||
#define PCRE_ERROR_RECURSIONLIMIT (-21)
|
||||
#define PCRE_ERROR_NULLWSLIMIT (-22) /* No longer actually used */
|
||||
#define PCRE_ERROR_BADNEWLINE (-23)
|
||||
#define PCRE_ERROR_BADOFFSET (-24)
|
||||
#define PCRE_ERROR_SHORTUTF8 (-25)
|
||||
#define PCRE_ERROR_SHORTUTF16 (-25) /* Same for 8/16 */
|
||||
#define PCRE_ERROR_RECURSELOOP (-26)
|
||||
#define PCRE_ERROR_JIT_STACKLIMIT (-27)
|
||||
#define PCRE_ERROR_BADMODE (-28)
|
||||
#define PCRE_ERROR_BADENDIANNESS (-29)
|
||||
#define PCRE_ERROR_DFA_BADRESTART (-30)
|
||||
|
||||
/* Specific error codes for UTF-8 validity checks */
|
||||
|
||||
#define PCRE_UTF8_ERR0 0
|
||||
#define PCRE_UTF8_ERR1 1
|
||||
#define PCRE_UTF8_ERR2 2
|
||||
#define PCRE_UTF8_ERR3 3
|
||||
#define PCRE_UTF8_ERR4 4
|
||||
#define PCRE_UTF8_ERR5 5
|
||||
#define PCRE_UTF8_ERR6 6
|
||||
#define PCRE_UTF8_ERR7 7
|
||||
#define PCRE_UTF8_ERR8 8
|
||||
#define PCRE_UTF8_ERR9 9
|
||||
#define PCRE_UTF8_ERR10 10
|
||||
#define PCRE_UTF8_ERR11 11
|
||||
#define PCRE_UTF8_ERR12 12
|
||||
#define PCRE_UTF8_ERR13 13
|
||||
#define PCRE_UTF8_ERR14 14
|
||||
#define PCRE_UTF8_ERR15 15
|
||||
#define PCRE_UTF8_ERR16 16
|
||||
#define PCRE_UTF8_ERR17 17
|
||||
#define PCRE_UTF8_ERR18 18
|
||||
#define PCRE_UTF8_ERR19 19
|
||||
#define PCRE_UTF8_ERR20 20
|
||||
#define PCRE_UTF8_ERR21 21
|
||||
|
||||
/* Specific error codes for UTF-16 validity checks */
|
||||
|
||||
#define PCRE_UTF16_ERR0 0
|
||||
#define PCRE_UTF16_ERR1 1
|
||||
#define PCRE_UTF16_ERR2 2
|
||||
#define PCRE_UTF16_ERR3 3
|
||||
#define PCRE_UTF16_ERR4 4
|
||||
|
||||
/* Request types for pcre_fullinfo() */
|
||||
|
||||
#define PCRE_INFO_OPTIONS 0
|
||||
#define PCRE_INFO_SIZE 1
|
||||
#define PCRE_INFO_CAPTURECOUNT 2
|
||||
#define PCRE_INFO_BACKREFMAX 3
|
||||
#define PCRE_INFO_FIRSTBYTE 4
|
||||
#define PCRE_INFO_FIRSTCHAR 4 /* For backwards compatibility */
|
||||
#define PCRE_INFO_FIRSTTABLE 5
|
||||
#define PCRE_INFO_LASTLITERAL 6
|
||||
#define PCRE_INFO_NAMEENTRYSIZE 7
|
||||
#define PCRE_INFO_NAMECOUNT 8
|
||||
#define PCRE_INFO_NAMETABLE 9
|
||||
#define PCRE_INFO_STUDYSIZE 10
|
||||
#define PCRE_INFO_DEFAULT_TABLES 11
|
||||
#define PCRE_INFO_OKPARTIAL 12
|
||||
#define PCRE_INFO_JCHANGED 13
|
||||
#define PCRE_INFO_HASCRORLF 14
|
||||
#define PCRE_INFO_MINLENGTH 15
|
||||
#define PCRE_INFO_JIT 16
|
||||
#define PCRE_INFO_JITSIZE 17
|
||||
#define PCRE_INFO_MAXLOOKBEHIND 18
|
||||
|
||||
/* Request types for pcre_config(). Do not re-arrange, in order to remain
|
||||
compatible. */
|
||||
|
||||
#define PCRE_CONFIG_UTF8 0
|
||||
#define PCRE_CONFIG_NEWLINE 1
|
||||
#define PCRE_CONFIG_LINK_SIZE 2
|
||||
#define PCRE_CONFIG_POSIX_MALLOC_THRESHOLD 3
|
||||
#define PCRE_CONFIG_MATCH_LIMIT 4
|
||||
#define PCRE_CONFIG_STACKRECURSE 5
|
||||
#define PCRE_CONFIG_UNICODE_PROPERTIES 6
|
||||
#define PCRE_CONFIG_MATCH_LIMIT_RECURSION 7
|
||||
#define PCRE_CONFIG_BSR 8
|
||||
#define PCRE_CONFIG_JIT 9
|
||||
#define PCRE_CONFIG_UTF16 10
|
||||
#define PCRE_CONFIG_JITTARGET 11
|
||||
|
||||
/* Request types for pcre_study(). Do not re-arrange, in order to remain
|
||||
compatible. */
|
||||
|
||||
#define PCRE_STUDY_JIT_COMPILE 0x0001
|
||||
#define PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE 0x0002
|
||||
#define PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE 0x0004
|
||||
|
||||
/* Bit flags for the pcre[16]_extra structure. Do not re-arrange or redefine
|
||||
these bits, just add new ones on the end, in order to remain compatible. */
|
||||
|
||||
#define PCRE_EXTRA_STUDY_DATA 0x0001
|
||||
#define PCRE_EXTRA_MATCH_LIMIT 0x0002
|
||||
#define PCRE_EXTRA_CALLOUT_DATA 0x0004
|
||||
#define PCRE_EXTRA_TABLES 0x0008
|
||||
#define PCRE_EXTRA_MATCH_LIMIT_RECURSION 0x0010
|
||||
#define PCRE_EXTRA_MARK 0x0020
|
||||
#define PCRE_EXTRA_EXECUTABLE_JIT 0x0040
|
||||
|
||||
/* Types */
|
||||
|
||||
struct real_pcre; /* declaration; the definition is private */
|
||||
typedef struct real_pcre pcre;
|
||||
|
||||
struct real_pcre16; /* declaration; the definition is private */
|
||||
typedef struct real_pcre16 pcre16;
|
||||
|
||||
struct real_pcre_jit_stack; /* declaration; the definition is private */
|
||||
typedef struct real_pcre_jit_stack pcre_jit_stack;
|
||||
|
||||
struct real_pcre16_jit_stack; /* declaration; the definition is private */
|
||||
typedef struct real_pcre16_jit_stack pcre16_jit_stack;
|
||||
|
||||
/* If PCRE is compiled with 16 bit character support, PCRE_UCHAR16 must contain
|
||||
a 16 bit wide unsigned data type. Otherwise it can be a dummy data type since
|
||||
pcre16 functions are not implemented. There is a check for this in pcre_internal.h. */
|
||||
#ifndef PCRE_UCHAR16
|
||||
#define PCRE_UCHAR16 unsigned short
|
||||
#endif
|
||||
|
||||
#ifndef PCRE_SPTR16
|
||||
#define PCRE_SPTR16 const PCRE_UCHAR16 *
|
||||
#endif
|
||||
|
||||
/* When PCRE is compiled as a C++ library, the subject pointer type can be
|
||||
replaced with a custom type. For conventional use, the public interface is a
|
||||
const char *. */
|
||||
|
||||
#ifndef PCRE_SPTR
|
||||
#define PCRE_SPTR const char *
|
||||
#endif
|
||||
|
||||
/* The structure for passing additional data to pcre_exec(). This is defined in
|
||||
such a way as to be extensible. Always add new fields at the end, in order to
|
||||
remain compatible. */
|
||||
|
||||
typedef struct pcre_extra {
|
||||
unsigned long int flags; /* Bits for which fields are set */
|
||||
void *study_data; /* Opaque data from pcre_study() */
|
||||
unsigned long int match_limit; /* Maximum number of calls to match() */
|
||||
void *callout_data; /* Data passed back in callouts */
|
||||
const unsigned char *tables; /* Pointer to character tables */
|
||||
unsigned long int match_limit_recursion; /* Max recursive calls to match() */
|
||||
unsigned char **mark; /* For passing back a mark pointer */
|
||||
void *executable_jit; /* Contains a pointer to a compiled jit code */
|
||||
} pcre_extra;
|
||||
|
||||
/* Same structure as above, but with 16 bit char pointers. */
|
||||
|
||||
typedef struct pcre16_extra {
|
||||
unsigned long int flags; /* Bits for which fields are set */
|
||||
void *study_data; /* Opaque data from pcre_study() */
|
||||
unsigned long int match_limit; /* Maximum number of calls to match() */
|
||||
void *callout_data; /* Data passed back in callouts */
|
||||
const unsigned char *tables; /* Pointer to character tables */
|
||||
unsigned long int match_limit_recursion; /* Max recursive calls to match() */
|
||||
PCRE_UCHAR16 **mark; /* For passing back a mark pointer */
|
||||
void *executable_jit; /* Contains a pointer to a compiled jit code */
|
||||
} pcre16_extra;
|
||||
|
||||
/* The structure for passing out data via the pcre_callout_function. We use a
|
||||
structure so that new fields can be added on the end in future versions,
|
||||
without changing the API of the function, thereby allowing old clients to work
|
||||
without modification. */
|
||||
|
||||
typedef struct pcre_callout_block {
|
||||
int version; /* Identifies version of block */
|
||||
/* ------------------------ Version 0 ------------------------------- */
|
||||
int callout_number; /* Number compiled into pattern */
|
||||
int *offset_vector; /* The offset vector */
|
||||
PCRE_SPTR subject; /* The subject being matched */
|
||||
int subject_length; /* The length of the subject */
|
||||
int start_match; /* Offset to start of this match attempt */
|
||||
int current_position; /* Where we currently are in the subject */
|
||||
int capture_top; /* Max current capture */
|
||||
int capture_last; /* Most recently closed capture */
|
||||
void *callout_data; /* Data passed in with the call */
|
||||
/* ------------------- Added for Version 1 -------------------------- */
|
||||
int pattern_position; /* Offset to next item in the pattern */
|
||||
int next_item_length; /* Length of next item in the pattern */
|
||||
/* ------------------- Added for Version 2 -------------------------- */
|
||||
const unsigned char *mark; /* Pointer to current mark or NULL */
|
||||
/* ------------------------------------------------------------------ */
|
||||
} pcre_callout_block;
|
||||
|
||||
/* Same structure as above, but with 16 bit char pointers. */
|
||||
|
||||
typedef struct pcre16_callout_block {
|
||||
int version; /* Identifies version of block */
|
||||
/* ------------------------ Version 0 ------------------------------- */
|
||||
int callout_number; /* Number compiled into pattern */
|
||||
int *offset_vector; /* The offset vector */
|
||||
PCRE_SPTR16 subject; /* The subject being matched */
|
||||
int subject_length; /* The length of the subject */
|
||||
int start_match; /* Offset to start of this match attempt */
|
||||
int current_position; /* Where we currently are in the subject */
|
||||
int capture_top; /* Max current capture */
|
||||
int capture_last; /* Most recently closed capture */
|
||||
void *callout_data; /* Data passed in with the call */
|
||||
/* ------------------- Added for Version 1 -------------------------- */
|
||||
int pattern_position; /* Offset to next item in the pattern */
|
||||
int next_item_length; /* Length of next item in the pattern */
|
||||
/* ------------------- Added for Version 2 -------------------------- */
|
||||
const PCRE_UCHAR16 *mark; /* Pointer to current mark or NULL */
|
||||
/* ------------------------------------------------------------------ */
|
||||
} pcre16_callout_block;
|
||||
|
||||
/* Indirection for store get and free functions. These can be set to
|
||||
alternative malloc/free functions if required. Special ones are used in the
|
||||
non-recursive case for "frames". There is also an optional callout function
|
||||
that is triggered by the (?) regex item. For Virtual Pascal, these definitions
|
||||
have to take another form. */
|
||||
|
||||
#ifndef VPCOMPAT
|
||||
PCRE_EXP_DECL void *(*pcre_malloc)(size_t);
|
||||
PCRE_EXP_DECL void (*pcre_free)(void *);
|
||||
PCRE_EXP_DECL void *(*pcre_stack_malloc)(size_t);
|
||||
PCRE_EXP_DECL void (*pcre_stack_free)(void *);
|
||||
PCRE_EXP_DECL int (*pcre_callout)(pcre_callout_block *);
|
||||
|
||||
PCRE_EXP_DECL void *(*pcre16_malloc)(size_t);
|
||||
PCRE_EXP_DECL void (*pcre16_free)(void *);
|
||||
PCRE_EXP_DECL void *(*pcre16_stack_malloc)(size_t);
|
||||
PCRE_EXP_DECL void (*pcre16_stack_free)(void *);
|
||||
PCRE_EXP_DECL int (*pcre16_callout)(pcre16_callout_block *);
|
||||
#else /* VPCOMPAT */
|
||||
PCRE_EXP_DECL void *pcre_malloc(size_t);
|
||||
PCRE_EXP_DECL void pcre_free(void *);
|
||||
PCRE_EXP_DECL void *pcre_stack_malloc(size_t);
|
||||
PCRE_EXP_DECL void pcre_stack_free(void *);
|
||||
PCRE_EXP_DECL int pcre_callout(pcre_callout_block *);
|
||||
|
||||
PCRE_EXP_DECL void *pcre16_malloc(size_t);
|
||||
PCRE_EXP_DECL void pcre16_free(void *);
|
||||
PCRE_EXP_DECL void *pcre16_stack_malloc(size_t);
|
||||
PCRE_EXP_DECL void pcre16_stack_free(void *);
|
||||
PCRE_EXP_DECL int pcre16_callout(pcre16_callout_block *);
|
||||
#endif /* VPCOMPAT */
|
||||
|
||||
/* User defined callback which provides a stack just before the match starts. */
|
||||
|
||||
typedef pcre_jit_stack *(*pcre_jit_callback)(void *);
|
||||
typedef pcre16_jit_stack *(*pcre16_jit_callback)(void *);
|
||||
|
||||
/* Exported PCRE functions */
|
||||
|
||||
PCRE_EXP_DECL pcre *pcre_compile(const char *, int, const char **, int *,
|
||||
const unsigned char *);
|
||||
PCRE_EXP_DECL pcre16 *pcre16_compile(PCRE_SPTR16, int, const char **, int *,
|
||||
const unsigned char *);
|
||||
PCRE_EXP_DECL pcre *pcre_compile2(const char *, int, int *, const char **,
|
||||
int *, const unsigned char *);
|
||||
PCRE_EXP_DECL pcre16 *pcre16_compile2(PCRE_SPTR16, int, int *, const char **,
|
||||
int *, const unsigned char *);
|
||||
PCRE_EXP_DECL int pcre_config(int, void *);
|
||||
PCRE_EXP_DECL int pcre16_config(int, void *);
|
||||
PCRE_EXP_DECL int pcre_copy_named_substring(const pcre *, const char *,
|
||||
int *, int, const char *, char *, int);
|
||||
PCRE_EXP_DECL int pcre16_copy_named_substring(const pcre16 *, PCRE_SPTR16,
|
||||
int *, int, PCRE_SPTR16, PCRE_UCHAR16 *, int);
|
||||
PCRE_EXP_DECL int pcre_copy_substring(const char *, int *, int, int,
|
||||
char *, int);
|
||||
PCRE_EXP_DECL int pcre16_copy_substring(PCRE_SPTR16, int *, int, int,
|
||||
PCRE_UCHAR16 *, int);
|
||||
PCRE_EXP_DECL int pcre_dfa_exec(const pcre *, const pcre_extra *,
|
||||
const char *, int, int, int, int *, int , int *, int);
|
||||
PCRE_EXP_DECL int pcre16_dfa_exec(const pcre16 *, const pcre16_extra *,
|
||||
PCRE_SPTR16, int, int, int, int *, int , int *, int);
|
||||
PCRE_EXP_DECL int pcre_exec(const pcre *, const pcre_extra *, PCRE_SPTR,
|
||||
int, int, int, int *, int);
|
||||
PCRE_EXP_DECL int pcre16_exec(const pcre16 *, const pcre16_extra *,
|
||||
PCRE_SPTR16, int, int, int, int *, int);
|
||||
PCRE_EXP_DECL void pcre_free_substring(const char *);
|
||||
PCRE_EXP_DECL void pcre16_free_substring(PCRE_SPTR16);
|
||||
PCRE_EXP_DECL void pcre_free_substring_list(const char **);
|
||||
PCRE_EXP_DECL void pcre16_free_substring_list(PCRE_SPTR16 *);
|
||||
PCRE_EXP_DECL int pcre_fullinfo(const pcre *, const pcre_extra *, int,
|
||||
void *);
|
||||
PCRE_EXP_DECL int pcre16_fullinfo(const pcre16 *, const pcre16_extra *, int,
|
||||
void *);
|
||||
PCRE_EXP_DECL int pcre_get_named_substring(const pcre *, const char *,
|
||||
int *, int, const char *, const char **);
|
||||
PCRE_EXP_DECL int pcre16_get_named_substring(const pcre16 *, PCRE_SPTR16,
|
||||
int *, int, PCRE_SPTR16, PCRE_SPTR16 *);
|
||||
PCRE_EXP_DECL int pcre_get_stringnumber(const pcre *, const char *);
|
||||
PCRE_EXP_DECL int pcre16_get_stringnumber(const pcre16 *, PCRE_SPTR16);
|
||||
PCRE_EXP_DECL int pcre_get_stringtable_entries(const pcre *, const char *,
|
||||
char **, char **);
|
||||
PCRE_EXP_DECL int pcre16_get_stringtable_entries(const pcre16 *, PCRE_SPTR16,
|
||||
PCRE_UCHAR16 **, PCRE_UCHAR16 **);
|
||||
PCRE_EXP_DECL int pcre_get_substring(const char *, int *, int, int,
|
||||
const char **);
|
||||
PCRE_EXP_DECL int pcre16_get_substring(PCRE_SPTR16, int *, int, int,
|
||||
PCRE_SPTR16 *);
|
||||
PCRE_EXP_DECL int pcre_get_substring_list(const char *, int *, int,
|
||||
const char ***);
|
||||
PCRE_EXP_DECL int pcre16_get_substring_list(PCRE_SPTR16, int *, int,
|
||||
PCRE_SPTR16 **);
|
||||
PCRE_EXP_DECL const unsigned char *pcre_maketables(void);
|
||||
PCRE_EXP_DECL const unsigned char *pcre16_maketables(void);
|
||||
PCRE_EXP_DECL int pcre_refcount(pcre *, int);
|
||||
PCRE_EXP_DECL int pcre16_refcount(pcre16 *, int);
|
||||
PCRE_EXP_DECL pcre_extra *pcre_study(const pcre *, int, const char **);
|
||||
PCRE_EXP_DECL pcre16_extra *pcre16_study(const pcre16 *, int, const char **);
|
||||
PCRE_EXP_DECL void pcre_free_study(pcre_extra *);
|
||||
PCRE_EXP_DECL void pcre16_free_study(pcre16_extra *);
|
||||
PCRE_EXP_DECL const char *pcre_version(void);
|
||||
PCRE_EXP_DECL const char *pcre16_version(void);
|
||||
|
||||
/* Utility functions for byte order swaps. */
|
||||
PCRE_EXP_DECL int pcre_pattern_to_host_byte_order(pcre *, pcre_extra *,
|
||||
const unsigned char *);
|
||||
PCRE_EXP_DECL int pcre16_pattern_to_host_byte_order(pcre16 *, pcre16_extra *,
|
||||
const unsigned char *);
|
||||
PCRE_EXP_DECL int pcre16_utf16_to_host_byte_order(PCRE_UCHAR16 *,
|
||||
PCRE_SPTR16, int, int *, int);
|
||||
|
||||
/* JIT compiler related functions. */
|
||||
|
||||
PCRE_EXP_DECL pcre_jit_stack *pcre_jit_stack_alloc(int, int);
|
||||
PCRE_EXP_DECL pcre16_jit_stack *pcre16_jit_stack_alloc(int, int);
|
||||
PCRE_EXP_DECL void pcre_jit_stack_free(pcre_jit_stack *);
|
||||
PCRE_EXP_DECL void pcre16_jit_stack_free(pcre16_jit_stack *);
|
||||
PCRE_EXP_DECL void pcre_assign_jit_stack(pcre_extra *,
|
||||
pcre_jit_callback, void *);
|
||||
PCRE_EXP_DECL void pcre16_assign_jit_stack(pcre16_extra *,
|
||||
pcre16_jit_callback, void *);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* End of pcre.h */
|
@ -1,286 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2012 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains an internal function that tests a compiled pattern to
|
||||
see if it was compiled with the opposite endianness. If so, it uses an
|
||||
auxiliary local function to flip the appropriate bytes. */
|
||||
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Swap byte functions *
|
||||
*************************************************/
|
||||
|
||||
/* The following functions swap the bytes of a pcre_uint16
|
||||
and pcre_uint32 value.
|
||||
|
||||
Arguments:
|
||||
value any number
|
||||
|
||||
Returns: the byte swapped value
|
||||
*/
|
||||
|
||||
static pcre_uint32
|
||||
swap_uint32(pcre_uint32 value)
|
||||
{
|
||||
return ((value & 0x000000ff) << 24) |
|
||||
((value & 0x0000ff00) << 8) |
|
||||
((value & 0x00ff0000) >> 8) |
|
||||
(value >> 24);
|
||||
}
|
||||
|
||||
static pcre_uint16
|
||||
swap_uint16(pcre_uint16 value)
|
||||
{
|
||||
return (value >> 8) | (value << 8);
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Test for a byte-flipped compiled regex *
|
||||
*************************************************/
|
||||
|
||||
/* This function swaps the bytes of a compiled pattern usually
|
||||
loaded from the disk. It also sets the tables pointer, which
|
||||
is likely an invalid pointer after reload.
|
||||
|
||||
Arguments:
|
||||
argument_re points to the compiled expression
|
||||
extra_data points to extra data or is NULL
|
||||
tables points to the character tables or NULL
|
||||
|
||||
Returns: 0 if the swap is successful, negative on error
|
||||
*/
|
||||
|
||||
#ifdef COMPILE_PCRE8
|
||||
PCRE_EXP_DECL int pcre_pattern_to_host_byte_order(pcre *argument_re,
|
||||
pcre_extra *extra_data, const unsigned char *tables)
|
||||
#else
|
||||
PCRE_EXP_DECL int pcre16_pattern_to_host_byte_order(pcre16 *argument_re,
|
||||
pcre16_extra *extra_data, const unsigned char *tables)
|
||||
#endif
|
||||
{
|
||||
REAL_PCRE *re = (REAL_PCRE *)argument_re;
|
||||
pcre_study_data *study;
|
||||
#ifndef COMPILE_PCRE8
|
||||
pcre_uchar *ptr;
|
||||
int length;
|
||||
#ifdef SUPPORT_UTF
|
||||
BOOL utf;
|
||||
BOOL utf16_char;
|
||||
#endif /* SUPPORT_UTF */
|
||||
#endif /* !COMPILE_PCRE8 */
|
||||
|
||||
if (re == NULL) return PCRE_ERROR_NULL;
|
||||
if (re->magic_number == MAGIC_NUMBER)
|
||||
{
|
||||
if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
|
||||
re->tables = tables;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (re->magic_number != REVERSED_MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
|
||||
if ((swap_uint16(re->flags) & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
|
||||
|
||||
re->magic_number = MAGIC_NUMBER;
|
||||
re->size = swap_uint32(re->size);
|
||||
re->options = swap_uint32(re->options);
|
||||
re->flags = swap_uint16(re->flags);
|
||||
re->top_bracket = swap_uint16(re->top_bracket);
|
||||
re->top_backref = swap_uint16(re->top_backref);
|
||||
re->first_char = swap_uint16(re->first_char);
|
||||
re->req_char = swap_uint16(re->req_char);
|
||||
re->name_table_offset = swap_uint16(re->name_table_offset);
|
||||
re->name_entry_size = swap_uint16(re->name_entry_size);
|
||||
re->name_count = swap_uint16(re->name_count);
|
||||
re->ref_count = swap_uint16(re->ref_count);
|
||||
re->tables = tables;
|
||||
|
||||
if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_STUDY_DATA) != 0)
|
||||
{
|
||||
study = (pcre_study_data *)extra_data->study_data;
|
||||
study->size = swap_uint32(study->size);
|
||||
study->flags = swap_uint32(study->flags);
|
||||
study->minlength = swap_uint32(study->minlength);
|
||||
}
|
||||
|
||||
#ifndef COMPILE_PCRE8
|
||||
ptr = (pcre_uchar *)re + re->name_table_offset;
|
||||
length = re->name_count * re->name_entry_size;
|
||||
#ifdef SUPPORT_UTF
|
||||
utf = (re->options & PCRE_UTF16) != 0;
|
||||
utf16_char = FALSE;
|
||||
#endif
|
||||
|
||||
while(TRUE)
|
||||
{
|
||||
/* Swap previous characters. */
|
||||
while (length-- > 0)
|
||||
{
|
||||
*ptr = swap_uint16(*ptr);
|
||||
ptr++;
|
||||
}
|
||||
#ifdef SUPPORT_UTF
|
||||
if (utf16_char)
|
||||
{
|
||||
if (HAS_EXTRALEN(ptr[-1]))
|
||||
{
|
||||
/* We know that there is only one extra character in UTF-16. */
|
||||
*ptr = swap_uint16(*ptr);
|
||||
ptr++;
|
||||
}
|
||||
}
|
||||
utf16_char = FALSE;
|
||||
#endif /* SUPPORT_UTF */
|
||||
|
||||
/* Get next opcode. */
|
||||
length = 0;
|
||||
*ptr = swap_uint16(*ptr);
|
||||
switch (*ptr)
|
||||
{
|
||||
case OP_END:
|
||||
return 0;
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
case OP_CHAR:
|
||||
case OP_CHARI:
|
||||
case OP_NOT:
|
||||
case OP_NOTI:
|
||||
case OP_STAR:
|
||||
case OP_MINSTAR:
|
||||
case OP_PLUS:
|
||||
case OP_MINPLUS:
|
||||
case OP_QUERY:
|
||||
case OP_MINQUERY:
|
||||
case OP_UPTO:
|
||||
case OP_MINUPTO:
|
||||
case OP_EXACT:
|
||||
case OP_POSSTAR:
|
||||
case OP_POSPLUS:
|
||||
case OP_POSQUERY:
|
||||
case OP_POSUPTO:
|
||||
case OP_STARI:
|
||||
case OP_MINSTARI:
|
||||
case OP_PLUSI:
|
||||
case OP_MINPLUSI:
|
||||
case OP_QUERYI:
|
||||
case OP_MINQUERYI:
|
||||
case OP_UPTOI:
|
||||
case OP_MINUPTOI:
|
||||
case OP_EXACTI:
|
||||
case OP_POSSTARI:
|
||||
case OP_POSPLUSI:
|
||||
case OP_POSQUERYI:
|
||||
case OP_POSUPTOI:
|
||||
case OP_NOTSTAR:
|
||||
case OP_NOTMINSTAR:
|
||||
case OP_NOTPLUS:
|
||||
case OP_NOTMINPLUS:
|
||||
case OP_NOTQUERY:
|
||||
case OP_NOTMINQUERY:
|
||||
case OP_NOTUPTO:
|
||||
case OP_NOTMINUPTO:
|
||||
case OP_NOTEXACT:
|
||||
case OP_NOTPOSSTAR:
|
||||
case OP_NOTPOSPLUS:
|
||||
case OP_NOTPOSQUERY:
|
||||
case OP_NOTPOSUPTO:
|
||||
case OP_NOTSTARI:
|
||||
case OP_NOTMINSTARI:
|
||||
case OP_NOTPLUSI:
|
||||
case OP_NOTMINPLUSI:
|
||||
case OP_NOTQUERYI:
|
||||
case OP_NOTMINQUERYI:
|
||||
case OP_NOTUPTOI:
|
||||
case OP_NOTMINUPTOI:
|
||||
case OP_NOTEXACTI:
|
||||
case OP_NOTPOSSTARI:
|
||||
case OP_NOTPOSPLUSI:
|
||||
case OP_NOTPOSQUERYI:
|
||||
case OP_NOTPOSUPTOI:
|
||||
if (utf) utf16_char = TRUE;
|
||||
#endif
|
||||
/* Fall through. */
|
||||
|
||||
default:
|
||||
length = PRIV(OP_lengths)[*ptr] - 1;
|
||||
break;
|
||||
|
||||
case OP_CLASS:
|
||||
case OP_NCLASS:
|
||||
/* Skip the character bit map. */
|
||||
ptr += 32/sizeof(pcre_uchar);
|
||||
length = 0;
|
||||
break;
|
||||
|
||||
case OP_XCLASS:
|
||||
/* Reverse the size of the XCLASS instance. */
|
||||
ptr++;
|
||||
*ptr = swap_uint16(*ptr);
|
||||
if (LINK_SIZE > 1)
|
||||
{
|
||||
/* LINK_SIZE can be 1 or 2 in 16 bit mode. */
|
||||
ptr++;
|
||||
*ptr = swap_uint16(*ptr);
|
||||
}
|
||||
ptr++;
|
||||
length = (GET(ptr, -LINK_SIZE)) - (1 + LINK_SIZE + 1);
|
||||
*ptr = swap_uint16(*ptr);
|
||||
if ((*ptr & XCL_MAP) != 0)
|
||||
{
|
||||
/* Skip the character bit map. */
|
||||
ptr += 32/sizeof(pcre_uchar);
|
||||
length -= 32/sizeof(pcre_uchar);
|
||||
}
|
||||
break;
|
||||
}
|
||||
ptr++;
|
||||
}
|
||||
/* Control should never reach here in 16 bit mode. */
|
||||
#endif /* !COMPILE_PCRE8 */
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* End of pcre_byte_order.c */
|
@ -1,196 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* This file contains character tables that are used when no external tables
|
||||
are passed to PCRE by the application that calls it. The tables are used only
|
||||
for characters whose code values are less than 256.
|
||||
|
||||
This is a default version of the tables that assumes ASCII encoding. A program
|
||||
called dftables (which is distributed with PCRE) can be used to build
|
||||
alternative versions of this file. This is necessary if you are running in an
|
||||
EBCDIC environment, or if you want to default to a different encoding, for
|
||||
example ISO-8859-1. When dftables is run, it creates these tables in the
|
||||
current locale. If PCRE is configured with --enable-rebuild-chartables, this
|
||||
happens automatically.
|
||||
|
||||
The following #includes are present because without them gcc 4.x may remove the
|
||||
array definition from the final binary if PCRE is built into a static library
|
||||
and dead code stripping is activated. This leads to link errors. Pulling in the
|
||||
header ensures that the array gets flagged as "someone outside this compilation
|
||||
unit might reference this" and so it will always be supplied to the linker. */
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
const pcre_uint8 PRIV(default_tables)[] = {
|
||||
|
||||
/* This table is a lower casing table. */
|
||||
|
||||
0, 1, 2, 3, 4, 5, 6, 7,
|
||||
8, 9, 10, 11, 12, 13, 14, 15,
|
||||
16, 17, 18, 19, 20, 21, 22, 23,
|
||||
24, 25, 26, 27, 28, 29, 30, 31,
|
||||
32, 33, 34, 35, 36, 37, 38, 39,
|
||||
40, 41, 42, 43, 44, 45, 46, 47,
|
||||
48, 49, 50, 51, 52, 53, 54, 55,
|
||||
56, 57, 58, 59, 60, 61, 62, 63,
|
||||
64, 97, 98, 99,100,101,102,103,
|
||||
104,105,106,107,108,109,110,111,
|
||||
112,113,114,115,116,117,118,119,
|
||||
120,121,122, 91, 92, 93, 94, 95,
|
||||
96, 97, 98, 99,100,101,102,103,
|
||||
104,105,106,107,108,109,110,111,
|
||||
112,113,114,115,116,117,118,119,
|
||||
120,121,122,123,124,125,126,127,
|
||||
128,129,130,131,132,133,134,135,
|
||||
136,137,138,139,140,141,142,143,
|
||||
144,145,146,147,148,149,150,151,
|
||||
152,153,154,155,156,157,158,159,
|
||||
160,161,162,163,164,165,166,167,
|
||||
168,169,170,171,172,173,174,175,
|
||||
176,177,178,179,180,181,182,183,
|
||||
184,185,186,187,188,189,190,191,
|
||||
192,193,194,195,196,197,198,199,
|
||||
200,201,202,203,204,205,206,207,
|
||||
208,209,210,211,212,213,214,215,
|
||||
216,217,218,219,220,221,222,223,
|
||||
224,225,226,227,228,229,230,231,
|
||||
232,233,234,235,236,237,238,239,
|
||||
240,241,242,243,244,245,246,247,
|
||||
248,249,250,251,252,253,254,255,
|
||||
|
||||
/* This table is a case flipping table. */
|
||||
|
||||
0, 1, 2, 3, 4, 5, 6, 7,
|
||||
8, 9, 10, 11, 12, 13, 14, 15,
|
||||
16, 17, 18, 19, 20, 21, 22, 23,
|
||||
24, 25, 26, 27, 28, 29, 30, 31,
|
||||
32, 33, 34, 35, 36, 37, 38, 39,
|
||||
40, 41, 42, 43, 44, 45, 46, 47,
|
||||
48, 49, 50, 51, 52, 53, 54, 55,
|
||||
56, 57, 58, 59, 60, 61, 62, 63,
|
||||
64, 97, 98, 99,100,101,102,103,
|
||||
104,105,106,107,108,109,110,111,
|
||||
112,113,114,115,116,117,118,119,
|
||||
120,121,122, 91, 92, 93, 94, 95,
|
||||
96, 65, 66, 67, 68, 69, 70, 71,
|
||||
72, 73, 74, 75, 76, 77, 78, 79,
|
||||
80, 81, 82, 83, 84, 85, 86, 87,
|
||||
88, 89, 90,123,124,125,126,127,
|
||||
128,129,130,131,132,133,134,135,
|
||||
136,137,138,139,140,141,142,143,
|
||||
144,145,146,147,148,149,150,151,
|
||||
152,153,154,155,156,157,158,159,
|
||||
160,161,162,163,164,165,166,167,
|
||||
168,169,170,171,172,173,174,175,
|
||||
176,177,178,179,180,181,182,183,
|
||||
184,185,186,187,188,189,190,191,
|
||||
192,193,194,195,196,197,198,199,
|
||||
200,201,202,203,204,205,206,207,
|
||||
208,209,210,211,212,213,214,215,
|
||||
216,217,218,219,220,221,222,223,
|
||||
224,225,226,227,228,229,230,231,
|
||||
232,233,234,235,236,237,238,239,
|
||||
240,241,242,243,244,245,246,247,
|
||||
248,249,250,251,252,253,254,255,
|
||||
|
||||
/* This table contains bit maps for various character classes. Each map is 32
|
||||
bytes long and the bits run from the least significant end of each byte. The
|
||||
classes that have their own maps are: space, xdigit, digit, upper, lower, word,
|
||||
graph, print, punct, and cntrl. Other classes are built from combinations. */
|
||||
|
||||
0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
|
||||
0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
|
||||
0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
|
||||
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
|
||||
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
|
||||
0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
/* This table identifies various classes of character by individual bits:
|
||||
0x01 white space character
|
||||
0x02 letter
|
||||
0x04 decimal digit
|
||||
0x08 hexadecimal digit
|
||||
0x10 alphanumeric or '_'
|
||||
0x80 regular expression metacharacter or binary zero
|
||||
*/
|
||||
|
||||
0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
|
||||
0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
|
||||
0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
|
||||
0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
|
||||
0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
|
||||
0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
|
||||
0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
|
||||
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
|
||||
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
|
||||
0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
|
||||
0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
|
||||
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
|
||||
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
|
||||
0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
|
||||
|
||||
/* End of pcre_chartables.c */
|
File diff suppressed because it is too large
Load Diff
@ -1,168 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2012 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains the external function pcre_config(). */
|
||||
|
||||
|
||||
#include "config.h"
|
||||
|
||||
/* Keep the original link size. The value is captured here, before
pcre_internal.h is included further down, and is what pcre_config() reports
for PCRE_CONFIG_LINK_SIZE.
NOTE(review): presumably pcre_internal.h can redefine LINK_SIZE, which would
be why the capture has to precede that include - confirm. */
|
||||
static int real_link_size = LINK_SIZE;
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Return info about what features are configured *
|
||||
*************************************************/
|
||||
|
||||
/* This function has an extensible interface so that additional items can be
|
||||
added compatibly.
|
||||
|
||||
Arguments:
|
||||
what what information is required
|
||||
where where to put the information
|
||||
|
||||
Returns: 0 if data returned, negative on error
|
||||
*/
|
||||
|
||||
#ifdef COMPILE_PCRE8
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre_config(int what, void *where)
|
||||
#else
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre16_config(int what, void *where)
|
||||
#endif
|
||||
{
|
||||
switch (what)
|
||||
{
|
||||
case PCRE_CONFIG_UTF8:
|
||||
#if defined COMPILE_PCRE16
|
||||
*((int *)where) = 0;
|
||||
return PCRE_ERROR_BADOPTION;
|
||||
#else
|
||||
#if defined SUPPORT_UTF
|
||||
*((int *)where) = 1;
|
||||
#else
|
||||
*((int *)where) = 0;
|
||||
#endif
|
||||
break;
|
||||
#endif
|
||||
|
||||
case PCRE_CONFIG_UTF16:
|
||||
#if defined COMPILE_PCRE8
|
||||
*((int *)where) = 0;
|
||||
return PCRE_ERROR_BADOPTION;
|
||||
#else
|
||||
#if defined SUPPORT_UTF
|
||||
*((int *)where) = 1;
|
||||
#else
|
||||
*((int *)where) = 0;
|
||||
#endif
|
||||
break;
|
||||
#endif
|
||||
|
||||
case PCRE_CONFIG_UNICODE_PROPERTIES:
|
||||
#ifdef SUPPORT_UCP
|
||||
*((int *)where) = 1;
|
||||
#else
|
||||
*((int *)where) = 0;
|
||||
#endif
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_JIT:
|
||||
#ifdef SUPPORT_JIT
|
||||
*((int *)where) = 1;
|
||||
#else
|
||||
*((int *)where) = 0;
|
||||
#endif
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_JITTARGET:
|
||||
#ifdef SUPPORT_JIT
|
||||
*((const char **)where) = PRIV(jit_get_target)();
|
||||
#else
|
||||
*((const char **)where) = NULL;
|
||||
#endif
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_NEWLINE:
|
||||
*((int *)where) = NEWLINE;
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_BSR:
|
||||
#ifdef BSR_ANYCRLF
|
||||
*((int *)where) = 1;
|
||||
#else
|
||||
*((int *)where) = 0;
|
||||
#endif
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_LINK_SIZE:
|
||||
*((int *)where) = real_link_size;
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_POSIX_MALLOC_THRESHOLD:
|
||||
*((int *)where) = POSIX_MALLOC_THRESHOLD;
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_MATCH_LIMIT:
|
||||
*((unsigned long int *)where) = MATCH_LIMIT;
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_MATCH_LIMIT_RECURSION:
|
||||
*((unsigned long int *)where) = MATCH_LIMIT_RECURSION;
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_STACKRECURSE:
|
||||
#ifdef NO_RECURSE
|
||||
*((int *)where) = 0;
|
||||
#else
|
||||
*((int *)where) = 1;
|
||||
#endif
|
||||
break;
|
||||
|
||||
default: return PCRE_ERROR_BADOPTION;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* End of pcre_config.c */
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,204 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2012 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains the external function pcre_fullinfo(), which returns
|
||||
information about a compiled pattern. */
|
||||
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Return info about compiled pattern *
|
||||
*************************************************/
|
||||
|
||||
/* This is a newer "info" function which has an extensible interface so
|
||||
that additional items can be added compatibly.
|
||||
|
||||
Arguments:
|
||||
argument_re points to compiled code
|
||||
extra_data points to extra data, or NULL
|
||||
what what information is required
|
||||
where where to put the information
|
||||
|
||||
Returns: 0 if data returned, negative on error
|
||||
*/
|
||||
|
||||
#ifdef COMPILE_PCRE8
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre_fullinfo(const pcre *argument_re, const pcre_extra *extra_data,
|
||||
int what, void *where)
|
||||
#else
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre16_fullinfo(const pcre16 *argument_re, const pcre16_extra *extra_data,
|
||||
int what, void *where)
|
||||
#endif
|
||||
{
|
||||
const REAL_PCRE *re = (const REAL_PCRE *)argument_re;
|
||||
const pcre_study_data *study = NULL;
|
||||
|
||||
if (re == NULL || where == NULL) return PCRE_ERROR_NULL;
|
||||
|
||||
if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_STUDY_DATA) != 0)
|
||||
study = (const pcre_study_data *)extra_data->study_data;
|
||||
|
||||
/* Check that the first field in the block is the magic number. If it is not,
|
||||
return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
|
||||
REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
|
||||
means that the pattern is likely compiled with different endianness. */
|
||||
|
||||
if (re->magic_number != MAGIC_NUMBER)
|
||||
return re->magic_number == REVERSED_MAGIC_NUMBER?
|
||||
PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
|
||||
|
||||
/* Check that this pattern was compiled in the correct bit mode */
|
||||
|
||||
if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
|
||||
|
||||
switch (what)
|
||||
{
|
||||
case PCRE_INFO_OPTIONS:
|
||||
*((unsigned long int *)where) = re->options & PUBLIC_COMPILE_OPTIONS;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_SIZE:
|
||||
*((size_t *)where) = re->size;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_STUDYSIZE:
|
||||
*((size_t *)where) = (study == NULL)? 0 : study->size;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_JITSIZE:
|
||||
#ifdef SUPPORT_JIT
|
||||
*((size_t *)where) =
|
||||
(extra_data != NULL &&
|
||||
(extra_data->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
|
||||
extra_data->executable_jit != NULL)?
|
||||
PRIV(jit_get_size)(extra_data->executable_jit) : 0;
|
||||
#else
|
||||
*((size_t *)where) = 0;
|
||||
#endif
|
||||
break;
|
||||
|
||||
case PCRE_INFO_CAPTURECOUNT:
|
||||
*((int *)where) = re->top_bracket;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_BACKREFMAX:
|
||||
*((int *)where) = re->top_backref;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_FIRSTBYTE:
|
||||
*((int *)where) =
|
||||
((re->flags & PCRE_FIRSTSET) != 0)? re->first_char :
|
||||
((re->flags & PCRE_STARTLINE) != 0)? -1 : -2;
|
||||
break;
|
||||
|
||||
/* Make sure we pass back the pointer to the bit vector in the external
|
||||
block, not the internal copy (with flipped integer fields). */
|
||||
|
||||
case PCRE_INFO_FIRSTTABLE:
|
||||
*((const pcre_uint8 **)where) =
|
||||
(study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0)?
|
||||
((const pcre_study_data *)extra_data->study_data)->start_bits : NULL;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_MINLENGTH:
|
||||
*((int *)where) =
|
||||
(study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0)?
|
||||
(int)(study->minlength) : -1;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_JIT:
|
||||
*((int *)where) = extra_data != NULL &&
|
||||
(extra_data->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
|
||||
extra_data->executable_jit != NULL;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_LASTLITERAL:
|
||||
*((int *)where) =
|
||||
((re->flags & PCRE_REQCHSET) != 0)? re->req_char : -1;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_NAMEENTRYSIZE:
|
||||
*((int *)where) = re->name_entry_size;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_NAMECOUNT:
|
||||
*((int *)where) = re->name_count;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_NAMETABLE:
|
||||
*((const pcre_uchar **)where) = (const pcre_uchar *)re + re->name_table_offset;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_DEFAULT_TABLES:
|
||||
*((const pcre_uint8 **)where) = (const pcre_uint8 *)(PRIV(default_tables));
|
||||
break;
|
||||
|
||||
/* From release 8.00 this will always return TRUE because NOPARTIAL is
|
||||
no longer ever set (the restrictions have been removed). */
|
||||
|
||||
case PCRE_INFO_OKPARTIAL:
|
||||
*((int *)where) = (re->flags & PCRE_NOPARTIAL) == 0;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_JCHANGED:
|
||||
*((int *)where) = (re->flags & PCRE_JCHANGED) != 0;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_HASCRORLF:
|
||||
*((int *)where) = (re->flags & PCRE_HASCRORLF) != 0;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_MAXLOOKBEHIND:
|
||||
*((int *)where) = re->max_lookbehind;
|
||||
break;
|
||||
|
||||
default: return PCRE_ERROR_BADOPTION;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* End of pcre_fullinfo.c */
|
@ -1,585 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2012 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains some convenience functions for extracting substrings
|
||||
from the subject string after a regex match has succeeded. The original idea
|
||||
for these functions came from Scott Wimer. */
|
||||
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find number for named string *
|
||||
*************************************************/
|
||||
|
||||
/* This function is used by the get_first_set() function below, as well
|
||||
as being generally available. It assumes that names are unique.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
stringname the name whose number is required
|
||||
|
||||
Returns: the number of the named parentheses, or a negative number
|
||||
(PCRE_ERROR_NOSUBSTRING) if not found
|
||||
*/
|
||||
|
||||
#ifdef COMPILE_PCRE8
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre_get_stringnumber(const pcre *code, const char *stringname)
|
||||
#else
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre16_get_stringnumber(const pcre16 *code, PCRE_SPTR16 stringname)
|
||||
#endif
|
||||
{
|
||||
int rc;
|
||||
int entrysize;
|
||||
int top, bot;
|
||||
pcre_uchar *nametable;
|
||||
|
||||
#ifdef COMPILE_PCRE8
|
||||
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
|
||||
return rc;
|
||||
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
|
||||
|
||||
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
|
||||
return rc;
|
||||
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
|
||||
return rc;
|
||||
#endif
|
||||
#ifdef COMPILE_PCRE16
|
||||
if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
|
||||
return rc;
|
||||
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
|
||||
|
||||
if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
|
||||
return rc;
|
||||
if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
|
||||
return rc;
|
||||
#endif
|
||||
|
||||
bot = 0;
|
||||
while (top > bot)
|
||||
{
|
||||
int mid = (top + bot) / 2;
|
||||
pcre_uchar *entry = nametable + entrysize*mid;
|
||||
int c = STRCMP_UC_UC((pcre_uchar *)stringname,
|
||||
(pcre_uchar *)(entry + IMM2_SIZE));
|
||||
if (c == 0) return GET2(entry, 0);
|
||||
if (c > 0) bot = mid + 1; else top = mid;
|
||||
}
|
||||
|
||||
return PCRE_ERROR_NOSUBSTRING;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find (multiple) entries for named string *
|
||||
*************************************************/
|
||||
|
||||
/* This is used by the get_first_set() function below, as well as being
|
||||
generally available. It is used when duplicated names are permitted.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
stringname the name whose entries are required
|
||||
firstptr where to put the pointer to the first entry
|
||||
lastptr where to put the pointer to the last entry
|
||||
|
||||
Returns: the length of each entry, or a negative number
|
||||
(PCRE_ERROR_NOSUBSTRING) if not found
|
||||
*/
|
||||
|
||||
#ifdef COMPILE_PCRE8
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre_get_stringtable_entries(const pcre *code, const char *stringname,
|
||||
char **firstptr, char **lastptr)
|
||||
#else
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre16_get_stringtable_entries(const pcre16 *code, PCRE_SPTR16 stringname,
|
||||
PCRE_UCHAR16 **firstptr, PCRE_UCHAR16 **lastptr)
|
||||
#endif
|
||||
{
|
||||
int rc;
|
||||
int entrysize;
|
||||
int top, bot;
|
||||
pcre_uchar *nametable, *lastentry;
|
||||
|
||||
#ifdef COMPILE_PCRE8
|
||||
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
|
||||
return rc;
|
||||
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
|
||||
|
||||
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
|
||||
return rc;
|
||||
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
|
||||
return rc;
|
||||
#endif
|
||||
#ifdef COMPILE_PCRE16
|
||||
if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
|
||||
return rc;
|
||||
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
|
||||
|
||||
if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
|
||||
return rc;
|
||||
if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
|
||||
return rc;
|
||||
#endif
|
||||
|
||||
lastentry = nametable + entrysize * (top - 1);
|
||||
bot = 0;
|
||||
while (top > bot)
|
||||
{
|
||||
int mid = (top + bot) / 2;
|
||||
pcre_uchar *entry = nametable + entrysize*mid;
|
||||
int c = STRCMP_UC_UC((pcre_uchar *)stringname,
|
||||
(pcre_uchar *)(entry + IMM2_SIZE));
|
||||
if (c == 0)
|
||||
{
|
||||
pcre_uchar *first = entry;
|
||||
pcre_uchar *last = entry;
|
||||
while (first > nametable)
|
||||
{
|
||||
if (STRCMP_UC_UC((pcre_uchar *)stringname,
|
||||
(pcre_uchar *)(first - entrysize + IMM2_SIZE)) != 0) break;
|
||||
first -= entrysize;
|
||||
}
|
||||
while (last < lastentry)
|
||||
{
|
||||
if (STRCMP_UC_UC((pcre_uchar *)stringname,
|
||||
(pcre_uchar *)(last + entrysize + IMM2_SIZE)) != 0) break;
|
||||
last += entrysize;
|
||||
}
|
||||
#ifdef COMPILE_PCRE8
|
||||
*firstptr = (char *)first;
|
||||
*lastptr = (char *)last;
|
||||
#else
|
||||
*firstptr = (PCRE_UCHAR16 *)first;
|
||||
*lastptr = (PCRE_UCHAR16 *)last;
|
||||
#endif
|
||||
return entrysize;
|
||||
}
|
||||
if (c > 0) bot = mid + 1; else top = mid;
|
||||
}
|
||||
|
||||
return PCRE_ERROR_NOSUBSTRING;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find first set of multiple named strings *
|
||||
*************************************************/
|
||||
|
||||
/* This function allows for duplicate names in the table of named substrings.
|
||||
It returns the number of the first one that was set in a pattern match.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
stringname the name of the capturing substring
|
||||
ovector the vector of matched substrings
|
||||
|
||||
Returns: the number of the first that is set,
|
||||
or the number of the last one if none are set,
|
||||
or a negative number on error
|
||||
*/
|
||||
|
||||
#ifdef COMPILE_PCRE8
|
||||
static int
|
||||
get_first_set(const pcre *code, const char *stringname, int *ovector)
|
||||
#else
|
||||
static int
|
||||
get_first_set(const pcre16 *code, PCRE_SPTR16 stringname, int *ovector)
|
||||
#endif
|
||||
{
|
||||
const REAL_PCRE *re = (const REAL_PCRE *)code;
|
||||
int entrysize;
|
||||
pcre_uchar *entry;
|
||||
#ifdef COMPILE_PCRE8
|
||||
char *first, *last;
|
||||
#else
|
||||
PCRE_UCHAR16 *first, *last;
|
||||
#endif
|
||||
|
||||
#ifdef COMPILE_PCRE8
|
||||
if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
|
||||
return pcre_get_stringnumber(code, stringname);
|
||||
entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
|
||||
#else
|
||||
if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
|
||||
return pcre16_get_stringnumber(code, stringname);
|
||||
entrysize = pcre16_get_stringtable_entries(code, stringname, &first, &last);
|
||||
#endif
|
||||
if (entrysize <= 0) return entrysize;
|
||||
for (entry = (pcre_uchar *)first; entry <= (pcre_uchar *)last; entry += entrysize)
|
||||
{
|
||||
int n = GET2(entry, 0);
|
||||
if (ovector[n*2] >= 0) return n;
|
||||
}
|
||||
return GET2(entry, 0);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Copy captured string to given buffer *
|
||||
*************************************************/
|
||||
|
||||
/* This function copies a single captured substring into a given buffer.
|
||||
Note that we use memcpy() rather than strncpy() in case there are binary zeros
|
||||
in the string.
|
||||
|
||||
Arguments:
|
||||
subject the subject string that was matched
|
||||
ovector pointer to the offsets table
|
||||
stringcount the number of substrings that were captured
|
||||
(i.e. the yield of the pcre_exec call, unless
|
||||
that was zero, in which case it should be 1/3
|
||||
of the offset table size)
|
||||
stringnumber the number of the required substring
|
||||
buffer where to put the substring
|
||||
size the size of the buffer
|
||||
|
||||
Returns: if successful:
|
||||
the length of the copied string, not including the zero
|
||||
that is put on the end; can be zero
|
||||
if not successful:
|
||||
PCRE_ERROR_NOMEMORY (-6) buffer too small
|
||||
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
|
||||
*/
|
||||
|
||||
#ifdef COMPILE_PCRE8
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre_copy_substring(const char *subject, int *ovector, int stringcount,
|
||||
int stringnumber, char *buffer, int size)
|
||||
#else
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre16_copy_substring(PCRE_SPTR16 subject, int *ovector, int stringcount,
|
||||
int stringnumber, PCRE_UCHAR16 *buffer, int size)
|
||||
#endif
|
||||
{
|
||||
int yield;
|
||||
if (stringnumber < 0 || stringnumber >= stringcount)
|
||||
return PCRE_ERROR_NOSUBSTRING;
|
||||
stringnumber *= 2;
|
||||
yield = ovector[stringnumber+1] - ovector[stringnumber];
|
||||
if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
|
||||
memcpy(buffer, subject + ovector[stringnumber], IN_UCHARS(yield));
|
||||
buffer[yield] = 0;
|
||||
return yield;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Copy named captured string to given buffer *
|
||||
*************************************************/
|
||||
|
||||
/* This function copies a single captured substring into a given buffer,
|
||||
identifying it by name. If the regex permits duplicate names, the first
|
||||
substring that is set is chosen.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
subject the subject string that was matched
|
||||
ovector pointer to the offsets table
|
||||
stringcount the number of substrings that were captured
|
||||
(i.e. the yield of the pcre_exec call, unless
|
||||
that was zero, in which case it should be 1/3
|
||||
of the offset table size)
|
||||
stringname the name of the required substring
|
||||
buffer where to put the substring
|
||||
size the size of the buffer
|
||||
|
||||
Returns: if successful:
|
||||
the length of the copied string, not including the zero
|
||||
that is put on the end; can be zero
|
||||
if not successful:
|
||||
PCRE_ERROR_NOMEMORY (-6) buffer too small
|
||||
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
|
||||
*/
|
||||
|
||||
#ifdef COMPILE_PCRE8
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre_copy_named_substring(const pcre *code, const char *subject,
|
||||
int *ovector, int stringcount, const char *stringname,
|
||||
char *buffer, int size)
|
||||
#else
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre16_copy_named_substring(const pcre16 *code, PCRE_SPTR16 subject,
|
||||
int *ovector, int stringcount, PCRE_SPTR16 stringname,
|
||||
PCRE_UCHAR16 *buffer, int size)
|
||||
#endif
|
||||
{
|
||||
int n = get_first_set(code, stringname, ovector);
|
||||
if (n <= 0) return n;
|
||||
#ifdef COMPILE_PCRE8
|
||||
return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
|
||||
#else
|
||||
return pcre16_copy_substring(subject, ovector, stringcount, n, buffer, size);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Copy all captured strings to new store *
|
||||
*************************************************/
|
||||
|
||||
/* This function gets one chunk of store and builds a list of pointers and all
|
||||
of the captured substrings in it. A NULL pointer is put on the end of the list.
|
||||
|
||||
Arguments:
|
||||
subject the subject string that was matched
|
||||
ovector pointer to the offsets table
|
||||
stringcount the number of substrings that were captured
|
||||
(i.e. the yield of the pcre_exec call, unless
|
||||
that was zero, in which case it should be 1/3
|
||||
of the offset table size)
|
||||
listptr set to point to the list of pointers
|
||||
|
||||
Returns: if successful: 0
|
||||
if not successful:
|
||||
PCRE_ERROR_NOMEMORY (-6) failed to get store
|
||||
*/
|
||||
|
||||
#ifdef COMPILE_PCRE8
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
|
||||
const char ***listptr)
|
||||
#else
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre16_get_substring_list(PCRE_SPTR16 subject, int *ovector, int stringcount,
|
||||
PCRE_SPTR16 **listptr)
|
||||
#endif
|
||||
{
|
||||
int i;
|
||||
int size = sizeof(pcre_uchar *);
|
||||
int double_count = stringcount * 2;
|
||||
pcre_uchar **stringlist;
|
||||
pcre_uchar *p;
|
||||
|
||||
for (i = 0; i < double_count; i += 2)
|
||||
size += sizeof(pcre_uchar *) + IN_UCHARS(ovector[i+1] - ovector[i] + 1);
|
||||
|
||||
stringlist = (pcre_uchar **)(PUBL(malloc))(size);
|
||||
if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
|
||||
|
||||
#ifdef COMPILE_PCRE8
|
||||
*listptr = (const char **)stringlist;
|
||||
#else
|
||||
*listptr = (PCRE_SPTR16 *)stringlist;
|
||||
#endif
|
||||
p = (pcre_uchar *)(stringlist + stringcount + 1);
|
||||
|
||||
for (i = 0; i < double_count; i += 2)
|
||||
{
|
||||
int len = ovector[i+1] - ovector[i];
|
||||
memcpy(p, subject + ovector[i], IN_UCHARS(len));
|
||||
*stringlist++ = p;
|
||||
p += len;
|
||||
*p++ = 0;
|
||||
}
|
||||
|
||||
*stringlist = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free store obtained by get_substring_list *
|
||||
*************************************************/
|
||||
|
||||
/* This function exists for the benefit of people calling PCRE from non-C
|
||||
programs that can call its functions, but not free() or (PUBL(free))()
|
||||
directly.
|
||||
|
||||
Argument: the result of a previous pcre_get_substring_list()
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
#ifdef COMPILE_PCRE8
|
||||
PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
|
||||
pcre_free_substring_list(const char **pointer)
|
||||
#else
|
||||
PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
|
||||
pcre16_free_substring_list(PCRE_SPTR16 *pointer)
|
||||
#endif
|
||||
{
|
||||
(PUBL(free))((void *)pointer);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Copy captured string to new store *
|
||||
*************************************************/
|
||||
|
||||
/* This function copies a single captured substring into a piece of new
|
||||
store
|
||||
|
||||
Arguments:
|
||||
subject the subject string that was matched
|
||||
ovector pointer to the offsets table
|
||||
stringcount the number of substrings that were captured
|
||||
(i.e. the yield of the pcre_exec call, unless
|
||||
that was zero, in which case it should be 1/3
|
||||
of the offset table size)
|
||||
stringnumber the number of the required substring
|
||||
stringptr where to put a pointer to the substring
|
||||
|
||||
Returns: if successful:
|
||||
the length of the string, not including the zero that
|
||||
is put on the end; can be zero
|
||||
if not successful:
|
||||
PCRE_ERROR_NOMEMORY (-6) failed to get store
|
||||
PCRE_ERROR_NOSUBSTRING (-7) substring not present
|
||||
*/
|
||||
|
||||
#ifdef COMPILE_PCRE8
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre_get_substring(const char *subject, int *ovector, int stringcount,
|
||||
int stringnumber, const char **stringptr)
|
||||
#else
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre16_get_substring(PCRE_SPTR16 subject, int *ovector, int stringcount,
|
||||
int stringnumber, PCRE_SPTR16 *stringptr)
|
||||
#endif
|
||||
{
|
||||
int yield;
|
||||
pcre_uchar *substring;
|
||||
if (stringnumber < 0 || stringnumber >= stringcount)
|
||||
return PCRE_ERROR_NOSUBSTRING;
|
||||
stringnumber *= 2;
|
||||
yield = ovector[stringnumber+1] - ovector[stringnumber];
|
||||
substring = (pcre_uchar *)(PUBL(malloc))(IN_UCHARS(yield + 1));
|
||||
if (substring == NULL) return PCRE_ERROR_NOMEMORY;
|
||||
memcpy(substring, subject + ovector[stringnumber], IN_UCHARS(yield));
|
||||
substring[yield] = 0;
|
||||
#ifdef COMPILE_PCRE8
|
||||
*stringptr = (const char *)substring;
|
||||
#else
|
||||
*stringptr = (PCRE_SPTR16)substring;
|
||||
#endif
|
||||
return yield;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Copy named captured string to new store *
|
||||
*************************************************/
|
||||
|
||||
/* This function copies a single captured substring, identified by name, into
|
||||
new store. If the regex permits duplicate names, the first substring that is
|
||||
set is chosen.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
subject the subject string that was matched
|
||||
ovector pointer to the offsets table
|
||||
stringcount the number of substrings that were captured
|
||||
(i.e. the yield of the pcre_exec call, unless
|
||||
that was zero, in which case it should be 1/3
|
||||
of the offset table size)
|
||||
stringname the name of the required substring
|
||||
stringptr where to put the pointer
|
||||
|
||||
Returns: if successful:
|
||||
the length of the copied string, not including the zero
|
||||
that is put on the end; can be zero
|
||||
if not successful:
|
||||
PCRE_ERROR_NOMEMORY (-6) couldn't get memory
|
||||
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
|
||||
*/
|
||||
|
||||
#ifdef COMPILE_PCRE8
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre_get_named_substring(const pcre *code, const char *subject,
|
||||
int *ovector, int stringcount, const char *stringname,
|
||||
const char **stringptr)
|
||||
#else
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre16_get_named_substring(const pcre16 *code, PCRE_SPTR16 subject,
|
||||
int *ovector, int stringcount, PCRE_SPTR16 stringname,
|
||||
PCRE_SPTR16 *stringptr)
|
||||
#endif
|
||||
{
|
||||
int n = get_first_set(code, stringname, ovector);
|
||||
if (n <= 0) return n;
|
||||
#ifdef COMPILE_PCRE8
|
||||
return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
|
||||
#else
|
||||
return pcre16_get_substring(subject, ovector, stringcount, n, stringptr);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free store obtained by get_substring *
|
||||
*************************************************/
|
||||
|
||||
/* This function exists for the benefit of people calling PCRE from non-C
|
||||
programs that can call its functions, but not free() or (PUBL(free))()
|
||||
directly.
|
||||
|
||||
Argument: the result of a previous pcre_get_substring()
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
#ifdef COMPILE_PCRE8
|
||||
PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
|
||||
pcre_free_substring(const char *pointer)
|
||||
#else
|
||||
PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
|
||||
pcre16_free_substring(PCRE_SPTR16 pointer)
|
||||
#endif
|
||||
{
|
||||
(PUBL(free))((void *)pointer);
|
||||
}
|
||||
|
||||
/* End of pcre_get.c */
|
@ -1,88 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2012 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains global variables that are exported by the PCRE library.
|
||||
PCRE is thread-clean and doesn't use any global variables in the normal sense.
|
||||
However, it calls memory allocation and freeing functions via the four
|
||||
indirections below, and it can optionally do callouts, using the fifth
|
||||
indirection. These values can be changed by the caller, but are shared between
|
||||
all threads.
|
||||
|
||||
For MS Visual Studio and Symbian OS, there are problems in initializing these
|
||||
variables to non-local functions. In these cases, therefore, an indirection via
|
||||
a local function is used.
|
||||
|
||||
Also, when compiling for Virtual Pascal, things are done differently, and
|
||||
global variables are not used. */
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
#ifdef GLIB_COMPILATION
|
||||
#include "gmem.h"
|
||||
#else
|
||||
#include <glib.h>
|
||||
#endif /* GLIB_COMPILATION */
|
||||
|
||||
#if defined _MSC_VER || defined __SYMBIAN32__
|
||||
static void* LocalPcreMalloc(size_t aSize)
|
||||
{
|
||||
return malloc(aSize);
|
||||
}
|
||||
static void LocalPcreFree(void* aPtr)
|
||||
{
|
||||
free(aPtr);
|
||||
}
|
||||
PCRE_EXP_DATA_DEFN void *(*PUBL(malloc))(size_t) = LocalPcreMalloc;
|
||||
PCRE_EXP_DATA_DEFN void (*PUBL(free))(void *) = LocalPcreFree;
|
||||
PCRE_EXP_DATA_DEFN void *(*PUBL(stack_malloc))(size_t) = LocalPcreMalloc;
|
||||
PCRE_EXP_DATA_DEFN void (*PUBL(stack_free))(void *) = LocalPcreFree;
|
||||
PCRE_EXP_DATA_DEFN int (*PUBL(callout))(PUBL(callout_block) *) = NULL;
|
||||
|
||||
#elif !defined VPCOMPAT
|
||||
PCRE_EXP_DATA_DEFN void *(*PUBL(malloc))(size_t) = g_try_malloc;
|
||||
PCRE_EXP_DATA_DEFN void (*PUBL(free))(void *) = g_free;
|
||||
PCRE_EXP_DATA_DEFN void *(*PUBL(stack_malloc))(size_t) = g_try_malloc;
|
||||
PCRE_EXP_DATA_DEFN void (*PUBL(stack_free))(void *) = g_free;
|
||||
PCRE_EXP_DATA_DEFN int (*PUBL(callout))(PUBL(callout_block) *) = NULL;
|
||||
#endif
|
||||
|
||||
/* End of pcre_globals.c */
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,182 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2012 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains internal functions for testing newlines when more than
|
||||
one kind of newline is to be recognized. When a newline is found, its length is
|
||||
returned. In principle, we could implement several newline "types", each
|
||||
referring to a different set of newline characters. At present, PCRE supports
|
||||
only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF,
|
||||
and NLTYPE_ANY. The full list of Unicode newline characters is taken from
|
||||
http://unicode.org/unicode/reports/tr18/. */
|
||||
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Check for newline at given position *
|
||||
*************************************************/
|
||||
|
||||
/* It is guaranteed that the initial value of ptr is less than the end of the
|
||||
string that is being processed.
|
||||
|
||||
Arguments:
|
||||
ptr pointer to possible newline
|
||||
type the newline type
|
||||
endptr pointer to the end of the string
|
||||
lenptr where to return the length
|
||||
utf TRUE if in utf mode
|
||||
|
||||
Returns: TRUE or FALSE
|
||||
*/
|
||||
|
||||
BOOL
|
||||
PRIV(is_newline)(PCRE_PUCHAR ptr, int type, PCRE_PUCHAR endptr, int *lenptr,
|
||||
BOOL utf)
|
||||
{
|
||||
int c;
|
||||
(void)utf;
|
||||
#ifdef SUPPORT_UTF
|
||||
if (utf)
|
||||
{
|
||||
GETCHAR(c, ptr);
|
||||
}
|
||||
else
|
||||
#endif /* SUPPORT_UTF */
|
||||
c = *ptr;
|
||||
|
||||
if (type == NLTYPE_ANYCRLF) switch(c)
|
||||
{
|
||||
case 0x000a: *lenptr = 1; return TRUE; /* LF */
|
||||
case 0x000d: *lenptr = (ptr < endptr - 1 && ptr[1] == 0x0a)? 2 : 1;
|
||||
return TRUE; /* CR */
|
||||
default: return FALSE;
|
||||
}
|
||||
|
||||
/* NLTYPE_ANY */
|
||||
|
||||
else switch(c)
|
||||
{
|
||||
case 0x000a: /* LF */
|
||||
case 0x000b: /* VT */
|
||||
case 0x000c: *lenptr = 1; return TRUE; /* FF */
|
||||
case 0x000d: *lenptr = (ptr < endptr - 1 && ptr[1] == 0x0a)? 2 : 1;
|
||||
return TRUE; /* CR */
|
||||
#ifdef COMPILE_PCRE8
|
||||
case 0x0085: *lenptr = utf? 2 : 1; return TRUE; /* NEL */
|
||||
case 0x2028: /* LS */
|
||||
case 0x2029: *lenptr = 3; return TRUE; /* PS */
|
||||
#else
|
||||
case 0x0085: /* NEL */
|
||||
case 0x2028: /* LS */
|
||||
case 0x2029: *lenptr = 1; return TRUE; /* PS */
|
||||
#endif /* COMPILE_PCRE8 */
|
||||
default: return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Check for newline at previous position *
|
||||
*************************************************/
|
||||
|
||||
/* It is guaranteed that the initial value of ptr is greater than the start of
|
||||
the string that is being processed.
|
||||
|
||||
Arguments:
|
||||
ptr pointer to possible newline
|
||||
type the newline type
|
||||
startptr pointer to the start of the string
|
||||
lenptr where to return the length
|
||||
utf TRUE if in utf mode
|
||||
|
||||
Returns: TRUE or FALSE
|
||||
*/
|
||||
|
||||
BOOL
|
||||
PRIV(was_newline)(PCRE_PUCHAR ptr, int type, PCRE_PUCHAR startptr, int *lenptr,
|
||||
BOOL utf)
|
||||
{
|
||||
int c;
|
||||
(void)utf;
|
||||
ptr--;
|
||||
#ifdef SUPPORT_UTF
|
||||
if (utf)
|
||||
{
|
||||
BACKCHAR(ptr);
|
||||
GETCHAR(c, ptr);
|
||||
}
|
||||
else
|
||||
#endif /* SUPPORT_UTF */
|
||||
c = *ptr;
|
||||
|
||||
if (type == NLTYPE_ANYCRLF) switch(c)
|
||||
{
|
||||
case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1;
|
||||
return TRUE; /* LF */
|
||||
case 0x000d: *lenptr = 1; return TRUE; /* CR */
|
||||
default: return FALSE;
|
||||
}
|
||||
|
||||
else switch(c)
|
||||
{
|
||||
case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1;
|
||||
return TRUE; /* LF */
|
||||
case 0x000b: /* VT */
|
||||
case 0x000c: /* FF */
|
||||
case 0x000d: *lenptr = 1; return TRUE; /* CR */
|
||||
#ifdef COMPILE_PCRE8
|
||||
case 0x0085: *lenptr = utf? 2 : 1; return TRUE; /* NEL */
|
||||
case 0x2028: /* LS */
|
||||
case 0x2029: *lenptr = 3; return TRUE; /* PS */
|
||||
#else
|
||||
case 0x0085: /* NEL */
|
||||
case 0x2028: /* LS */
|
||||
case 0x2029: *lenptr = 1; return TRUE; /* PS */
|
||||
#endif /* COMPILE_PCRE8 */
|
||||
default: return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
/* End of pcre_newline.c */
|
@ -1,95 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2012 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This file contains a private PCRE function that converts an ordinal
|
||||
character value into a UTF8 string. */
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Convert character value to UTF-8 *
|
||||
*************************************************/
|
||||
|
||||
/* This function takes an integer value in the range 0 - 0x10ffff
|
||||
and encodes it as a UTF-8 character in 1 to 6 pcre_uchars.
|
||||
|
||||
Arguments:
|
||||
cvalue the character value
|
||||
buffer pointer to buffer for result - at least 6 pcre_uchars long
|
||||
|
||||
Returns: number of characters placed in the buffer
|
||||
*/
|
||||
|
||||
int
|
||||
PRIV(ord2utf)(pcre_uint32 cvalue, pcre_uchar *buffer)
|
||||
{
|
||||
#ifdef SUPPORT_UTF
|
||||
|
||||
int i, j;
|
||||
|
||||
/* Checking invalid cvalue character, encoded as invalid UTF-16 character.
|
||||
Should never happen in practice. */
|
||||
if ((cvalue & 0xf800) == 0xd800 || cvalue >= 0x110000)
|
||||
cvalue = 0xfffe;
|
||||
|
||||
for (i = 0; i < PRIV(utf8_table1_size); i++)
|
||||
if ((int)cvalue <= PRIV(utf8_table1)[i]) break;
|
||||
buffer += i;
|
||||
for (j = i; j > 0; j--)
|
||||
{
|
||||
*buffer-- = 0x80 | (cvalue & 0x3f);
|
||||
cvalue >>= 6;
|
||||
}
|
||||
*buffer = PRIV(utf8_table2)[i] | cvalue;
|
||||
return i + 1;
|
||||
|
||||
#else
|
||||
|
||||
(void)(cvalue); /* Keep compiler happy; this function won't ever be */
|
||||
(void)(buffer); /* called when SUPPORT_UTF is not defined. */
|
||||
return 0;
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
/* End of pcre_ord2utf8.c */
|
@ -1,166 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2012 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains internal string utility functions (comparison and
length) for strings whose code units are not 8-bit characters. They are
compiled only when COMPILE_PCRE8 is not defined. */
|
||||
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
#ifndef COMPILE_PCRE8
|
||||
|
||||
/*************************************************
|
||||
* Compare string utilities *
|
||||
*************************************************/
|
||||
|
||||
/* The following two functions compares two strings. Basically an strcmp
|
||||
for non 8 bit characters.
|
||||
|
||||
Arguments:
|
||||
str1 first string
|
||||
str2 second string
|
||||
|
||||
Returns: 0 if both string are equal (like strcmp), 1 otherwise
|
||||
*/
|
||||
|
||||
int
|
||||
PRIV(strcmp_uc_uc)(const pcre_uchar *str1, const pcre_uchar *str2)
|
||||
{
|
||||
pcre_uchar c1;
|
||||
pcre_uchar c2;
|
||||
|
||||
while (*str1 != '\0' || *str2 != '\0')
|
||||
{
|
||||
c1 = *str1++;
|
||||
c2 = *str2++;
|
||||
if (c1 != c2)
|
||||
return ((c1 > c2) << 1) - 1;
|
||||
}
|
||||
/* Both length and characters must be equal. */
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
PRIV(strcmp_uc_c8)(const pcre_uchar *str1, const char *str2)
|
||||
{
|
||||
const pcre_uint8 *ustr2 = (pcre_uint8 *)str2;
|
||||
pcre_uchar c1;
|
||||
pcre_uchar c2;
|
||||
|
||||
while (*str1 != '\0' || *ustr2 != '\0')
|
||||
{
|
||||
c1 = *str1++;
|
||||
c2 = (pcre_uchar)*ustr2++;
|
||||
if (c1 != c2)
|
||||
return ((c1 > c2) << 1) - 1;
|
||||
}
|
||||
/* Both length and characters must be equal. */
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* The following two functions compares two, fixed length
|
||||
strings. Basically an strncmp for non 8 bit characters.
|
||||
|
||||
Arguments:
|
||||
str1 first string
|
||||
str2 second string
|
||||
num size of the string
|
||||
|
||||
Returns: 0 if both string are equal (like strcmp), 1 otherwise
|
||||
*/
|
||||
|
||||
int
|
||||
PRIV(strncmp_uc_uc)(const pcre_uchar *str1, const pcre_uchar *str2, unsigned int num)
|
||||
{
|
||||
pcre_uchar c1;
|
||||
pcre_uchar c2;
|
||||
|
||||
while (num-- > 0)
|
||||
{
|
||||
c1 = *str1++;
|
||||
c2 = *str2++;
|
||||
if (c1 != c2)
|
||||
return ((c1 > c2) << 1) - 1;
|
||||
}
|
||||
/* Both length and characters must be equal. */
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
PRIV(strncmp_uc_c8)(const pcre_uchar *str1, const char *str2, unsigned int num)
|
||||
{
|
||||
const pcre_uint8 *ustr2 = (pcre_uint8 *)str2;
|
||||
pcre_uchar c1;
|
||||
pcre_uchar c2;
|
||||
|
||||
while (num-- > 0)
|
||||
{
|
||||
c1 = *str1++;
|
||||
c2 = (pcre_uchar)*ustr2++;
|
||||
if (c1 != c2)
|
||||
return ((c1 > c2) << 1) - 1;
|
||||
}
|
||||
/* Both length and characters must be equal. */
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* The following function returns with the length of
|
||||
a zero terminated string. Basically an strlen for non 8 bit characters.
|
||||
|
||||
Arguments:
|
||||
str string
|
||||
|
||||
Returns: length of the string
|
||||
*/
|
||||
|
||||
unsigned int
|
||||
PRIV(strlen_uc)(const pcre_uchar *str)
|
||||
{
|
||||
unsigned int len = 0;
|
||||
while (*str++ != 0)
|
||||
len++;
|
||||
return len;
|
||||
}
|
||||
|
||||
#endif /* COMPILE_PCRE8 */
|
||||
|
||||
/* End of pcre_string_utils.c */
|
File diff suppressed because it is too large
Load Diff
@ -1,600 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2012 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef PCRE_INCLUDED
|
||||
|
||||
/* This module contains some fixed tables that are used by more than one of the
|
||||
PCRE code modules. The tables are also #included by the pcretest program, which
|
||||
uses macros to change their names from _pcre_xxx to xxxx, thereby avoiding name
|
||||
clashes with the library. */
|
||||
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
#endif /* PCRE_INCLUDED */
|
||||
|
||||
/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
|
||||
the definition is next to the definition of the opcodes in pcre_internal.h. */
|
||||
|
||||
/* The whole initializer list comes from the OP_LENGTHS macro, which is not
defined in this file (the comment above places it in pcre_internal.h). */
const pcre_uint8 PRIV(OP_lengths)[] = { OP_LENGTHS };
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Tables for UTF-8 support *
|
||||
*************************************************/
|
||||
|
||||
/* These are the breakpoints for different numbers of bytes in a UTF-8
|
||||
character. */
|
||||
|
||||
#if (defined SUPPORT_UTF && defined COMPILE_PCRE8) \
|
||||
|| (defined PCRE_INCLUDED && defined SUPPORT_PCRE16)
|
||||
|
||||
/* These tables are also required by pcretest in 16 bit mode. */
|
||||
|
||||
const int PRIV(utf8_table1)[] =
|
||||
{ 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};
|
||||
|
||||
const int PRIV(utf8_table1_size) = sizeof(PRIV(utf8_table1)) / sizeof(int);
|
||||
|
||||
/* These are the indicator bits and the mask for the data bits to set in the
|
||||
first byte of a character, indexed by the number of additional bytes. */
|
||||
|
||||
const int PRIV(utf8_table2)[] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
|
||||
const int PRIV(utf8_table3)[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
|
||||
|
||||
/* Table of the number of extra bytes, indexed by the first byte masked with
|
||||
0x3f. The highest number for a valid UTF-8 first byte is in fact 0x3d. */
|
||||
|
||||
const pcre_uint8 PRIV(utf8_table4)[] = {
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
||||
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
|
||||
3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
|
||||
|
||||
#endif /* (SUPPORT_UTF && COMPILE_PCRE8) || (PCRE_INCLUDED && SUPPORT_PCRE16)*/
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
|
||||
/* Table to translate from particular type value to the general value. */
|
||||
|
||||
const int PRIV(ucp_gentype)[] = {
|
||||
ucp_C, ucp_C, ucp_C, ucp_C, ucp_C, /* Cc, Cf, Cn, Co, Cs */
|
||||
ucp_L, ucp_L, ucp_L, ucp_L, ucp_L, /* Ll, Lu, Lm, Lo, Lt */
|
||||
ucp_M, ucp_M, ucp_M, /* Mc, Me, Mn */
|
||||
ucp_N, ucp_N, ucp_N, /* Nd, Nl, No */
|
||||
ucp_P, ucp_P, ucp_P, ucp_P, ucp_P, /* Pc, Pd, Pe, Pf, Pi */
|
||||
ucp_P, ucp_P, /* Ps, Po */
|
||||
ucp_S, ucp_S, ucp_S, ucp_S, /* Sc, Sk, Sm, So */
|
||||
ucp_Z, ucp_Z, ucp_Z /* Zl, Zp, Zs */
|
||||
};
|
||||
|
||||
#ifdef SUPPORT_JIT
|
||||
/* This table reverses PRIV(ucp_gentype). We can save the cost
|
||||
of a memory load. */
|
||||
|
||||
const int PRIV(ucp_typerange)[] = {
|
||||
ucp_Cc, ucp_Cs,
|
||||
ucp_Ll, ucp_Lu,
|
||||
ucp_Mc, ucp_Mn,
|
||||
ucp_Nd, ucp_No,
|
||||
ucp_Pc, ucp_Ps,
|
||||
ucp_Sc, ucp_So,
|
||||
ucp_Zl, ucp_Zs,
|
||||
};
|
||||
#endif /* SUPPORT_JIT */
|
||||
|
||||
/* The pcre_utt[] table below translates Unicode property names into type and
|
||||
code values. It is searched by binary chop, so must be in collating sequence of
|
||||
name. Originally, the table contained pointers to the name strings in the first
|
||||
field of each entry. However, that leads to a large number of relocations when
|
||||
a shared library is dynamically loaded. A significant reduction is made by
|
||||
putting all the names into a single, large string and then using offsets in the
|
||||
table itself. Maintenance is more error-prone, but frequent changes to this
|
||||
data are unlikely.
|
||||
|
||||
July 2008: There is now a script called maint/GenerateUtt.py that can be used
|
||||
to generate this data automatically instead of maintaining it by hand.
|
||||
|
||||
The script was updated in March 2009 to generate a new EBCDIC-compliant
|
||||
version. Like all other character and string literals that are compared against
|
||||
the regular expression pattern, we must use STR_ macros instead of literal
|
||||
strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
||||
|
||||
/* One macro per Unicode property name. Each expands to the name spelled with
STR_ macros followed by a "\0" terminator, so that the names can be
concatenated into one large string. The macros are grouped here by initial
letter; they are in collating sequence of name. */

/* A */
#define STRING_Any0 STR_A STR_n STR_y "\0"
#define STRING_Arabic0 STR_A STR_r STR_a STR_b STR_i STR_c "\0"
#define STRING_Armenian0 STR_A STR_r STR_m STR_e STR_n STR_i STR_a STR_n "\0"
#define STRING_Avestan0 STR_A STR_v STR_e STR_s STR_t STR_a STR_n "\0"

/* B */
#define STRING_Balinese0 STR_B STR_a STR_l STR_i STR_n STR_e STR_s STR_e "\0"
#define STRING_Bamum0 STR_B STR_a STR_m STR_u STR_m "\0"
#define STRING_Batak0 STR_B STR_a STR_t STR_a STR_k "\0"
#define STRING_Bengali0 STR_B STR_e STR_n STR_g STR_a STR_l STR_i "\0"
#define STRING_Bopomofo0 STR_B STR_o STR_p STR_o STR_m STR_o STR_f STR_o "\0"
#define STRING_Brahmi0 STR_B STR_r STR_a STR_h STR_m STR_i "\0"
#define STRING_Braille0 STR_B STR_r STR_a STR_i STR_l STR_l STR_e "\0"
#define STRING_Buginese0 STR_B STR_u STR_g STR_i STR_n STR_e STR_s STR_e "\0"
#define STRING_Buhid0 STR_B STR_u STR_h STR_i STR_d "\0"

/* C */
#define STRING_C0 STR_C "\0"
#define STRING_Canadian_Aboriginal0 STR_C STR_a STR_n STR_a STR_d STR_i STR_a STR_n STR_UNDERSCORE STR_A STR_b STR_o STR_r STR_i STR_g STR_i STR_n STR_a STR_l "\0"
#define STRING_Carian0 STR_C STR_a STR_r STR_i STR_a STR_n "\0"
#define STRING_Cc0 STR_C STR_c "\0"
#define STRING_Cf0 STR_C STR_f "\0"
#define STRING_Chakma0 STR_C STR_h STR_a STR_k STR_m STR_a "\0"
#define STRING_Cham0 STR_C STR_h STR_a STR_m "\0"
#define STRING_Cherokee0 STR_C STR_h STR_e STR_r STR_o STR_k STR_e STR_e "\0"
#define STRING_Cn0 STR_C STR_n "\0"
#define STRING_Co0 STR_C STR_o "\0"
#define STRING_Common0 STR_C STR_o STR_m STR_m STR_o STR_n "\0"
#define STRING_Coptic0 STR_C STR_o STR_p STR_t STR_i STR_c "\0"
#define STRING_Cs0 STR_C STR_s "\0"
#define STRING_Cuneiform0 STR_C STR_u STR_n STR_e STR_i STR_f STR_o STR_r STR_m "\0"
#define STRING_Cypriot0 STR_C STR_y STR_p STR_r STR_i STR_o STR_t "\0"
#define STRING_Cyrillic0 STR_C STR_y STR_r STR_i STR_l STR_l STR_i STR_c "\0"

/* D */
#define STRING_Deseret0 STR_D STR_e STR_s STR_e STR_r STR_e STR_t "\0"
#define STRING_Devanagari0 STR_D STR_e STR_v STR_a STR_n STR_a STR_g STR_a STR_r STR_i "\0"

/* E */
#define STRING_Egyptian_Hieroglyphs0 STR_E STR_g STR_y STR_p STR_t STR_i STR_a STR_n STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0"
#define STRING_Ethiopic0 STR_E STR_t STR_h STR_i STR_o STR_p STR_i STR_c "\0"

/* G */
#define STRING_Georgian0 STR_G STR_e STR_o STR_r STR_g STR_i STR_a STR_n "\0"
#define STRING_Glagolitic0 STR_G STR_l STR_a STR_g STR_o STR_l STR_i STR_t STR_i STR_c "\0"
#define STRING_Gothic0 STR_G STR_o STR_t STR_h STR_i STR_c "\0"
#define STRING_Greek0 STR_G STR_r STR_e STR_e STR_k "\0"
#define STRING_Gujarati0 STR_G STR_u STR_j STR_a STR_r STR_a STR_t STR_i "\0"
#define STRING_Gurmukhi0 STR_G STR_u STR_r STR_m STR_u STR_k STR_h STR_i "\0"

/* H */
#define STRING_Han0 STR_H STR_a STR_n "\0"
#define STRING_Hangul0 STR_H STR_a STR_n STR_g STR_u STR_l "\0"
#define STRING_Hanunoo0 STR_H STR_a STR_n STR_u STR_n STR_o STR_o "\0"
#define STRING_Hebrew0 STR_H STR_e STR_b STR_r STR_e STR_w "\0"
#define STRING_Hiragana0 STR_H STR_i STR_r STR_a STR_g STR_a STR_n STR_a "\0"

/* I */
#define STRING_Imperial_Aramaic0 STR_I STR_m STR_p STR_e STR_r STR_i STR_a STR_l STR_UNDERSCORE STR_A STR_r STR_a STR_m STR_a STR_i STR_c "\0"
#define STRING_Inherited0 STR_I STR_n STR_h STR_e STR_r STR_i STR_t STR_e STR_d "\0"
#define STRING_Inscriptional_Pahlavi0 STR_I STR_n STR_s STR_c STR_r STR_i STR_p STR_t STR_i STR_o STR_n STR_a STR_l STR_UNDERSCORE STR_P STR_a STR_h STR_l STR_a STR_v STR_i "\0"
#define STRING_Inscriptional_Parthian0 STR_I STR_n STR_s STR_c STR_r STR_i STR_p STR_t STR_i STR_o STR_n STR_a STR_l STR_UNDERSCORE STR_P STR_a STR_r STR_t STR_h STR_i STR_a STR_n "\0"

/* J */
#define STRING_Javanese0 STR_J STR_a STR_v STR_a STR_n STR_e STR_s STR_e "\0"

/* K */
#define STRING_Kaithi0 STR_K STR_a STR_i STR_t STR_h STR_i "\0"
#define STRING_Kannada0 STR_K STR_a STR_n STR_n STR_a STR_d STR_a "\0"
#define STRING_Katakana0 STR_K STR_a STR_t STR_a STR_k STR_a STR_n STR_a "\0"
#define STRING_Kayah_Li0 STR_K STR_a STR_y STR_a STR_h STR_UNDERSCORE STR_L STR_i "\0"
#define STRING_Kharoshthi0 STR_K STR_h STR_a STR_r STR_o STR_s STR_h STR_t STR_h STR_i "\0"
#define STRING_Khmer0 STR_K STR_h STR_m STR_e STR_r "\0"

/* L */
#define STRING_L0 STR_L "\0"
#define STRING_L_AMPERSAND0 STR_L STR_AMPERSAND "\0"
#define STRING_Lao0 STR_L STR_a STR_o "\0"
#define STRING_Latin0 STR_L STR_a STR_t STR_i STR_n "\0"
#define STRING_Lepcha0 STR_L STR_e STR_p STR_c STR_h STR_a "\0"
#define STRING_Limbu0 STR_L STR_i STR_m STR_b STR_u "\0"
#define STRING_Linear_B0 STR_L STR_i STR_n STR_e STR_a STR_r STR_UNDERSCORE STR_B "\0"
#define STRING_Lisu0 STR_L STR_i STR_s STR_u "\0"
#define STRING_Ll0 STR_L STR_l "\0"
#define STRING_Lm0 STR_L STR_m "\0"
#define STRING_Lo0 STR_L STR_o "\0"
#define STRING_Lt0 STR_L STR_t "\0"
#define STRING_Lu0 STR_L STR_u "\0"
#define STRING_Lycian0 STR_L STR_y STR_c STR_i STR_a STR_n "\0"
#define STRING_Lydian0 STR_L STR_y STR_d STR_i STR_a STR_n "\0"

/* M */
#define STRING_M0 STR_M "\0"
#define STRING_Malayalam0 STR_M STR_a STR_l STR_a STR_y STR_a STR_l STR_a STR_m "\0"
#define STRING_Mandaic0 STR_M STR_a STR_n STR_d STR_a STR_i STR_c "\0"
#define STRING_Mc0 STR_M STR_c "\0"
#define STRING_Me0 STR_M STR_e "\0"
#define STRING_Meetei_Mayek0 STR_M STR_e STR_e STR_t STR_e STR_i STR_UNDERSCORE STR_M STR_a STR_y STR_e STR_k "\0"
#define STRING_Meroitic_Cursive0 STR_M STR_e STR_r STR_o STR_i STR_t STR_i STR_c STR_UNDERSCORE STR_C STR_u STR_r STR_s STR_i STR_v STR_e "\0"
#define STRING_Meroitic_Hieroglyphs0 STR_M STR_e STR_r STR_o STR_i STR_t STR_i STR_c STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0"
#define STRING_Miao0 STR_M STR_i STR_a STR_o "\0"
#define STRING_Mn0 STR_M STR_n "\0"
#define STRING_Mongolian0 STR_M STR_o STR_n STR_g STR_o STR_l STR_i STR_a STR_n "\0"
#define STRING_Myanmar0 STR_M STR_y STR_a STR_n STR_m STR_a STR_r "\0"

/* N */
#define STRING_N0 STR_N "\0"
#define STRING_Nd0 STR_N STR_d "\0"
#define STRING_New_Tai_Lue0 STR_N STR_e STR_w STR_UNDERSCORE STR_T STR_a STR_i STR_UNDERSCORE STR_L STR_u STR_e "\0"
#define STRING_Nko0 STR_N STR_k STR_o "\0"
#define STRING_Nl0 STR_N STR_l "\0"
#define STRING_No0 STR_N STR_o "\0"

/* O */
#define STRING_Ogham0 STR_O STR_g STR_h STR_a STR_m "\0"
#define STRING_Ol_Chiki0 STR_O STR_l STR_UNDERSCORE STR_C STR_h STR_i STR_k STR_i "\0"
#define STRING_Old_Italic0 STR_O STR_l STR_d STR_UNDERSCORE STR_I STR_t STR_a STR_l STR_i STR_c "\0"
#define STRING_Old_Persian0 STR_O STR_l STR_d STR_UNDERSCORE STR_P STR_e STR_r STR_s STR_i STR_a STR_n "\0"
#define STRING_Old_South_Arabian0 STR_O STR_l STR_d STR_UNDERSCORE STR_S STR_o STR_u STR_t STR_h STR_UNDERSCORE STR_A STR_r STR_a STR_b STR_i STR_a STR_n "\0"
#define STRING_Old_Turkic0 STR_O STR_l STR_d STR_UNDERSCORE STR_T STR_u STR_r STR_k STR_i STR_c "\0"
#define STRING_Oriya0 STR_O STR_r STR_i STR_y STR_a "\0"
#define STRING_Osmanya0 STR_O STR_s STR_m STR_a STR_n STR_y STR_a "\0"

/* P */
#define STRING_P0 STR_P "\0"
#define STRING_Pc0 STR_P STR_c "\0"
#define STRING_Pd0 STR_P STR_d "\0"
#define STRING_Pe0 STR_P STR_e "\0"
#define STRING_Pf0 STR_P STR_f "\0"
#define STRING_Phags_Pa0 STR_P STR_h STR_a STR_g STR_s STR_UNDERSCORE STR_P STR_a "\0"
#define STRING_Phoenician0 STR_P STR_h STR_o STR_e STR_n STR_i STR_c STR_i STR_a STR_n "\0"
#define STRING_Pi0 STR_P STR_i "\0"
#define STRING_Po0 STR_P STR_o "\0"
#define STRING_Ps0 STR_P STR_s "\0"

/* R */
#define STRING_Rejang0 STR_R STR_e STR_j STR_a STR_n STR_g "\0"
#define STRING_Runic0 STR_R STR_u STR_n STR_i STR_c "\0"

/* S */
#define STRING_S0 STR_S "\0"
#define STRING_Samaritan0 STR_S STR_a STR_m STR_a STR_r STR_i STR_t STR_a STR_n "\0"
#define STRING_Saurashtra0 STR_S STR_a STR_u STR_r STR_a STR_s STR_h STR_t STR_r STR_a "\0"
#define STRING_Sc0 STR_S STR_c "\0"
#define STRING_Sharada0 STR_S STR_h STR_a STR_r STR_a STR_d STR_a "\0"
#define STRING_Shavian0 STR_S STR_h STR_a STR_v STR_i STR_a STR_n "\0"
#define STRING_Sinhala0 STR_S STR_i STR_n STR_h STR_a STR_l STR_a "\0"
#define STRING_Sk0 STR_S STR_k "\0"
#define STRING_Sm0 STR_S STR_m "\0"
#define STRING_So0 STR_S STR_o "\0"
#define STRING_Sora_Sompeng0 STR_S STR_o STR_r STR_a STR_UNDERSCORE STR_S STR_o STR_m STR_p STR_e STR_n STR_g "\0"
#define STRING_Sundanese0 STR_S STR_u STR_n STR_d STR_a STR_n STR_e STR_s STR_e "\0"
#define STRING_Syloti_Nagri0 STR_S STR_y STR_l STR_o STR_t STR_i STR_UNDERSCORE STR_N STR_a STR_g STR_r STR_i "\0"
#define STRING_Syriac0 STR_S STR_y STR_r STR_i STR_a STR_c "\0"

/* T */
#define STRING_Tagalog0 STR_T STR_a STR_g STR_a STR_l STR_o STR_g "\0"
#define STRING_Tagbanwa0 STR_T STR_a STR_g STR_b STR_a STR_n STR_w STR_a "\0"
#define STRING_Tai_Le0 STR_T STR_a STR_i STR_UNDERSCORE STR_L STR_e "\0"
#define STRING_Tai_Tham0 STR_T STR_a STR_i STR_UNDERSCORE STR_T STR_h STR_a STR_m "\0"
#define STRING_Tai_Viet0 STR_T STR_a STR_i STR_UNDERSCORE STR_V STR_i STR_e STR_t "\0"
#define STRING_Takri0 STR_T STR_a STR_k STR_r STR_i "\0"
#define STRING_Tamil0 STR_T STR_a STR_m STR_i STR_l "\0"
#define STRING_Telugu0 STR_T STR_e STR_l STR_u STR_g STR_u "\0"
#define STRING_Thaana0 STR_T STR_h STR_a STR_a STR_n STR_a "\0"
#define STRING_Thai0 STR_T STR_h STR_a STR_i "\0"
#define STRING_Tibetan0 STR_T STR_i STR_b STR_e STR_t STR_a STR_n "\0"
#define STRING_Tifinagh0 STR_T STR_i STR_f STR_i STR_n STR_a STR_g STR_h "\0"

/* U, V */
#define STRING_Ugaritic0 STR_U STR_g STR_a STR_r STR_i STR_t STR_i STR_c "\0"
#define STRING_Vai0 STR_V STR_a STR_i "\0"

/* X, Y */
#define STRING_Xan0 STR_X STR_a STR_n "\0"
#define STRING_Xps0 STR_X STR_p STR_s "\0"
#define STRING_Xsp0 STR_X STR_s STR_p "\0"
#define STRING_Xwd0 STR_X STR_w STR_d "\0"
#define STRING_Yi0 STR_Y STR_i "\0"

/* Z */
#define STRING_Z0 STR_Z "\0"
#define STRING_Zl0 STR_Z STR_l "\0"
#define STRING_Zp0 STR_Z STR_p "\0"
#define STRING_Zs0 STR_Z STR_s "\0"
|
||||
|
||||
const char PRIV(utt_names)[] =
|
||||
STRING_Any0
|
||||
STRING_Arabic0
|
||||
STRING_Armenian0
|
||||
STRING_Avestan0
|
||||
STRING_Balinese0
|
||||
STRING_Bamum0
|
||||
STRING_Batak0
|
||||
STRING_Bengali0
|
||||
STRING_Bopomofo0
|
||||
STRING_Brahmi0
|
||||
STRING_Braille0
|
||||
STRING_Buginese0
|
||||
STRING_Buhid0
|
||||
STRING_C0
|
||||
STRING_Canadian_Aboriginal0
|
||||
STRING_Carian0
|
||||
STRING_Cc0
|
||||
STRING_Cf0
|
||||
STRING_Chakma0
|
||||
STRING_Cham0
|
||||
STRING_Cherokee0
|
||||
STRING_Cn0
|
||||
STRING_Co0
|
||||
STRING_Common0
|
||||
STRING_Coptic0
|
||||
STRING_Cs0
|
||||
STRING_Cuneiform0
|
||||
STRING_Cypriot0
|
||||
STRING_Cyrillic0
|
||||
STRING_Deseret0
|
||||
STRING_Devanagari0
|
||||
STRING_Egyptian_Hieroglyphs0
|
||||
STRING_Ethiopic0
|
||||
STRING_Georgian0
|
||||
STRING_Glagolitic0
|
||||
STRING_Gothic0
|
||||
STRING_Greek0
|
||||
STRING_Gujarati0
|
||||
STRING_Gurmukhi0
|
||||
STRING_Han0
|
||||
STRING_Hangul0
|
||||
STRING_Hanunoo0
|
||||
STRING_Hebrew0
|
||||
STRING_Hiragana0
|
||||
STRING_Imperial_Aramaic0
|
||||
STRING_Inherited0
|
||||
STRING_Inscriptional_Pahlavi0
|
||||
STRING_Inscriptional_Parthian0
|
||||
STRING_Javanese0
|
||||
STRING_Kaithi0
|
||||
STRING_Kannada0
|
||||
STRING_Katakana0
|
||||
STRING_Kayah_Li0
|
||||
STRING_Kharoshthi0
|
||||
STRING_Khmer0
|
||||
STRING_L0
|
||||
STRING_L_AMPERSAND0
|
||||
STRING_Lao0
|
||||
STRING_Latin0
|
||||
STRING_Lepcha0
|
||||
STRING_Limbu0
|
||||
STRING_Linear_B0
|
||||
STRING_Lisu0
|
||||
STRING_Ll0
|
||||
STRING_Lm0
|
||||
STRING_Lo0
|
||||
STRING_Lt0
|
||||
STRING_Lu0
|
||||
STRING_Lycian0
|
||||
STRING_Lydian0
|
||||
STRING_M0
|
||||
STRING_Malayalam0
|
||||
STRING_Mandaic0
|
||||
STRING_Mc0
|
||||
STRING_Me0
|
||||
STRING_Meetei_Mayek0
|
||||
STRING_Meroitic_Cursive0
|
||||
STRING_Meroitic_Hieroglyphs0
|
||||
STRING_Miao0
|
||||
STRING_Mn0
|
||||
STRING_Mongolian0
|
||||
STRING_Myanmar0
|
||||
STRING_N0
|
||||
STRING_Nd0
|
||||
STRING_New_Tai_Lue0
|
||||
STRING_Nko0
|
||||
STRING_Nl0
|
||||
STRING_No0
|
||||
STRING_Ogham0
|
||||
STRING_Ol_Chiki0
|
||||
STRING_Old_Italic0
|
||||
STRING_Old_Persian0
|
||||
STRING_Old_South_Arabian0
|
||||
STRING_Old_Turkic0
|
||||
STRING_Oriya0
|
||||
STRING_Osmanya0
|
||||
STRING_P0
|
||||
STRING_Pc0
|
||||
STRING_Pd0
|
||||
STRING_Pe0
|
||||
STRING_Pf0
|
||||
STRING_Phags_Pa0
|
||||
STRING_Phoenician0
|
||||
STRING_Pi0
|
||||
STRING_Po0
|
||||
STRING_Ps0
|
||||
STRING_Rejang0
|
||||
STRING_Runic0
|
||||
STRING_S0
|
||||
STRING_Samaritan0
|
||||
STRING_Saurashtra0
|
||||
STRING_Sc0
|
||||
STRING_Sharada0
|
||||
STRING_Shavian0
|
||||
STRING_Sinhala0
|
||||
STRING_Sk0
|
||||
STRING_Sm0
|
||||
STRING_So0
|
||||
STRING_Sora_Sompeng0
|
||||
STRING_Sundanese0
|
||||
STRING_Syloti_Nagri0
|
||||
STRING_Syriac0
|
||||
STRING_Tagalog0
|
||||
STRING_Tagbanwa0
|
||||
STRING_Tai_Le0
|
||||
STRING_Tai_Tham0
|
||||
STRING_Tai_Viet0
|
||||
STRING_Takri0
|
||||
STRING_Tamil0
|
||||
STRING_Telugu0
|
||||
STRING_Thaana0
|
||||
STRING_Thai0
|
||||
STRING_Tibetan0
|
||||
STRING_Tifinagh0
|
||||
STRING_Ugaritic0
|
||||
STRING_Vai0
|
||||
STRING_Xan0
|
||||
STRING_Xps0
|
||||
STRING_Xsp0
|
||||
STRING_Xwd0
|
||||
STRING_Yi0
|
||||
STRING_Z0
|
||||
STRING_Zl0
|
||||
STRING_Zp0
|
||||
STRING_Zs0;
|
||||
|
||||
const ucp_type_table PRIV(utt)[] = {
|
||||
{ 0, PT_ANY, 0 },
|
||||
{ 4, PT_SC, ucp_Arabic },
|
||||
{ 11, PT_SC, ucp_Armenian },
|
||||
{ 20, PT_SC, ucp_Avestan },
|
||||
{ 28, PT_SC, ucp_Balinese },
|
||||
{ 37, PT_SC, ucp_Bamum },
|
||||
{ 43, PT_SC, ucp_Batak },
|
||||
{ 49, PT_SC, ucp_Bengali },
|
||||
{ 57, PT_SC, ucp_Bopomofo },
|
||||
{ 66, PT_SC, ucp_Brahmi },
|
||||
{ 73, PT_SC, ucp_Braille },
|
||||
{ 81, PT_SC, ucp_Buginese },
|
||||
{ 90, PT_SC, ucp_Buhid },
|
||||
{ 96, PT_GC, ucp_C },
|
||||
{ 98, PT_SC, ucp_Canadian_Aboriginal },
|
||||
{ 118, PT_SC, ucp_Carian },
|
||||
{ 125, PT_PC, ucp_Cc },
|
||||
{ 128, PT_PC, ucp_Cf },
|
||||
{ 131, PT_SC, ucp_Chakma },
|
||||
{ 138, PT_SC, ucp_Cham },
|
||||
{ 143, PT_SC, ucp_Cherokee },
|
||||
{ 152, PT_PC, ucp_Cn },
|
||||
{ 155, PT_PC, ucp_Co },
|
||||
{ 158, PT_SC, ucp_Common },
|
||||
{ 165, PT_SC, ucp_Coptic },
|
||||
{ 172, PT_PC, ucp_Cs },
|
||||
{ 175, PT_SC, ucp_Cuneiform },
|
||||
{ 185, PT_SC, ucp_Cypriot },
|
||||
{ 193, PT_SC, ucp_Cyrillic },
|
||||
{ 202, PT_SC, ucp_Deseret },
|
||||
{ 210, PT_SC, ucp_Devanagari },
|
||||
{ 221, PT_SC, ucp_Egyptian_Hieroglyphs },
|
||||
{ 242, PT_SC, ucp_Ethiopic },
|
||||
{ 251, PT_SC, ucp_Georgian },
|
||||
{ 260, PT_SC, ucp_Glagolitic },
|
||||
{ 271, PT_SC, ucp_Gothic },
|
||||
{ 278, PT_SC, ucp_Greek },
|
||||
{ 284, PT_SC, ucp_Gujarati },
|
||||
{ 293, PT_SC, ucp_Gurmukhi },
|
||||
{ 302, PT_SC, ucp_Han },
|
||||
{ 306, PT_SC, ucp_Hangul },
|
||||
{ 313, PT_SC, ucp_Hanunoo },
|
||||
{ 321, PT_SC, ucp_Hebrew },
|
||||
{ 328, PT_SC, ucp_Hiragana },
|
||||
{ 337, PT_SC, ucp_Imperial_Aramaic },
|
||||
{ 354, PT_SC, ucp_Inherited },
|
||||
{ 364, PT_SC, ucp_Inscriptional_Pahlavi },
|
||||
{ 386, PT_SC, ucp_Inscriptional_Parthian },
|
||||
{ 409, PT_SC, ucp_Javanese },
|
||||
{ 418, PT_SC, ucp_Kaithi },
|
||||
{ 425, PT_SC, ucp_Kannada },
|
||||
{ 433, PT_SC, ucp_Katakana },
|
||||
{ 442, PT_SC, ucp_Kayah_Li },
|
||||
{ 451, PT_SC, ucp_Kharoshthi },
|
||||
{ 462, PT_SC, ucp_Khmer },
|
||||
{ 468, PT_GC, ucp_L },
|
||||
{ 470, PT_LAMP, 0 },
|
||||
{ 473, PT_SC, ucp_Lao },
|
||||
{ 477, PT_SC, ucp_Latin },
|
||||
{ 483, PT_SC, ucp_Lepcha },
|
||||
{ 490, PT_SC, ucp_Limbu },
|
||||
{ 496, PT_SC, ucp_Linear_B },
|
||||
{ 505, PT_SC, ucp_Lisu },
|
||||
{ 510, PT_PC, ucp_Ll },
|
||||
{ 513, PT_PC, ucp_Lm },
|
||||
{ 516, PT_PC, ucp_Lo },
|
||||
{ 519, PT_PC, ucp_Lt },
|
||||
{ 522, PT_PC, ucp_Lu },
|
||||
{ 525, PT_SC, ucp_Lycian },
|
||||
{ 532, PT_SC, ucp_Lydian },
|
||||
{ 539, PT_GC, ucp_M },
|
||||
{ 541, PT_SC, ucp_Malayalam },
|
||||
{ 551, PT_SC, ucp_Mandaic },
|
||||
{ 559, PT_PC, ucp_Mc },
|
||||
{ 562, PT_PC, ucp_Me },
|
||||
{ 565, PT_SC, ucp_Meetei_Mayek },
|
||||
{ 578, PT_SC, ucp_Meroitic_Cursive },
|
||||
{ 595, PT_SC, ucp_Meroitic_Hieroglyphs },
|
||||
{ 616, PT_SC, ucp_Miao },
|
||||
{ 621, PT_PC, ucp_Mn },
|
||||
{ 624, PT_SC, ucp_Mongolian },
|
||||
{ 634, PT_SC, ucp_Myanmar },
|
||||
{ 642, PT_GC, ucp_N },
|
||||
{ 644, PT_PC, ucp_Nd },
|
||||
{ 647, PT_SC, ucp_New_Tai_Lue },
|
||||
{ 659, PT_SC, ucp_Nko },
|
||||
{ 663, PT_PC, ucp_Nl },
|
||||
{ 666, PT_PC, ucp_No },
|
||||
{ 669, PT_SC, ucp_Ogham },
|
||||
{ 675, PT_SC, ucp_Ol_Chiki },
|
||||
{ 684, PT_SC, ucp_Old_Italic },
|
||||
{ 695, PT_SC, ucp_Old_Persian },
|
||||
{ 707, PT_SC, ucp_Old_South_Arabian },
|
||||
{ 725, PT_SC, ucp_Old_Turkic },
|
||||
{ 736, PT_SC, ucp_Oriya },
|
||||
{ 742, PT_SC, ucp_Osmanya },
|
||||
{ 750, PT_GC, ucp_P },
|
||||
{ 752, PT_PC, ucp_Pc },
|
||||
{ 755, PT_PC, ucp_Pd },
|
||||
{ 758, PT_PC, ucp_Pe },
|
||||
{ 761, PT_PC, ucp_Pf },
|
||||
{ 764, PT_SC, ucp_Phags_Pa },
|
||||
{ 773, PT_SC, ucp_Phoenician },
|
||||
{ 784, PT_PC, ucp_Pi },
|
||||
{ 787, PT_PC, ucp_Po },
|
||||
{ 790, PT_PC, ucp_Ps },
|
||||
{ 793, PT_SC, ucp_Rejang },
|
||||
{ 800, PT_SC, ucp_Runic },
|
||||
{ 806, PT_GC, ucp_S },
|
||||
{ 808, PT_SC, ucp_Samaritan },
|
||||
{ 818, PT_SC, ucp_Saurashtra },
|
||||
{ 829, PT_PC, ucp_Sc },
|
||||
{ 832, PT_SC, ucp_Sharada },
|
||||
{ 840, PT_SC, ucp_Shavian },
|
||||
{ 848, PT_SC, ucp_Sinhala },
|
||||
{ 856, PT_PC, ucp_Sk },
|
||||
{ 859, PT_PC, ucp_Sm },
|
||||
{ 862, PT_PC, ucp_So },
|
||||
{ 865, PT_SC, ucp_Sora_Sompeng },
|
||||
{ 878, PT_SC, ucp_Sundanese },
|
||||
{ 888, PT_SC, ucp_Syloti_Nagri },
|
||||
{ 901, PT_SC, ucp_Syriac },
|
||||
{ 908, PT_SC, ucp_Tagalog },
|
||||
{ 916, PT_SC, ucp_Tagbanwa },
|
||||
{ 925, PT_SC, ucp_Tai_Le },
|
||||
{ 932, PT_SC, ucp_Tai_Tham },
|
||||
{ 941, PT_SC, ucp_Tai_Viet },
|
||||
{ 950, PT_SC, ucp_Takri },
|
||||
{ 956, PT_SC, ucp_Tamil },
|
||||
{ 962, PT_SC, ucp_Telugu },
|
||||
{ 969, PT_SC, ucp_Thaana },
|
||||
{ 976, PT_SC, ucp_Thai },
|
||||
{ 981, PT_SC, ucp_Tibetan },
|
||||
{ 989, PT_SC, ucp_Tifinagh },
|
||||
{ 998, PT_SC, ucp_Ugaritic },
|
||||
{ 1007, PT_SC, ucp_Vai },
|
||||
{ 1011, PT_ALNUM, 0 },
|
||||
{ 1015, PT_PXSPACE, 0 },
|
||||
{ 1019, PT_SPACE, 0 },
|
||||
{ 1023, PT_WORD, 0 },
|
||||
{ 1027, PT_SC, ucp_Yi },
|
||||
{ 1030, PT_GC, ucp_Z },
|
||||
{ 1032, PT_PC, ucp_Zl },
|
||||
{ 1035, PT_PC, ucp_Zp },
|
||||
{ 1038, PT_PC, ucp_Zs }
|
||||
};
|
||||
|
||||
/* Number of entries in the property table, derived from the array itself so
it can never drift from the table's contents. */
const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(PRIV(utt)[0]);
|
||||
|
||||
/* Return the "other case" of code point c, using GLib's Unicode case
mapping functions.

The upper-case mapping is tried first; if it leaves c unchanged, the result of
the lower-case mapping is returned instead. A code point that neither mapping
changes is therefore returned as-is. */
unsigned int
_pcre_ucp_othercase(const unsigned int c)
{
  const unsigned int upper = g_unichar_toupper(c);

  return (upper != c) ? upper : (unsigned int) g_unichar_tolower(c);
}
|
||||
|
||||
#endif /* SUPPORT_UTF */
|
||||
|
||||
/* End of pcre_tables.c */
|
@ -1,297 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2012 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains an internal function for validating UTF-8 character
|
||||
strings. */
|
||||
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Validate a UTF-8 string *
|
||||
*************************************************/
|
||||
|
||||
/* This function is called (optionally) at the start of compile or match, to
|
||||
check that a supposed UTF-8 string is actually valid. The early check means
|
||||
that subsequent code can assume it is dealing with a valid string. The check
|
||||
can be turned off for maximum performance, but the consequences of supplying an
|
||||
invalid string are then undefined.
|
||||
|
||||
Originally, this function checked according to RFC 2279, allowing for values in
|
||||
the range 0 to 0x7fffffff, up to 6 bytes long, but ensuring that they were in
|
||||
the canonical format. Once somebody had pointed out RFC 3629 to me (it
|
||||
obsoletes 2279), additional restrictions were applied. The values are now
|
||||
limited to be between 0 and 0x0010ffff, no more than 4 bytes long, and the
|
||||
subrange 0xd800 to 0xdfff is excluded. However, the format of 5-byte and 6-byte
|
||||
characters is still checked.
|
||||
|
||||
From release 8.13 more information about the details of the error are passed
|
||||
back in the returned value:
|
||||
|
||||
PCRE_UTF8_ERR0 No error
|
||||
PCRE_UTF8_ERR1 Missing 1 byte at the end of the string
|
||||
PCRE_UTF8_ERR2 Missing 2 bytes at the end of the string
|
||||
PCRE_UTF8_ERR3 Missing 3 bytes at the end of the string
|
||||
PCRE_UTF8_ERR4 Missing 4 bytes at the end of the string
|
||||
PCRE_UTF8_ERR5 Missing 5 bytes at the end of the string
|
||||
PCRE_UTF8_ERR6 2nd-byte's two top bits are not 0x80
|
||||
PCRE_UTF8_ERR7 3rd-byte's two top bits are not 0x80
|
||||
PCRE_UTF8_ERR8 4th-byte's two top bits are not 0x80
|
||||
PCRE_UTF8_ERR9 5th-byte's two top bits are not 0x80
|
||||
PCRE_UTF8_ERR10 6th-byte's two top bits are not 0x80
|
||||
PCRE_UTF8_ERR11 5-byte character is not permitted by RFC 3629
|
||||
PCRE_UTF8_ERR12 6-byte character is not permitted by RFC 3629
|
||||
PCRE_UTF8_ERR13 4-byte character with value > 0x10ffff is not permitted
|
||||
PCRE_UTF8_ERR14 3-byte character with value 0xd800-0xdfff is not permitted
|
||||
PCRE_UTF8_ERR15 Overlong 2-byte sequence
|
||||
PCRE_UTF8_ERR16 Overlong 3-byte sequence
|
||||
PCRE_UTF8_ERR17 Overlong 4-byte sequence
|
||||
PCRE_UTF8_ERR18 Overlong 5-byte sequence (won't ever occur)
|
||||
PCRE_UTF8_ERR19 Overlong 6-byte sequence (won't ever occur)
|
||||
PCRE_UTF8_ERR20 Isolated 0x80 byte (not within UTF-8 character)
|
||||
PCRE_UTF8_ERR21 Byte with the illegal value 0xfe or 0xff
|
||||
|
||||
Arguments:
|
||||
string points to the string
|
||||
length length of string, or -1 if the string is zero-terminated
|
||||
erroroffset pointer to an error position offset variable
|
||||
|
||||
Returns: = 0 if the string is a valid UTF-8 string
|
||||
> 0 otherwise, setting the offset of the bad character
|
||||
*/
|
||||
|
||||
int
|
||||
PRIV(valid_utf)(PCRE_PUCHAR string, int length, int *erroroffset)
|
||||
{
|
||||
#ifdef SUPPORT_UTF
|
||||
PCRE_PUCHAR p;
|
||||
|
||||
if (length < 0)
|
||||
{
|
||||
for (p = string; *p != 0; p++);
|
||||
length = (int)(p - string);
|
||||
}
|
||||
|
||||
for (p = string; length-- > 0; p++)
|
||||
{
|
||||
int ab, c, d;
|
||||
|
||||
c = *p;
|
||||
if (c < 128) continue; /* ASCII character */
|
||||
|
||||
if (c < 0xc0) /* Isolated 10xx xxxx byte */
|
||||
{
|
||||
*erroroffset = (int)(p - string);
|
||||
return PCRE_UTF8_ERR20;
|
||||
}
|
||||
|
||||
if (c >= 0xfe) /* Invalid 0xfe or 0xff bytes */
|
||||
{
|
||||
*erroroffset = (int)(p - string);
|
||||
return PCRE_UTF8_ERR21;
|
||||
}
|
||||
|
||||
ab = PRIV(utf8_table4)[c & 0x3f]; /* Number of additional bytes */
|
||||
if (length < ab)
|
||||
{
|
||||
*erroroffset = (int)(p - string); /* Missing bytes */
|
||||
return ab - length; /* Codes ERR1 to ERR5 */
|
||||
}
|
||||
length -= ab; /* Length remaining */
|
||||
|
||||
/* Check top bits in the second byte */
|
||||
|
||||
if (((d = *(++p)) & 0xc0) != 0x80)
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 1;
|
||||
return PCRE_UTF8_ERR6;
|
||||
}
|
||||
|
||||
/* For each length, check that the remaining bytes start with the 0x80 bit
|
||||
set and not the 0x40 bit. Then check for an overlong sequence, and for the
|
||||
excluded range 0xd800 to 0xdfff. */
|
||||
|
||||
switch (ab)
|
||||
{
|
||||
/* 2-byte character. No further bytes to check for 0x80. Check first byte
|
||||
for for xx00 000x (overlong sequence). */
|
||||
|
||||
case 1: if ((c & 0x3e) == 0)
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 1;
|
||||
return PCRE_UTF8_ERR15;
|
||||
}
|
||||
break;
|
||||
|
||||
/* 3-byte character. Check third byte for 0x80. Then check first 2 bytes
|
||||
for 1110 0000, xx0x xxxx (overlong sequence) or
|
||||
1110 1101, 1010 xxxx (0xd800 - 0xdfff) */
|
||||
|
||||
case 2:
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 2;
|
||||
return PCRE_UTF8_ERR7;
|
||||
}
|
||||
if (c == 0xe0 && (d & 0x20) == 0)
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 2;
|
||||
return PCRE_UTF8_ERR16;
|
||||
}
|
||||
if (c == 0xed && d >= 0xa0)
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 2;
|
||||
return PCRE_UTF8_ERR14;
|
||||
}
|
||||
break;
|
||||
|
||||
/* 4-byte character. Check 3rd and 4th bytes for 0x80. Then check first 2
|
||||
bytes for for 1111 0000, xx00 xxxx (overlong sequence), then check for a
|
||||
character greater than 0x0010ffff (f4 8f bf bf) */
|
||||
|
||||
case 3:
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 2;
|
||||
return PCRE_UTF8_ERR7;
|
||||
}
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Fourth byte */
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 3;
|
||||
return PCRE_UTF8_ERR8;
|
||||
}
|
||||
if (c == 0xf0 && (d & 0x30) == 0)
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 3;
|
||||
return PCRE_UTF8_ERR17;
|
||||
}
|
||||
if (c > 0xf4 || (c == 0xf4 && d > 0x8f))
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 3;
|
||||
return PCRE_UTF8_ERR13;
|
||||
}
|
||||
break;
|
||||
|
||||
/* 5-byte and 6-byte characters are not allowed by RFC 3629, and will be
|
||||
rejected by the length test below. However, we do the appropriate tests
|
||||
here so that overlong sequences get diagnosed, and also in case there is
|
||||
ever an option for handling these larger code points. */
|
||||
|
||||
/* 5-byte character. Check 3rd, 4th, and 5th bytes for 0x80. Then check for
|
||||
1111 1000, xx00 0xxx */
|
||||
|
||||
case 4:
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 2;
|
||||
return PCRE_UTF8_ERR7;
|
||||
}
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Fourth byte */
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 3;
|
||||
return PCRE_UTF8_ERR8;
|
||||
}
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Fifth byte */
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 4;
|
||||
return PCRE_UTF8_ERR9;
|
||||
}
|
||||
if (c == 0xf8 && (d & 0x38) == 0)
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 4;
|
||||
return PCRE_UTF8_ERR18;
|
||||
}
|
||||
break;
|
||||
|
||||
/* 6-byte character. Check 3rd-6th bytes for 0x80. Then check for
|
||||
1111 1100, xx00 00xx. */
|
||||
|
||||
case 5:
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 2;
|
||||
return PCRE_UTF8_ERR7;
|
||||
}
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Fourth byte */
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 3;
|
||||
return PCRE_UTF8_ERR8;
|
||||
}
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Fifth byte */
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 4;
|
||||
return PCRE_UTF8_ERR9;
|
||||
}
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Sixth byte */
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 5;
|
||||
return PCRE_UTF8_ERR10;
|
||||
}
|
||||
if (c == 0xfc && (d & 0x3c) == 0)
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 5;
|
||||
return PCRE_UTF8_ERR19;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
/* Character is valid under RFC 2279, but 4-byte and 5-byte characters are
|
||||
excluded by RFC 3629. The pointer p is currently at the last byte of the
|
||||
character. */
|
||||
|
||||
if (ab > 3)
|
||||
{
|
||||
*erroroffset = (int)(p - string) - ab;
|
||||
return (ab == 4)? PCRE_UTF8_ERR11 : PCRE_UTF8_ERR12;
|
||||
}
|
||||
}
|
||||
|
||||
#else /* SUPPORT_UTF */
|
||||
(void)(string); /* Keep picky compilers happy */
|
||||
(void)(length);
|
||||
#endif
|
||||
|
||||
return PCRE_UTF8_ERR0; /* This indicates success */
|
||||
}
|
||||
|
||||
/* End of pcre_valid_utf8.c */
|
@ -1,93 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2012 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains the external function pcre_version(), which returns a
|
||||
string that identifies the PCRE version that is in use. */
|
||||
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Return version string *
|
||||
*************************************************/
|
||||
|
||||
/* These macros are the standard way of turning unquoted text into C strings.
|
||||
They allow macros like PCRE_MAJOR to be defined without quotes, which is
|
||||
convenient for user programs that want to test its value. */
|
||||
|
||||
#define STRING(a) # a
|
||||
#define XSTRING(s) STRING(s)
|
||||
|
||||
/* A problem turned up with PCRE_PRERELEASE, which is defined empty for
|
||||
production releases. Originally, it was used naively in this code:
|
||||
|
||||
return XSTRING(PCRE_MAJOR)
|
||||
"." XSTRING(PCRE_MINOR)
|
||||
XSTRING(PCRE_PRERELEASE)
|
||||
" " XSTRING(PCRE_DATE);
|
||||
|
||||
However, when PCRE_PRERELEASE is empty, this leads to an attempted expansion of
|
||||
STRING(). The C standard states: "If (before argument substitution) any
|
||||
argument consists of no preprocessing tokens, the behavior is undefined." It
|
||||
turns out that gcc treats this case as a single empty string - which is what we
|
||||
really want - but Visual C grumbles about the lack of an argument for the
|
||||
macro. Unfortunately, both are within their rights. To cope with both ways of
|
||||
handling this, I had to resort to some messy hackery that does a test at run time.
|
||||
I could find no way of detecting that a macro is defined as an empty string at
|
||||
pre-processor time. This hack uses a standard trick for avoiding calling
|
||||
the STRING macro with an empty argument when doing the test. */
|
||||
|
||||
#ifdef COMPILE_PCRE8
|
||||
PCRE_EXP_DEFN const char * PCRE_CALL_CONVENTION
|
||||
pcre_version(void)
|
||||
#else
|
||||
PCRE_EXP_DEFN const char * PCRE_CALL_CONVENTION
|
||||
pcre16_version(void)
|
||||
#endif
|
||||
{
|
||||
return (XSTRING(Z PCRE_PRERELEASE)[1] == 0)?
|
||||
XSTRING(PCRE_MAJOR.PCRE_MINOR PCRE_DATE) :
|
||||
XSTRING(PCRE_MAJOR.PCRE_MINOR) XSTRING(PCRE_PRERELEASE PCRE_DATE);
|
||||
}
|
||||
|
||||
/* End of pcre_version.c */
|
@ -1,196 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2012 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains an internal function that is used to match an extended
|
||||
class. It is used by both pcre_exec() and pcre_dfa_exec(). */
|
||||
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Match character against an XCLASS *
|
||||
*************************************************/
|
||||
|
||||
/* This function is called to match a character against an extended class that
|
||||
might contain values > 255 and/or Unicode properties.
|
||||
|
||||
Arguments:
|
||||
c the character
|
||||
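data points to the flag byte of the XCLASS data
utf TRUE if the class data is to be decoded as UTF characters (always
treated as TRUE in the 8-bit library)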
|
||||
|
||||
Returns: TRUE if character matches, else FALSE
|
||||
*/
|
||||
|
||||
BOOL
|
||||
PRIV(xclass)(int c, const pcre_uchar *data, BOOL utf)
|
||||
{
|
||||
int t;
|
||||
BOOL negated = (*data & XCL_NOT) != 0;
|
||||
|
||||
(void)utf;
|
||||
#ifdef COMPILE_PCRE8
|
||||
/* In 8 bit mode, this must always be TRUE. Help the compiler to know that. */
|
||||
utf = TRUE;
|
||||
#endif
|
||||
|
||||
/* Character values < 256 are matched against a bitmap, if one is present. If
|
||||
not, we still carry on, because there may be ranges that start below 256 in the
|
||||
additional data. */
|
||||
|
||||
if (c < 256)
|
||||
{
|
||||
if ((*data & XCL_MAP) != 0 &&
|
||||
(((pcre_uint8 *)(data + 1))[c/8] & (1 << (c&7))) != 0)
|
||||
return !negated; /* char found */
|
||||
}
|
||||
|
||||
/* First skip the bit map if present. Then match against the list of Unicode
|
||||
properties or large chars or ranges that end with a large char. We won't ever
|
||||
encounter XCL_PROP or XCL_NOTPROP when UCP support is not compiled. */
|
||||
|
||||
if ((*data++ & XCL_MAP) != 0) data += 32 / sizeof(pcre_uchar);
|
||||
|
||||
while ((t = *data++) != XCL_END)
|
||||
{
|
||||
int x, y;
|
||||
if (t == XCL_SINGLE)
|
||||
{
|
||||
#ifdef SUPPORT_UTF
|
||||
if (utf)
|
||||
{
|
||||
GETCHARINC(x, data); /* macro generates multiple statements */
|
||||
}
|
||||
else
|
||||
#endif
|
||||
x = *data++;
|
||||
if (c == x) return !negated;
|
||||
}
|
||||
else if (t == XCL_RANGE)
|
||||
{
|
||||
#ifdef SUPPORT_UTF
|
||||
if (utf)
|
||||
{
|
||||
GETCHARINC(x, data); /* macro generates multiple statements */
|
||||
GETCHARINC(y, data); /* macro generates multiple statements */
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
x = *data++;
|
||||
y = *data++;
|
||||
}
|
||||
if (c >= x && c <= y) return !negated;
|
||||
}
|
||||
|
||||
#ifdef SUPPORT_UCP
|
||||
else /* XCL_PROP & XCL_NOTPROP */
|
||||
{
|
||||
const pcre_uint8 chartype = UCD_CHARTYPE(c);
|
||||
|
||||
switch(*data)
|
||||
{
|
||||
case PT_ANY:
|
||||
if (t == XCL_PROP) return !negated;
|
||||
break;
|
||||
|
||||
case PT_LAMP:
|
||||
if ((chartype == ucp_Lu || chartype == ucp_Ll ||
|
||||
chartype == ucp_Lt) == (t == XCL_PROP)) return !negated;
|
||||
break;
|
||||
|
||||
case PT_GC:
|
||||
if ((data[1] == PRIV(ucp_gentype)[chartype]) == (t == XCL_PROP))
|
||||
return !negated;
|
||||
break;
|
||||
|
||||
case PT_PC:
|
||||
if ((data[1] == chartype) == (t == XCL_PROP)) return !negated;
|
||||
break;
|
||||
|
||||
case PT_SC:
|
||||
if ((data[1] == UCD_SCRIPT(c)) == (t == XCL_PROP)) return !negated;
|
||||
break;
|
||||
|
||||
case PT_ALNUM:
|
||||
if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||
PRIV(ucp_gentype)[chartype] == ucp_N) == (t == XCL_PROP))
|
||||
return !negated;
|
||||
break;
|
||||
|
||||
case PT_SPACE: /* Perl space */
|
||||
if ((PRIV(ucp_gentype)[chartype] == ucp_Z ||
|
||||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
|
||||
== (t == XCL_PROP))
|
||||
return !negated;
|
||||
break;
|
||||
|
||||
case PT_PXSPACE: /* POSIX space */
|
||||
if ((PRIV(ucp_gentype)[chartype] == ucp_Z ||
|
||||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
|
||||
c == CHAR_FF || c == CHAR_CR) == (t == XCL_PROP))
|
||||
return !negated;
|
||||
break;
|
||||
|
||||
case PT_WORD:
|
||||
if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||
PRIV(ucp_gentype)[chartype] == ucp_N || c == CHAR_UNDERSCORE)
|
||||
== (t == XCL_PROP))
|
||||
return !negated;
|
||||
break;
|
||||
|
||||
/* This should never occur, but compilers may mutter if there is no
|
||||
default. */
|
||||
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
data += 2;
|
||||
}
|
||||
#endif /* SUPPORT_UCP */
|
||||
}
|
||||
|
||||
return negated; /* char did not match */
|
||||
}
|
||||
|
||||
/* End of pcre_xclass.c */
|
179
glib/pcre/ucp.h
179
glib/pcre/ucp.h
@ -1,179 +0,0 @@
|
||||
/*************************************************
|
||||
* Unicode Property Table handler *
|
||||
*************************************************/
|
||||
|
||||
#ifndef _UCP_H
|
||||
#define _UCP_H
|
||||
|
||||
/* This file contains definitions of the property values that are returned by
|
||||
the UCD access macros. New values that are added for new releases of Unicode
|
||||
should always be at the end of each enum, for backwards compatibility. */
|
||||
|
||||
/* These are the general character categories. */
|
||||
#ifdef GLIB_COMPILATION
|
||||
#include "gunicode.h"
|
||||
#else
|
||||
#include <glib.h>
|
||||
#endif
|
||||
|
||||
/* One-letter general categories, numbered consecutively from zero. */
enum {
  ucp_C = 0,    /* Other */
  ucp_L = 1,    /* Letter */
  ucp_M = 2,    /* Mark */
  ucp_N = 3,    /* Number */
  ucp_P = 4,    /* Punctuation */
  ucp_S = 5,    /* Symbol */
  ucp_Z = 6     /* Separator */
};
|
||||
|
||||
/* These are the particular character types. */
|
||||
|
||||
enum {
|
||||
ucp_Cc = G_UNICODE_CONTROL, /* Control */
|
||||
ucp_Cf = G_UNICODE_FORMAT, /* Format */
|
||||
ucp_Cn = G_UNICODE_UNASSIGNED, /* Unassigned */
|
||||
ucp_Co = G_UNICODE_PRIVATE_USE, /* Private use */
|
||||
ucp_Cs = G_UNICODE_SURROGATE, /* Surrogate */
|
||||
ucp_Ll = G_UNICODE_LOWERCASE_LETTER, /* Lower case letter */
|
||||
ucp_Lm = G_UNICODE_MODIFIER_LETTER, /* Modifier letter */
|
||||
ucp_Lo = G_UNICODE_OTHER_LETTER, /* Other letter */
|
||||
ucp_Lt = G_UNICODE_TITLECASE_LETTER, /* Title case letter */
|
||||
ucp_Lu = G_UNICODE_UPPERCASE_LETTER, /* Upper case letter */
|
||||
ucp_Mc = G_UNICODE_SPACING_MARK, /* Spacing mark */
|
||||
ucp_Me = G_UNICODE_ENCLOSING_MARK, /* Enclosing mark */
|
||||
ucp_Mn = G_UNICODE_NON_SPACING_MARK, /* Non-spacing mark */
|
||||
ucp_Nd = G_UNICODE_DECIMAL_NUMBER, /* Decimal number */
|
||||
ucp_Nl = G_UNICODE_LETTER_NUMBER, /* Letter number */
|
||||
ucp_No = G_UNICODE_OTHER_NUMBER, /* Other number */
|
||||
ucp_Pc = G_UNICODE_CONNECT_PUNCTUATION, /* Connector punctuation */
|
||||
ucp_Pd = G_UNICODE_DASH_PUNCTUATION, /* Dash punctuation */
|
||||
ucp_Pe = G_UNICODE_CLOSE_PUNCTUATION, /* Close punctuation */
|
||||
ucp_Pf = G_UNICODE_FINAL_PUNCTUATION, /* Final punctuation */
|
||||
ucp_Pi = G_UNICODE_INITIAL_PUNCTUATION, /* Initial punctuation */
|
||||
ucp_Po = G_UNICODE_OTHER_PUNCTUATION, /* Other punctuation */
|
||||
ucp_Ps = G_UNICODE_OPEN_PUNCTUATION, /* Open punctuation */
|
||||
ucp_Sc = G_UNICODE_CURRENCY_SYMBOL, /* Currency symbol */
|
||||
ucp_Sk = G_UNICODE_MODIFIER_SYMBOL, /* Modifier symbol */
|
||||
ucp_Sm = G_UNICODE_MATH_SYMBOL, /* Mathematical symbol */
|
||||
ucp_So = G_UNICODE_OTHER_SYMBOL, /* Other symbol */
|
||||
ucp_Zl = G_UNICODE_LINE_SEPARATOR, /* Line separator */
|
||||
ucp_Zp = G_UNICODE_PARAGRAPH_SEPARATOR, /* Paragraph separator */
|
||||
ucp_Zs = G_UNICODE_SPACE_SEPARATOR /* Space separator */
|
||||
};
|
||||
|
||||
/* These are the script identifications. */
|
||||
|
||||
/* Each ucp_* script identifier is explicitly given the value of the GLib
   G_UNICODE_SCRIPT_* constant of the same name. The entries are grouped by
   the Unicode release that introduced them (see the "New for Unicode ..."
   markers below); since every value is explicit, that grouping does not
   influence the numeric values. */
enum {
|
||||
ucp_Common = G_UNICODE_SCRIPT_COMMON,
|
||||
ucp_Inherited = G_UNICODE_SCRIPT_INHERITED,
|
||||
|
||||
ucp_Arabic = G_UNICODE_SCRIPT_ARABIC,
|
||||
ucp_Armenian = G_UNICODE_SCRIPT_ARMENIAN,
|
||||
ucp_Bengali = G_UNICODE_SCRIPT_BENGALI,
|
||||
ucp_Bopomofo = G_UNICODE_SCRIPT_BOPOMOFO,
|
||||
ucp_Braille = G_UNICODE_SCRIPT_BRAILLE,
|
||||
ucp_Buginese = G_UNICODE_SCRIPT_BUGINESE,
|
||||
ucp_Buhid = G_UNICODE_SCRIPT_BUHID,
|
||||
ucp_Canadian_Aboriginal = G_UNICODE_SCRIPT_CANADIAN_ABORIGINAL,
|
||||
ucp_Cherokee = G_UNICODE_SCRIPT_CHEROKEE,
|
||||
ucp_Coptic = G_UNICODE_SCRIPT_COPTIC,
|
||||
ucp_Cypriot = G_UNICODE_SCRIPT_CYPRIOT,
|
||||
ucp_Cyrillic = G_UNICODE_SCRIPT_CYRILLIC,
|
||||
ucp_Deseret = G_UNICODE_SCRIPT_DESERET,
|
||||
ucp_Devanagari = G_UNICODE_SCRIPT_DEVANAGARI,
|
||||
ucp_Ethiopic = G_UNICODE_SCRIPT_ETHIOPIC,
|
||||
ucp_Georgian = G_UNICODE_SCRIPT_GEORGIAN,
|
||||
ucp_Glagolitic = G_UNICODE_SCRIPT_GLAGOLITIC,
|
||||
ucp_Gothic = G_UNICODE_SCRIPT_GOTHIC,
|
||||
ucp_Greek = G_UNICODE_SCRIPT_GREEK,
|
||||
ucp_Gujarati = G_UNICODE_SCRIPT_GUJARATI,
|
||||
ucp_Gurmukhi = G_UNICODE_SCRIPT_GURMUKHI,
|
||||
ucp_Han = G_UNICODE_SCRIPT_HAN,
|
||||
ucp_Hangul = G_UNICODE_SCRIPT_HANGUL,
|
||||
ucp_Hanunoo = G_UNICODE_SCRIPT_HANUNOO,
|
||||
ucp_Hebrew = G_UNICODE_SCRIPT_HEBREW,
|
||||
ucp_Hiragana = G_UNICODE_SCRIPT_HIRAGANA,
|
||||
ucp_Kannada = G_UNICODE_SCRIPT_KANNADA,
|
||||
ucp_Katakana = G_UNICODE_SCRIPT_KATAKANA,
|
||||
ucp_Kharoshthi = G_UNICODE_SCRIPT_KHAROSHTHI,
|
||||
ucp_Khmer = G_UNICODE_SCRIPT_KHMER,
|
||||
ucp_Lao = G_UNICODE_SCRIPT_LAO,
|
||||
ucp_Latin = G_UNICODE_SCRIPT_LATIN,
|
||||
ucp_Limbu = G_UNICODE_SCRIPT_LIMBU,
|
||||
ucp_Linear_B = G_UNICODE_SCRIPT_LINEAR_B,
|
||||
ucp_Malayalam = G_UNICODE_SCRIPT_MALAYALAM,
|
||||
ucp_Mongolian = G_UNICODE_SCRIPT_MONGOLIAN,
|
||||
ucp_Myanmar = G_UNICODE_SCRIPT_MYANMAR,
|
||||
ucp_New_Tai_Lue = G_UNICODE_SCRIPT_NEW_TAI_LUE,
|
||||
ucp_Ogham = G_UNICODE_SCRIPT_OGHAM,
|
||||
ucp_Old_Italic = G_UNICODE_SCRIPT_OLD_ITALIC,
|
||||
ucp_Old_Persian = G_UNICODE_SCRIPT_OLD_PERSIAN,
|
||||
ucp_Oriya = G_UNICODE_SCRIPT_ORIYA,
|
||||
ucp_Osmanya = G_UNICODE_SCRIPT_OSMANYA,
|
||||
ucp_Runic = G_UNICODE_SCRIPT_RUNIC,
|
||||
ucp_Shavian = G_UNICODE_SCRIPT_SHAVIAN,
|
||||
ucp_Sinhala = G_UNICODE_SCRIPT_SINHALA,
|
||||
ucp_Syloti_Nagri = G_UNICODE_SCRIPT_SYLOTI_NAGRI,
|
||||
ucp_Syriac = G_UNICODE_SCRIPT_SYRIAC,
|
||||
ucp_Tagalog = G_UNICODE_SCRIPT_TAGALOG,
|
||||
ucp_Tagbanwa = G_UNICODE_SCRIPT_TAGBANWA,
|
||||
ucp_Tai_Le = G_UNICODE_SCRIPT_TAI_LE,
|
||||
ucp_Tamil = G_UNICODE_SCRIPT_TAMIL,
|
||||
ucp_Telugu = G_UNICODE_SCRIPT_TELUGU,
|
||||
ucp_Thaana = G_UNICODE_SCRIPT_THAANA,
|
||||
ucp_Thai = G_UNICODE_SCRIPT_THAI,
|
||||
ucp_Tibetan = G_UNICODE_SCRIPT_TIBETAN,
|
||||
ucp_Tifinagh = G_UNICODE_SCRIPT_TIFINAGH,
|
||||
ucp_Ugaritic = G_UNICODE_SCRIPT_UGARITIC,
|
||||
ucp_Yi = G_UNICODE_SCRIPT_YI,
|
||||
/* New for Unicode 5.0: */
|
||||
ucp_Balinese = G_UNICODE_SCRIPT_BALINESE,
|
||||
ucp_Cuneiform = G_UNICODE_SCRIPT_CUNEIFORM,
|
||||
ucp_Nko = G_UNICODE_SCRIPT_NKO,
|
||||
ucp_Phags_Pa = G_UNICODE_SCRIPT_PHAGS_PA,
|
||||
ucp_Phoenician = G_UNICODE_SCRIPT_PHOENICIAN,
|
||||
/* New for Unicode 5.1: */
|
||||
ucp_Carian = G_UNICODE_SCRIPT_CARIAN,
|
||||
ucp_Cham = G_UNICODE_SCRIPT_CHAM,
|
||||
ucp_Kayah_Li = G_UNICODE_SCRIPT_KAYAH_LI,
|
||||
ucp_Lepcha = G_UNICODE_SCRIPT_LEPCHA,
|
||||
ucp_Lycian = G_UNICODE_SCRIPT_LYCIAN,
|
||||
ucp_Lydian = G_UNICODE_SCRIPT_LYDIAN,
|
||||
ucp_Ol_Chiki = G_UNICODE_SCRIPT_OL_CHIKI,
|
||||
ucp_Rejang = G_UNICODE_SCRIPT_REJANG,
|
||||
ucp_Saurashtra = G_UNICODE_SCRIPT_SAURASHTRA,
|
||||
ucp_Sundanese = G_UNICODE_SCRIPT_SUNDANESE,
|
||||
ucp_Vai = G_UNICODE_SCRIPT_VAI,
|
||||
/* New for Unicode 5.2: */
|
||||
ucp_Avestan = G_UNICODE_SCRIPT_AVESTAN,
|
||||
ucp_Bamum = G_UNICODE_SCRIPT_BAMUM,
|
||||
ucp_Egyptian_Hieroglyphs = G_UNICODE_SCRIPT_EGYPTIAN_HIEROGLYPHS,
|
||||
ucp_Imperial_Aramaic = G_UNICODE_SCRIPT_IMPERIAL_ARAMAIC,
|
||||
ucp_Inscriptional_Pahlavi = G_UNICODE_SCRIPT_INSCRIPTIONAL_PAHLAVI,
|
||||
ucp_Inscriptional_Parthian = G_UNICODE_SCRIPT_INSCRIPTIONAL_PARTHIAN,
|
||||
ucp_Javanese = G_UNICODE_SCRIPT_JAVANESE,
|
||||
ucp_Kaithi = G_UNICODE_SCRIPT_KAITHI,
|
||||
ucp_Lisu = G_UNICODE_SCRIPT_LISU,
|
||||
ucp_Meetei_Mayek = G_UNICODE_SCRIPT_MEETEI_MAYEK,
|
||||
ucp_Old_South_Arabian = G_UNICODE_SCRIPT_OLD_SOUTH_ARABIAN,
|
||||
ucp_Old_Turkic = G_UNICODE_SCRIPT_OLD_TURKIC,
|
||||
ucp_Samaritan = G_UNICODE_SCRIPT_SAMARITAN,
|
||||
ucp_Tai_Tham = G_UNICODE_SCRIPT_TAI_THAM,
|
||||
ucp_Tai_Viet = G_UNICODE_SCRIPT_TAI_VIET,
|
||||
/* New for Unicode 6.0.0: */
|
||||
ucp_Batak = G_UNICODE_SCRIPT_BATAK,
|
||||
ucp_Brahmi = G_UNICODE_SCRIPT_BRAHMI,
|
||||
ucp_Mandaic = G_UNICODE_SCRIPT_MANDAIC,
|
||||
/* New for Unicode 6.1.0: */
|
||||
ucp_Chakma = G_UNICODE_SCRIPT_CHAKMA,
|
||||
ucp_Meroitic_Cursive = G_UNICODE_SCRIPT_MEROITIC_CURSIVE,
|
||||
ucp_Meroitic_Hieroglyphs = G_UNICODE_SCRIPT_MEROITIC_HIEROGLYPHS,
|
||||
ucp_Miao = G_UNICODE_SCRIPT_MIAO,
|
||||
ucp_Sharada = G_UNICODE_SCRIPT_SHARADA,
|
||||
ucp_Sora_Sompeng = G_UNICODE_SCRIPT_SORA_SOMPENG,
|
||||
ucp_Takri = G_UNICODE_SCRIPT_TAKRI,
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
/* End of ucp.h */
|
@ -25,11 +25,7 @@
|
||||
#include <locale.h>
|
||||
#include "glib.h"
|
||||
|
||||
#ifdef USE_SYSTEM_PCRE
|
||||
#include <pcre.h>
|
||||
#else
|
||||
#include "glib/pcre/pcre.h"
|
||||
#endif
|
||||
|
||||
/* U+20AC EURO SIGN (symbol, currency) */
|
||||
#define EURO "\xe2\x82\xac"
|
||||
@ -2678,7 +2674,7 @@ main (int argc, char *argv[])
|
||||
TEST_EXPAND("a", "a", "\\0130", FALSE, "X");
|
||||
TEST_EXPAND("a", "a", "\\\\\\0", FALSE, "\\a");
|
||||
TEST_EXPAND("a(?P<G>.)c", "xabcy", "X\\g<G>X", FALSE, "XbX");
|
||||
#ifndef USE_SYSTEM_PCRE
|
||||
#if !(PCRE_MAJOR > 8 || (PCRE_MAJOR == 8 && PCRE_MINOR >= 34))
|
||||
/* PCRE >= 8.34 no longer allows this usage. */
|
||||
TEST_EXPAND("(.)(?P<1>.)", "ab", "\\1", FALSE, "a");
|
||||
TEST_EXPAND("(.)(?P<1>.)", "ab", "\\g<1>", FALSE, "a");
|
||||
|
@ -1,94 +0,0 @@
|
||||
From 5238ab10c5f3082a4be38410bd01a47ab176dfde Mon Sep 17 00:00:00 2001
|
||||
From: Christian Persch <chpe@gnome.org>
|
||||
Date: Sun, 12 Feb 2012 19:29:42 +0100
|
||||
Subject: [PATCH] regex: Use g_ascii_is[x]digit
|
||||
|
||||
---
|
||||
glib/pcre/pcre_compile.c | 22 ++++++++++++----------
|
||||
1 files changed, 12 insertions(+), 10 deletions(-)
|
||||
|
||||
diff --git a/glib/pcre/pcre_compile.c b/glib/pcre/pcre_compile.c
|
||||
index 8070f51..eb985df 100644
|
||||
--- a/glib/pcre/pcre_compile.c
|
||||
+++ b/glib/pcre/pcre_compile.c
|
||||
@@ -52,6 +52,7 @@ supporting internal functions that are not used by other modules. */
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
+#include "gstrfuncs.h"
|
||||
|
||||
/* When PCRE_DEBUG is defined, we need the pcre(16)_printint() function, which
|
||||
is also used by pcretest. PCRE_DEBUG is not defined when building a production
|
||||
@@ -513,6 +514,7 @@ into a subtraction and unsigned comparison). */
|
||||
|
||||
#define IS_DIGIT(x) ((x) >= CHAR_0 && (x) <= CHAR_9)
|
||||
|
||||
+#if 0
|
||||
#ifndef EBCDIC
|
||||
|
||||
/* This is the "normal" case, for ASCII systems, and EBCDIC systems running in
|
||||
@@ -626,7 +628,7 @@ static const pcre_uint8 ebcdic_chartab[] = { /* chartable partial dup */
|
||||
0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
|
||||
0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x00};/* 8 -255 */
|
||||
#endif
|
||||
-
|
||||
+#endif /* 0 */
|
||||
|
||||
/* Definition to allow mutual recursion */
|
||||
|
||||
@@ -812,10 +814,10 @@ else
|
||||
{
|
||||
/* In JavaScript, \u must be followed by four hexadecimal numbers.
|
||||
Otherwise it is a lowercase u letter. */
|
||||
- if (MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0
|
||||
- && MAX_255(ptr[2]) && (digitab[ptr[2]] & ctype_xdigit) != 0
|
||||
- && MAX_255(ptr[3]) && (digitab[ptr[3]] & ctype_xdigit) != 0
|
||||
- && MAX_255(ptr[4]) && (digitab[ptr[4]] & ctype_xdigit) != 0)
|
||||
+ if (MAX_255(ptr[1]) && g_ascii_isxdigit(ptr[1]) != 0
|
||||
+ && MAX_255(ptr[2]) && g_ascii_isxdigit(ptr[2]) != 0
|
||||
+ && MAX_255(ptr[3]) && g_ascii_isxdigit(ptr[3]) != 0
|
||||
+ && MAX_255(ptr[4]) && g_ascii_isxdigit(ptr[4]) != 0)
|
||||
{
|
||||
c = 0;
|
||||
for (i = 0; i < 4; ++i)
|
||||
@@ -1012,8 +1014,8 @@ else
|
||||
{
|
||||
/* In JavaScript, \x must be followed by two hexadecimal numbers.
|
||||
Otherwise it is a lowercase x letter. */
|
||||
- if (MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0
|
||||
- && MAX_255(ptr[2]) && (digitab[ptr[2]] & ctype_xdigit) != 0)
|
||||
+ if (MAX_255(ptr[1]) && g_ascii_isxdigit(ptr[1]) != 0
|
||||
+ && MAX_255(ptr[2]) && g_ascii_isxdigit(ptr[2]) != 0)
|
||||
{
|
||||
c = 0;
|
||||
for (i = 0; i < 2; ++i)
|
||||
@@ -1036,7 +1038,7 @@ else
|
||||
const pcre_uchar *pt = ptr + 2;
|
||||
|
||||
c = 0;
|
||||
- while (MAX_255(*pt) && (digitab[*pt] & ctype_xdigit) != 0)
|
||||
+ while (MAX_255(*pt) && g_ascii_isxdigit(*pt) != 0)
|
||||
{
|
||||
register int cc = *pt++;
|
||||
if (c == 0 && cc == CHAR_0) continue; /* Leading zeroes */
|
||||
@@ -1060,7 +1062,7 @@ else
|
||||
|
||||
if (c < 0)
|
||||
{
|
||||
- while (MAX_255(*pt) && (digitab[*pt] & ctype_xdigit) != 0) pt++;
|
||||
+ while (MAX_255(*pt) && g_ascii_isxdigit(*pt) != 0) pt++;
|
||||
*errorcodeptr = ERR34;
|
||||
}
|
||||
|
||||
@@ -1078,7 +1080,7 @@ else
|
||||
/* Read just a single-byte hex-defined char */
|
||||
|
||||
c = 0;
|
||||
- while (i++ < 2 && MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0)
|
||||
+ while (i++ < 2 && MAX_255(ptr[1]) && g_ascii_isxdigit(ptr[1]) != 0)
|
||||
{
|
||||
int cc; /* Some compilers don't like */
|
||||
cc = *(++ptr); /* ++ in initializers */
|
||||
--
|
||||
1.7.5.1.217.g4e3aa.dirty
|
||||
|
@ -1,40 +0,0 @@
|
||||
From acf401f1353a37b6edff9577ff07d055c625e4ca Mon Sep 17 00:00:00 2001
|
||||
From: Christian Persch <chpe@gnome.org>
|
||||
Date: Sun, 12 Feb 2012 19:40:48 +0100
|
||||
Subject: [PATCH] regex: Use glib memory allocator
|
||||
|
||||
---
|
||||
glib/pcre/pcre_globals.c | 10 ++++++----
|
||||
1 files changed, 6 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/glib/pcre/pcre_globals.c b/glib/pcre/pcre_globals.c
|
||||
index 36e6ddb..93d3af5 100644
|
||||
--- a/glib/pcre/pcre_globals.c
|
||||
+++ b/glib/pcre/pcre_globals.c
|
||||
@@ -58,6 +58,8 @@ global variables are not used. */
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
+#include "gmem.h"
|
||||
+
|
||||
#if defined _MSC_VER || defined __SYMBIAN32__
|
||||
static void* LocalPcreMalloc(size_t aSize)
|
||||
{
|
||||
@@ -74,10 +76,10 @@ PCRE_EXP_DATA_DEFN void (*PUBL(stack_free))(void *) = LocalPcreFree;
|
||||
PCRE_EXP_DATA_DEFN int (*PUBL(callout))(PUBL(callout_block) *) = NULL;
|
||||
|
||||
#elif !defined VPCOMPAT
|
||||
-PCRE_EXP_DATA_DEFN void *(*PUBL(malloc))(size_t) = malloc;
|
||||
-PCRE_EXP_DATA_DEFN void (*PUBL(free))(void *) = free;
|
||||
-PCRE_EXP_DATA_DEFN void *(*PUBL(stack_malloc))(size_t) = malloc;
|
||||
-PCRE_EXP_DATA_DEFN void (*PUBL(stack_free))(void *) = free;
|
||||
+PCRE_EXP_DATA_DEFN void *(*PUBL(malloc))(size_t) = g_try_malloc;
|
||||
+PCRE_EXP_DATA_DEFN void (*PUBL(free))(void *) = g_free;
|
||||
+PCRE_EXP_DATA_DEFN void *(*PUBL(stack_malloc))(size_t) = g_try_malloc;
|
||||
+PCRE_EXP_DATA_DEFN void (*PUBL(stack_free))(void *) = g_free;
|
||||
PCRE_EXP_DATA_DEFN int (*PUBL(callout))(PUBL(callout_block) *) = NULL;
|
||||
#endif
|
||||
|
||||
--
|
||||
1.7.5.1.217.g4e3aa.dirty
|
||||
|
@ -1,834 +0,0 @@
|
||||
From 23d48c5fc7aa889dc7798f9c64acd43d9cb34683 Mon Sep 17 00:00:00 2001
|
||||
From: Christian Persch <chpe@gnome.org>
|
||||
Date: Sun, 12 Feb 2012 21:20:33 +0100
|
||||
Subject: [PATCH] regex: Use glib for unicode data
|
||||
|
||||
Use g_unichar_type() and g_unichar_get_script() instead of pcre tables.
|
||||
---
|
||||
glib/pcre/pcre_compile.c | 26 +++---
|
||||
glib/pcre/pcre_dfa_exec.c | 96 ++++++++--------
|
||||
glib/pcre/pcre_exec.c | 26 +++---
|
||||
glib/pcre/pcre_internal.h | 11 +--
|
||||
glib/pcre/pcre_tables.c | 16 +++
|
||||
glib/pcre/pcre_xclass.c | 24 ++--
|
||||
glib/pcre/ucp.h | 265 +++++++++++++++++++++++----------------------
|
||||
7 files changed, 239 insertions(+), 225 deletions(-)
|
||||
|
||||
diff --git a/glib/pcre/pcre_compile.c b/glib/pcre/pcre_compile.c
|
||||
index 21bef80..a6c84e1 100644
|
||||
--- a/glib/pcre/pcre_compile.c
|
||||
+++ b/glib/pcre/pcre_compile.c
|
||||
@@ -2920,43 +2920,43 @@ Returns: TRUE if auto-possessifying is OK
|
||||
static BOOL
|
||||
check_char_prop(int c, int ptype, int pdata, BOOL negated)
|
||||
{
|
||||
-const ucd_record *prop = GET_UCD(c);
|
||||
+const pcre_uint8 chartype = UCD_CHARTYPE(c);
|
||||
switch(ptype)
|
||||
{
|
||||
case PT_LAMP:
|
||||
- return (prop->chartype == ucp_Lu ||
|
||||
- prop->chartype == ucp_Ll ||
|
||||
- prop->chartype == ucp_Lt) == negated;
|
||||
+ return (chartype == ucp_Lu ||
|
||||
+ chartype == ucp_Ll ||
|
||||
+ chartype == ucp_Lt) == negated;
|
||||
|
||||
case PT_GC:
|
||||
- return (pdata == PRIV(ucp_gentype)[prop->chartype]) == negated;
|
||||
+ return (pdata == PRIV(ucp_gentype)[chartype]) == negated;
|
||||
|
||||
case PT_PC:
|
||||
- return (pdata == prop->chartype) == negated;
|
||||
+ return (pdata == chartype) == negated;
|
||||
|
||||
case PT_SC:
|
||||
- return (pdata == prop->script) == negated;
|
||||
+ return (pdata == UCD_SCRIPT(c)) == negated;
|
||||
|
||||
/* These are specials */
|
||||
|
||||
case PT_ALNUM:
|
||||
- return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
|
||||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N) == negated;
|
||||
+ return (PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||
+ PRIV(ucp_gentype)[chartype] == ucp_N) == negated;
|
||||
|
||||
case PT_SPACE: /* Perl space */
|
||||
- return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
|
||||
+ return (PRIV(ucp_gentype)[chartype] == ucp_Z ||
|
||||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
|
||||
== negated;
|
||||
|
||||
case PT_PXSPACE: /* POSIX space */
|
||||
- return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
|
||||
+ return (PRIV(ucp_gentype)[chartype] == ucp_Z ||
|
||||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
|
||||
c == CHAR_FF || c == CHAR_CR)
|
||||
== negated;
|
||||
|
||||
case PT_WORD:
|
||||
- return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
|
||||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
|
||||
+ return (PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||
+ PRIV(ucp_gentype)[chartype] == ucp_N ||
|
||||
c == CHAR_UNDERSCORE) == negated;
|
||||
}
|
||||
return FALSE;
|
||||
diff --git a/glib/pcre/pcre_dfa_exec.c b/glib/pcre/pcre_dfa_exec.c
|
||||
index 9565d46..3f913ce 100644
|
||||
--- a/glib/pcre/pcre_dfa_exec.c
|
||||
+++ b/glib/pcre/pcre_dfa_exec.c
|
||||
@@ -1060,7 +1060,7 @@ for (;;)
|
||||
if (clen > 0)
|
||||
{
|
||||
BOOL OK;
|
||||
- const ucd_record * prop = GET_UCD(c);
|
||||
+ const pcre_uint8 chartype = UCD_CHARTYPE(c);
|
||||
switch(code[1])
|
||||
{
|
||||
case PT_ANY:
|
||||
@@ -1068,43 +1068,43 @@ for (;;)
|
||||
break;
|
||||
|
||||
case PT_LAMP:
|
||||
- OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
|
||||
- prop->chartype == ucp_Lt;
|
||||
+ OK = chartype == ucp_Lu || chartype == ucp_Ll ||
|
||||
+ chartype == ucp_Lt;
|
||||
break;
|
||||
|
||||
case PT_GC:
|
||||
- OK = PRIV(ucp_gentype)[prop->chartype] == code[2];
|
||||
+ OK = PRIV(ucp_gentype)[chartype] == code[2];
|
||||
break;
|
||||
|
||||
case PT_PC:
|
||||
- OK = prop->chartype == code[2];
|
||||
+ OK = chartype == code[2];
|
||||
break;
|
||||
|
||||
case PT_SC:
|
||||
- OK = prop->script == code[2];
|
||||
+ OK = UCD_SCRIPT(c) == code[2];
|
||||
break;
|
||||
|
||||
/* These are specials for combination cases. */
|
||||
|
||||
case PT_ALNUM:
|
||||
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
|
||||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N;
|
||||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||
+ PRIV(ucp_gentype)[chartype] == ucp_N;
|
||||
break;
|
||||
|
||||
case PT_SPACE: /* Perl space */
|
||||
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
|
||||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
|
||||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
|
||||
break;
|
||||
|
||||
case PT_PXSPACE: /* POSIX space */
|
||||
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
|
||||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
|
||||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
|
||||
c == CHAR_FF || c == CHAR_CR;
|
||||
break;
|
||||
|
||||
case PT_WORD:
|
||||
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
|
||||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
|
||||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||
+ PRIV(ucp_gentype)[chartype] == ucp_N ||
|
||||
c == CHAR_UNDERSCORE;
|
||||
break;
|
||||
|
||||
@@ -1294,7 +1294,7 @@ for (;;)
|
||||
if (clen > 0)
|
||||
{
|
||||
BOOL OK;
|
||||
- const ucd_record * prop = GET_UCD(c);
|
||||
+ const pcre_uint8 chartype = UCD_CHARTYPE(c);
|
||||
switch(code[2])
|
||||
{
|
||||
case PT_ANY:
|
||||
@@ -1302,43 +1302,43 @@ for (;;)
|
||||
break;
|
||||
|
||||
case PT_LAMP:
|
||||
- OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
|
||||
- prop->chartype == ucp_Lt;
|
||||
+ OK = chartype == ucp_Lu || chartype == ucp_Ll ||
|
||||
+ chartype == ucp_Lt;
|
||||
break;
|
||||
|
||||
case PT_GC:
|
||||
- OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
|
||||
+ OK = PRIV(ucp_gentype)[chartype] == code[3];
|
||||
break;
|
||||
|
||||
case PT_PC:
|
||||
- OK = prop->chartype == code[3];
|
||||
+ OK = chartype == code[3];
|
||||
break;
|
||||
|
||||
case PT_SC:
|
||||
- OK = prop->script == code[3];
|
||||
+ OK = UCD_SCRIPT(c) == code[3];
|
||||
break;
|
||||
|
||||
/* These are specials for combination cases. */
|
||||
|
||||
case PT_ALNUM:
|
||||
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
|
||||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N;
|
||||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||
+ PRIV(ucp_gentype)[chartype] == ucp_N;
|
||||
break;
|
||||
|
||||
case PT_SPACE: /* Perl space */
|
||||
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
|
||||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
|
||||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
|
||||
break;
|
||||
|
||||
case PT_PXSPACE: /* POSIX space */
|
||||
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
|
||||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
|
||||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
|
||||
c == CHAR_FF || c == CHAR_CR;
|
||||
break;
|
||||
|
||||
case PT_WORD:
|
||||
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
|
||||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
|
||||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||
+ PRIV(ucp_gentype)[chartype] == ucp_N ||
|
||||
c == CHAR_UNDERSCORE;
|
||||
break;
|
||||
|
||||
@@ -1541,7 +1541,7 @@ for (;;)
|
||||
if (clen > 0)
|
||||
{
|
||||
BOOL OK;
|
||||
- const ucd_record * prop = GET_UCD(c);
|
||||
+ const pcre_uint8 chartype = UCD_CHARTYPE(c);
|
||||
switch(code[2])
|
||||
{
|
||||
case PT_ANY:
|
||||
@@ -1549,43 +1549,43 @@ for (;;)
|
||||
break;
|
||||
|
||||
case PT_LAMP:
|
||||
- OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
|
||||
- prop->chartype == ucp_Lt;
|
||||
+ OK = chartype == ucp_Lu || chartype == ucp_Ll ||
|
||||
+ chartype == ucp_Lt;
|
||||
break;
|
||||
|
||||
case PT_GC:
|
||||
- OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
|
||||
+ OK = PRIV(ucp_gentype)[chartype] == code[3];
|
||||
break;
|
||||
|
||||
case PT_PC:
|
||||
- OK = prop->chartype == code[3];
|
||||
+ OK = chartype == code[3];
|
||||
break;
|
||||
|
||||
case PT_SC:
|
||||
- OK = prop->script == code[3];
|
||||
+ OK = UCD_SCRIPT(c) == code[3];
|
||||
break;
|
||||
|
||||
/* These are specials for combination cases. */
|
||||
|
||||
case PT_ALNUM:
|
||||
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
|
||||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N;
|
||||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||
+ PRIV(ucp_gentype)[chartype] == ucp_N;
|
||||
break;
|
||||
|
||||
case PT_SPACE: /* Perl space */
|
||||
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
|
||||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
|
||||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
|
||||
break;
|
||||
|
||||
case PT_PXSPACE: /* POSIX space */
|
||||
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
|
||||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
|
||||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
|
||||
c == CHAR_FF || c == CHAR_CR;
|
||||
break;
|
||||
|
||||
case PT_WORD:
|
||||
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
|
||||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
|
||||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||
+ PRIV(ucp_gentype)[chartype] == ucp_N ||
|
||||
c == CHAR_UNDERSCORE;
|
||||
break;
|
||||
|
||||
@@ -1813,7 +1813,7 @@ for (;;)
|
||||
if (clen > 0)
|
||||
{
|
||||
BOOL OK;
|
||||
- const ucd_record * prop = GET_UCD(c);
|
||||
+ const pcre_uint8 chartype = UCD_CHARTYPE(c);
|
||||
switch(code[1 + IMM2_SIZE + 1])
|
||||
{
|
||||
case PT_ANY:
|
||||
@@ -1821,43 +1821,43 @@ for (;;)
|
||||
break;
|
||||
|
||||
case PT_LAMP:
|
||||
- OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
|
||||
- prop->chartype == ucp_Lt;
|
||||
+ OK = chartype == ucp_Lu || chartype == ucp_Ll ||
|
||||
+ chartype == ucp_Lt;
|
||||
break;
|
||||
|
||||
case PT_GC:
|
||||
- OK = PRIV(ucp_gentype)[prop->chartype] == code[1 + IMM2_SIZE + 2];
|
||||
+ OK = PRIV(ucp_gentype)[chartype] == code[1 + IMM2_SIZE + 2];
|
||||
break;
|
||||
|
||||
case PT_PC:
|
||||
- OK = prop->chartype == code[1 + IMM2_SIZE + 2];
|
||||
+ OK = chartype == code[1 + IMM2_SIZE + 2];
|
||||
break;
|
||||
|
||||
case PT_SC:
|
||||
- OK = prop->script == code[1 + IMM2_SIZE + 2];
|
||||
+ OK = UCD_SCRIPT(c) == code[1 + IMM2_SIZE + 2];
|
||||
break;
|
||||
|
||||
/* These are specials for combination cases. */
|
||||
|
||||
case PT_ALNUM:
|
||||
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
|
||||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N;
|
||||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||
+ PRIV(ucp_gentype)[chartype] == ucp_N;
|
||||
break;
|
||||
|
||||
case PT_SPACE: /* Perl space */
|
||||
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
|
||||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
|
||||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
|
||||
break;
|
||||
|
||||
case PT_PXSPACE: /* POSIX space */
|
||||
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
|
||||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
|
||||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
|
||||
c == CHAR_FF || c == CHAR_CR;
|
||||
break;
|
||||
|
||||
case PT_WORD:
|
||||
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
|
||||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
|
||||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||
+ PRIV(ucp_gentype)[chartype] == ucp_N ||
|
||||
c == CHAR_UNDERSCORE;
|
||||
break;
|
||||
|
||||
diff --git a/glib/pcre/pcre_exec.c b/glib/pcre/pcre_exec.c
|
||||
index 830b8b5..c89a3f9 100644
|
||||
--- a/glib/pcre/pcre_exec.c
|
||||
+++ b/glib/pcre/pcre_exec.c
|
||||
@@ -2565,7 +2565,7 @@ for (;;)
|
||||
}
|
||||
GETCHARINCTEST(c, eptr);
|
||||
{
|
||||
- const ucd_record *prop = GET_UCD(c);
|
||||
+ const pcre_uint8 chartype = UCD_CHARTYPE(c);
|
||||
|
||||
switch(ecode[1])
|
||||
{
|
||||
@@ -2574,44 +2574,44 @@ for (;;)
|
||||
break;
|
||||
|
||||
case PT_LAMP:
|
||||
- if ((prop->chartype == ucp_Lu ||
|
||||
- prop->chartype == ucp_Ll ||
|
||||
- prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
|
||||
+ if ((chartype == ucp_Lu ||
|
||||
+ chartype == ucp_Ll ||
|
||||
+ chartype == ucp_Lt) == (op == OP_NOTPROP))
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
break;
|
||||
|
||||
case PT_GC:
|
||||
- if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP))
|
||||
+ if ((ecode[2] != PRIV(ucp_gentype)[chartype]) == (op == OP_PROP))
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
break;
|
||||
|
||||
case PT_PC:
|
||||
- if ((ecode[2] != prop->chartype) == (op == OP_PROP))
|
||||
+ if ((ecode[2] != chartype) == (op == OP_PROP))
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
break;
|
||||
|
||||
case PT_SC:
|
||||
- if ((ecode[2] != prop->script) == (op == OP_PROP))
|
||||
+ if ((ecode[2] != UCD_SCRIPT(c)) == (op == OP_PROP))
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
break;
|
||||
|
||||
/* These are specials */
|
||||
|
||||
case PT_ALNUM:
|
||||
- if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
|
||||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
|
||||
+ if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||
+ PRIV(ucp_gentype)[chartype] == ucp_N) == (op == OP_NOTPROP))
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
break;
|
||||
|
||||
case PT_SPACE: /* Perl space */
|
||||
- if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
|
||||
+ if ((PRIV(ucp_gentype)[chartype] == ucp_Z ||
|
||||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
|
||||
== (op == OP_NOTPROP))
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
break;
|
||||
|
||||
case PT_PXSPACE: /* POSIX space */
|
||||
- if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
|
||||
+ if ((PRIV(ucp_gentype)[chartype] == ucp_Z ||
|
||||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
|
||||
c == CHAR_FF || c == CHAR_CR)
|
||||
== (op == OP_NOTPROP))
|
||||
@@ -2619,8 +2619,8 @@ for (;;)
|
||||
break;
|
||||
|
||||
case PT_WORD:
|
||||
- if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
|
||||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
|
||||
+ if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||
+ PRIV(ucp_gentype)[chartype] == ucp_N ||
|
||||
c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
break;
|
||||
diff --git a/glib/pcre/pcre_internal.h b/glib/pcre/pcre_internal.h
|
||||
index 181c312..234af1b 100644
|
||||
--- a/glib/pcre/pcre_internal.h
|
||||
+++ b/glib/pcre/pcre_internal.h
|
||||
@@ -2329,15 +2329,12 @@ extern const int PRIV(ucp_typerange)[];
|
||||
#ifdef SUPPORT_UCP
|
||||
/* UCD access macros */
|
||||
|
||||
-#define UCD_BLOCK_SIZE 128
|
||||
-#define GET_UCD(ch) (PRIV(ucd_records) + \
|
||||
- PRIV(ucd_stage2)[PRIV(ucd_stage1)[(ch) / UCD_BLOCK_SIZE] * \
|
||||
- UCD_BLOCK_SIZE + (ch) % UCD_BLOCK_SIZE])
|
||||
+unsigned int _pcre_ucp_othercase(const unsigned int c);
|
||||
|
||||
-#define UCD_CHARTYPE(ch) GET_UCD(ch)->chartype
|
||||
-#define UCD_SCRIPT(ch) GET_UCD(ch)->script
|
||||
+#define UCD_CHARTYPE(ch) (pcre_uint8)g_unichar_type((gunichar)(ch))
|
||||
+#define UCD_SCRIPT(ch) (pcre_uint8)g_unichar_get_script((gunichar)(ch))
|
||||
#define UCD_CATEGORY(ch) PRIV(ucp_gentype)[UCD_CHARTYPE(ch)]
|
||||
-#define UCD_OTHERCASE(ch) (ch + GET_UCD(ch)->other_case)
|
||||
+#define UCD_OTHERCASE(ch) (_pcre_ucp_othercase(ch))
|
||||
|
||||
#endif /* SUPPORT_UCP */
|
||||
|
||||
diff --git a/glib/pcre/pcre_tables.c b/glib/pcre/pcre_tables.c
|
||||
index 7ac2d89..e401974 100644
|
||||
--- a/glib/pcre/pcre_tables.c
|
||||
+++ b/glib/pcre/pcre_tables.c
|
||||
@@ -584,6 +584,22 @@ const ucp_type_table PRIV(utt)[] = {
|
||||
|
||||
const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
|
||||
|
||||
+unsigned int
|
||||
+_pcre_ucp_othercase(const unsigned int c)
|
||||
+{
|
||||
+ int other_case = NOTACHAR;
|
||||
+
|
||||
+ if (g_unichar_islower(c))
|
||||
+ other_case = g_unichar_toupper(c);
|
||||
+ else if (g_unichar_isupper(c))
|
||||
+ other_case = g_unichar_tolower(c);
|
||||
+
|
||||
+ if (other_case == c)
|
||||
+ other_case = NOTACHAR;
|
||||
+
|
||||
+ return other_case;
|
||||
+}
|
||||
+
|
||||
#endif /* SUPPORT_UTF */
|
||||
|
||||
/* End of pcre_tables.c */
|
||||
diff --git a/glib/pcre/pcre_xclass.c b/glib/pcre/pcre_xclass.c
|
||||
index dca7a39..e5a55d7 100644
|
||||
--- a/glib/pcre/pcre_xclass.c
|
||||
+++ b/glib/pcre/pcre_xclass.c
|
||||
@@ -127,7 +127,7 @@ while ((t = *data++) != XCL_END)
|
||||
#ifdef SUPPORT_UCP
|
||||
else /* XCL_PROP & XCL_NOTPROP */
|
||||
{
|
||||
- const ucd_record *prop = GET_UCD(c);
|
||||
+ const pcre_uint8 chartype = UCD_CHARTYPE(c);
|
||||
|
||||
switch(*data)
|
||||
{
|
||||
@@ -136,46 +136,46 @@ while ((t = *data++) != XCL_END)
|
||||
break;
|
||||
|
||||
case PT_LAMP:
|
||||
- if ((prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
|
||||
- prop->chartype == ucp_Lt) == (t == XCL_PROP)) return !negated;
|
||||
+ if ((chartype == ucp_Lu || chartype == ucp_Ll ||
|
||||
+ chartype == ucp_Lt) == (t == XCL_PROP)) return !negated;
|
||||
break;
|
||||
|
||||
case PT_GC:
|
||||
- if ((data[1] == PRIV(ucp_gentype)[prop->chartype]) == (t == XCL_PROP))
|
||||
+ if ((data[1] == PRIV(ucp_gentype)[chartype]) == (t == XCL_PROP))
|
||||
return !negated;
|
||||
break;
|
||||
|
||||
case PT_PC:
|
||||
- if ((data[1] == prop->chartype) == (t == XCL_PROP)) return !negated;
|
||||
+ if ((data[1] == chartype) == (t == XCL_PROP)) return !negated;
|
||||
break;
|
||||
|
||||
case PT_SC:
|
||||
- if ((data[1] == prop->script) == (t == XCL_PROP)) return !negated;
|
||||
+ if ((data[1] == UCD_SCRIPT(c)) == (t == XCL_PROP)) return !negated;
|
||||
break;
|
||||
|
||||
case PT_ALNUM:
|
||||
- if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
|
||||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (t == XCL_PROP))
|
||||
+ if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||
+ PRIV(ucp_gentype)[chartype] == ucp_N) == (t == XCL_PROP))
|
||||
return !negated;
|
||||
break;
|
||||
|
||||
case PT_SPACE: /* Perl space */
|
||||
- if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
|
||||
+ if ((PRIV(ucp_gentype)[chartype] == ucp_Z ||
|
||||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
|
||||
== (t == XCL_PROP))
|
||||
return !negated;
|
||||
break;
|
||||
|
||||
case PT_PXSPACE: /* POSIX space */
|
||||
- if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
|
||||
+ if ((PRIV(ucp_gentype)[chartype] == ucp_Z ||
|
||||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
|
||||
c == CHAR_FF || c == CHAR_CR) == (t == XCL_PROP))
|
||||
return !negated;
|
||||
break;
|
||||
|
||||
case PT_WORD:
|
||||
- if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
|
||||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N || c == CHAR_UNDERSCORE)
|
||||
+ if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||
+ PRIV(ucp_gentype)[chartype] == ucp_N || c == CHAR_UNDERSCORE)
|
||||
== (t == XCL_PROP))
|
||||
return !negated;
|
||||
break;
|
||||
diff --git a/glib/pcre/ucp.h b/glib/pcre/ucp.h
|
||||
index 59c3bec..53a48c9 100644
|
||||
--- a/glib/pcre/ucp.h
|
||||
+++ b/glib/pcre/ucp.h
|
||||
@@ -10,6 +10,7 @@ the UCD access macros. New values that are added for new releases of Unicode
|
||||
should always be at the end of each enum, for backwards compatibility. */
|
||||
|
||||
/* These are the general character categories. */
|
||||
+#include "gunicode.h"
|
||||
|
||||
enum {
|
||||
ucp_C, /* Other */
|
||||
@@ -24,148 +25,148 @@ enum {
|
||||
/* These are the particular character types. */
|
||||
|
||||
enum {
|
||||
- ucp_Cc, /* Control */
|
||||
- ucp_Cf, /* Format */
|
||||
- ucp_Cn, /* Unassigned */
|
||||
- ucp_Co, /* Private use */
|
||||
- ucp_Cs, /* Surrogate */
|
||||
- ucp_Ll, /* Lower case letter */
|
||||
- ucp_Lm, /* Modifier letter */
|
||||
- ucp_Lo, /* Other letter */
|
||||
- ucp_Lt, /* Title case letter */
|
||||
- ucp_Lu, /* Upper case letter */
|
||||
- ucp_Mc, /* Spacing mark */
|
||||
- ucp_Me, /* Enclosing mark */
|
||||
- ucp_Mn, /* Non-spacing mark */
|
||||
- ucp_Nd, /* Decimal number */
|
||||
- ucp_Nl, /* Letter number */
|
||||
- ucp_No, /* Other number */
|
||||
- ucp_Pc, /* Connector punctuation */
|
||||
- ucp_Pd, /* Dash punctuation */
|
||||
- ucp_Pe, /* Close punctuation */
|
||||
- ucp_Pf, /* Final punctuation */
|
||||
- ucp_Pi, /* Initial punctuation */
|
||||
- ucp_Po, /* Other punctuation */
|
||||
- ucp_Ps, /* Open punctuation */
|
||||
- ucp_Sc, /* Currency symbol */
|
||||
- ucp_Sk, /* Modifier symbol */
|
||||
- ucp_Sm, /* Mathematical symbol */
|
||||
- ucp_So, /* Other symbol */
|
||||
- ucp_Zl, /* Line separator */
|
||||
- ucp_Zp, /* Paragraph separator */
|
||||
- ucp_Zs /* Space separator */
|
||||
+ ucp_Cc = G_UNICODE_CONTROL, /* Control */
|
||||
+ ucp_Cf = G_UNICODE_FORMAT, /* Format */
|
||||
+ ucp_Cn = G_UNICODE_UNASSIGNED, /* Unassigned */
|
||||
+ ucp_Co = G_UNICODE_PRIVATE_USE, /* Private use */
|
||||
+ ucp_Cs = G_UNICODE_SURROGATE, /* Surrogate */
|
||||
+ ucp_Ll = G_UNICODE_LOWERCASE_LETTER, /* Lower case letter */
|
||||
+ ucp_Lm = G_UNICODE_MODIFIER_LETTER, /* Modifier letter */
|
||||
+ ucp_Lo = G_UNICODE_OTHER_LETTER, /* Other letter */
|
||||
+ ucp_Lt = G_UNICODE_TITLECASE_LETTER, /* Title case letter */
|
||||
+ ucp_Lu = G_UNICODE_UPPERCASE_LETTER, /* Upper case letter */
|
||||
+ ucp_Mc = G_UNICODE_SPACING_MARK, /* Spacing mark */
|
||||
+ ucp_Me = G_UNICODE_ENCLOSING_MARK, /* Enclosing mark */
|
||||
+ ucp_Mn = G_UNICODE_NON_SPACING_MARK, /* Non-spacing mark */
|
||||
+ ucp_Nd = G_UNICODE_DECIMAL_NUMBER, /* Decimal number */
|
||||
+ ucp_Nl = G_UNICODE_LETTER_NUMBER, /* Letter number */
|
||||
+ ucp_No = G_UNICODE_OTHER_NUMBER, /* Other number */
|
||||
+ ucp_Pc = G_UNICODE_CONNECT_PUNCTUATION, /* Connector punctuation */
|
||||
+ ucp_Pd = G_UNICODE_DASH_PUNCTUATION, /* Dash punctuation */
|
||||
+ ucp_Pe = G_UNICODE_CLOSE_PUNCTUATION, /* Close punctuation */
|
||||
+ ucp_Pf = G_UNICODE_FINAL_PUNCTUATION, /* Final punctuation */
|
||||
+ ucp_Pi = G_UNICODE_INITIAL_PUNCTUATION, /* Initial punctuation */
|
||||
+ ucp_Po = G_UNICODE_OTHER_PUNCTUATION, /* Other punctuation */
|
||||
+ ucp_Ps = G_UNICODE_OPEN_PUNCTUATION, /* Open punctuation */
|
||||
+ ucp_Sc = G_UNICODE_CURRENCY_SYMBOL, /* Currency symbol */
|
||||
+ ucp_Sk = G_UNICODE_MODIFIER_SYMBOL, /* Modifier symbol */
|
||||
+ ucp_Sm = G_UNICODE_MATH_SYMBOL, /* Mathematical symbol */
|
||||
+ ucp_So = G_UNICODE_OTHER_SYMBOL, /* Other symbol */
|
||||
+ ucp_Zl = G_UNICODE_LINE_SEPARATOR, /* Line separator */
|
||||
+ ucp_Zp = G_UNICODE_PARAGRAPH_SEPARATOR, /* Paragraph separator */
|
||||
+ ucp_Zs = G_UNICODE_SPACE_SEPARATOR /* Space separator */
|
||||
};
|
||||
|
||||
/* These are the script identifications. */
|
||||
|
||||
enum {
|
||||
- ucp_Arabic,
|
||||
- ucp_Armenian,
|
||||
- ucp_Bengali,
|
||||
- ucp_Bopomofo,
|
||||
- ucp_Braille,
|
||||
- ucp_Buginese,
|
||||
- ucp_Buhid,
|
||||
- ucp_Canadian_Aboriginal,
|
||||
- ucp_Cherokee,
|
||||
- ucp_Common,
|
||||
- ucp_Coptic,
|
||||
- ucp_Cypriot,
|
||||
- ucp_Cyrillic,
|
||||
- ucp_Deseret,
|
||||
- ucp_Devanagari,
|
||||
- ucp_Ethiopic,
|
||||
- ucp_Georgian,
|
||||
- ucp_Glagolitic,
|
||||
- ucp_Gothic,
|
||||
- ucp_Greek,
|
||||
- ucp_Gujarati,
|
||||
- ucp_Gurmukhi,
|
||||
- ucp_Han,
|
||||
- ucp_Hangul,
|
||||
- ucp_Hanunoo,
|
||||
- ucp_Hebrew,
|
||||
- ucp_Hiragana,
|
||||
- ucp_Inherited,
|
||||
- ucp_Kannada,
|
||||
- ucp_Katakana,
|
||||
- ucp_Kharoshthi,
|
||||
- ucp_Khmer,
|
||||
- ucp_Lao,
|
||||
- ucp_Latin,
|
||||
- ucp_Limbu,
|
||||
- ucp_Linear_B,
|
||||
- ucp_Malayalam,
|
||||
- ucp_Mongolian,
|
||||
- ucp_Myanmar,
|
||||
- ucp_New_Tai_Lue,
|
||||
- ucp_Ogham,
|
||||
- ucp_Old_Italic,
|
||||
- ucp_Old_Persian,
|
||||
- ucp_Oriya,
|
||||
- ucp_Osmanya,
|
||||
- ucp_Runic,
|
||||
- ucp_Shavian,
|
||||
- ucp_Sinhala,
|
||||
- ucp_Syloti_Nagri,
|
||||
- ucp_Syriac,
|
||||
- ucp_Tagalog,
|
||||
- ucp_Tagbanwa,
|
||||
- ucp_Tai_Le,
|
||||
- ucp_Tamil,
|
||||
- ucp_Telugu,
|
||||
- ucp_Thaana,
|
||||
- ucp_Thai,
|
||||
- ucp_Tibetan,
|
||||
- ucp_Tifinagh,
|
||||
- ucp_Ugaritic,
|
||||
- ucp_Yi,
|
||||
+ ucp_Arabic = G_UNICODE_SCRIPT_ARABIC,
|
||||
+ ucp_Armenian = G_UNICODE_SCRIPT_ARMENIAN,
|
||||
+ ucp_Bengali = G_UNICODE_SCRIPT_BENGALI,
|
||||
+ ucp_Bopomofo = G_UNICODE_SCRIPT_BOPOMOFO,
|
||||
+ ucp_Braille = G_UNICODE_SCRIPT_BRAILLE,
|
||||
+ ucp_Buginese = G_UNICODE_SCRIPT_BUGINESE,
|
||||
+ ucp_Buhid = G_UNICODE_SCRIPT_BUHID,
|
||||
+ ucp_Canadian_Aboriginal = G_UNICODE_SCRIPT_CANADIAN_ABORIGINAL,
|
||||
+ ucp_Cherokee = G_UNICODE_SCRIPT_CHEROKEE,
|
||||
+ ucp_Common = G_UNICODE_SCRIPT_COMMON,
|
||||
+ ucp_Coptic = G_UNICODE_SCRIPT_COPTIC,
|
||||
+ ucp_Cypriot = G_UNICODE_SCRIPT_CYPRIOT,
|
||||
+ ucp_Cyrillic = G_UNICODE_SCRIPT_CYRILLIC,
|
||||
+ ucp_Deseret = G_UNICODE_SCRIPT_DESERET,
|
||||
+ ucp_Devanagari = G_UNICODE_SCRIPT_DEVANAGARI,
|
||||
+ ucp_Ethiopic = G_UNICODE_SCRIPT_ETHIOPIC,
|
||||
+ ucp_Georgian = G_UNICODE_SCRIPT_GEORGIAN,
|
||||
+ ucp_Glagolitic = G_UNICODE_SCRIPT_GLAGOLITIC,
|
||||
+ ucp_Gothic = G_UNICODE_SCRIPT_GOTHIC,
|
||||
+ ucp_Greek = G_UNICODE_SCRIPT_GREEK,
|
||||
+ ucp_Gujarati = G_UNICODE_SCRIPT_GUJARATI,
|
||||
+ ucp_Gurmukhi = G_UNICODE_SCRIPT_GURMUKHI,
|
||||
+ ucp_Han = G_UNICODE_SCRIPT_HAN,
|
||||
+ ucp_Hangul = G_UNICODE_SCRIPT_HANGUL,
|
||||
+ ucp_Hanunoo = G_UNICODE_SCRIPT_HANUNOO,
|
||||
+ ucp_Hebrew = G_UNICODE_SCRIPT_HEBREW,
|
||||
+ ucp_Hiragana = G_UNICODE_SCRIPT_HIRAGANA,
|
||||
+ ucp_Inherited = G_UNICODE_SCRIPT_INHERITED,
|
||||
+ ucp_Kannada = G_UNICODE_SCRIPT_KANNADA,
|
||||
+ ucp_Katakana = G_UNICODE_SCRIPT_KATAKANA,
|
||||
+ ucp_Kharoshthi = G_UNICODE_SCRIPT_KHAROSHTHI,
|
||||
+ ucp_Khmer = G_UNICODE_SCRIPT_KHMER,
|
||||
+ ucp_Lao = G_UNICODE_SCRIPT_LAO,
|
||||
+ ucp_Latin = G_UNICODE_SCRIPT_LATIN,
|
||||
+ ucp_Limbu = G_UNICODE_SCRIPT_LIMBU,
|
||||
+ ucp_Linear_B = G_UNICODE_SCRIPT_LINEAR_B,
|
||||
+ ucp_Malayalam = G_UNICODE_SCRIPT_MALAYALAM,
|
||||
+ ucp_Mongolian = G_UNICODE_SCRIPT_MONGOLIAN,
|
||||
+ ucp_Myanmar = G_UNICODE_SCRIPT_MYANMAR,
|
||||
+ ucp_New_Tai_Lue = G_UNICODE_SCRIPT_NEW_TAI_LUE,
|
||||
+ ucp_Ogham = G_UNICODE_SCRIPT_OGHAM,
|
||||
+ ucp_Old_Italic = G_UNICODE_SCRIPT_OLD_ITALIC,
|
||||
+ ucp_Old_Persian = G_UNICODE_SCRIPT_OLD_PERSIAN,
|
||||
+ ucp_Oriya = G_UNICODE_SCRIPT_ORIYA,
|
||||
+ ucp_Osmanya = G_UNICODE_SCRIPT_OSMANYA,
|
||||
+ ucp_Runic = G_UNICODE_SCRIPT_RUNIC,
|
||||
+ ucp_Shavian = G_UNICODE_SCRIPT_SHAVIAN,
|
||||
+ ucp_Sinhala = G_UNICODE_SCRIPT_SINHALA,
|
||||
+ ucp_Syloti_Nagri = G_UNICODE_SCRIPT_SYLOTI_NAGRI,
|
||||
+ ucp_Syriac = G_UNICODE_SCRIPT_SYRIAC,
|
||||
+ ucp_Tagalog = G_UNICODE_SCRIPT_TAGALOG,
|
||||
+ ucp_Tagbanwa = G_UNICODE_SCRIPT_TAGBANWA,
|
||||
+ ucp_Tai_Le = G_UNICODE_SCRIPT_TAI_LE,
|
||||
+ ucp_Tamil = G_UNICODE_SCRIPT_TAMIL,
|
||||
+ ucp_Telugu = G_UNICODE_SCRIPT_TELUGU,
|
||||
+ ucp_Thaana = G_UNICODE_SCRIPT_THAANA,
|
||||
+ ucp_Thai = G_UNICODE_SCRIPT_THAI,
|
||||
+ ucp_Tibetan = G_UNICODE_SCRIPT_TIBETAN,
|
||||
+ ucp_Tifinagh = G_UNICODE_SCRIPT_TIFINAGH,
|
||||
+ ucp_Ugaritic = G_UNICODE_SCRIPT_UGARITIC,
|
||||
+ ucp_Yi = G_UNICODE_SCRIPT_YI,
|
||||
/* New for Unicode 5.0: */
|
||||
- ucp_Balinese,
|
||||
- ucp_Cuneiform,
|
||||
- ucp_Nko,
|
||||
- ucp_Phags_Pa,
|
||||
- ucp_Phoenician,
|
||||
+ ucp_Balinese = G_UNICODE_SCRIPT_BALINESE,
|
||||
+ ucp_Cuneiform = G_UNICODE_SCRIPT_CUNEIFORM,
|
||||
+ ucp_Nko = G_UNICODE_SCRIPT_NKO,
|
||||
+ ucp_Phags_Pa = G_UNICODE_SCRIPT_PHAGS_PA,
|
||||
+ ucp_Phoenician = G_UNICODE_SCRIPT_PHOENICIAN,
|
||||
/* New for Unicode 5.1: */
|
||||
- ucp_Carian,
|
||||
- ucp_Cham,
|
||||
- ucp_Kayah_Li,
|
||||
- ucp_Lepcha,
|
||||
- ucp_Lycian,
|
||||
- ucp_Lydian,
|
||||
- ucp_Ol_Chiki,
|
||||
- ucp_Rejang,
|
||||
- ucp_Saurashtra,
|
||||
- ucp_Sundanese,
|
||||
- ucp_Vai,
|
||||
+ ucp_Carian = G_UNICODE_SCRIPT_CARIAN,
|
||||
+ ucp_Cham = G_UNICODE_SCRIPT_CHAM,
|
||||
+ ucp_Kayah_Li = G_UNICODE_SCRIPT_KAYAH_LI,
|
||||
+ ucp_Lepcha = G_UNICODE_SCRIPT_LEPCHA,
|
||||
+ ucp_Lycian = G_UNICODE_SCRIPT_LYCIAN,
|
||||
+ ucp_Lydian = G_UNICODE_SCRIPT_LYDIAN,
|
||||
+ ucp_Ol_Chiki = G_UNICODE_SCRIPT_OL_CHIKI,
|
||||
+ ucp_Rejang = G_UNICODE_SCRIPT_REJANG,
|
||||
+ ucp_Saurashtra = G_UNICODE_SCRIPT_SAURASHTRA,
|
||||
+ ucp_Sundanese = G_UNICODE_SCRIPT_SUNDANESE,
|
||||
+ ucp_Vai = G_UNICODE_SCRIPT_VAI,
|
||||
/* New for Unicode 5.2: */
|
||||
- ucp_Avestan,
|
||||
- ucp_Bamum,
|
||||
- ucp_Egyptian_Hieroglyphs,
|
||||
- ucp_Imperial_Aramaic,
|
||||
- ucp_Inscriptional_Pahlavi,
|
||||
- ucp_Inscriptional_Parthian,
|
||||
- ucp_Javanese,
|
||||
- ucp_Kaithi,
|
||||
- ucp_Lisu,
|
||||
- ucp_Meetei_Mayek,
|
||||
- ucp_Old_South_Arabian,
|
||||
- ucp_Old_Turkic,
|
||||
- ucp_Samaritan,
|
||||
- ucp_Tai_Tham,
|
||||
- ucp_Tai_Viet,
|
||||
+ ucp_Avestan = G_UNICODE_SCRIPT_AVESTAN,
|
||||
+ ucp_Bamum = G_UNICODE_SCRIPT_BAMUM,
|
||||
+ ucp_Egyptian_Hieroglyphs = G_UNICODE_SCRIPT_EGYPTIAN_HIEROGLYPHS,
|
||||
+ ucp_Imperial_Aramaic = G_UNICODE_SCRIPT_IMPERIAL_ARAMAIC,
|
||||
+ ucp_Inscriptional_Pahlavi = G_UNICODE_SCRIPT_INSCRIPTIONAL_PAHLAVI,
|
||||
+ ucp_Inscriptional_Parthian = G_UNICODE_SCRIPT_INSCRIPTIONAL_PARTHIAN,
|
||||
+ ucp_Javanese = G_UNICODE_SCRIPT_JAVANESE,
|
||||
+ ucp_Kaithi = G_UNICODE_SCRIPT_KAITHI,
|
||||
+ ucp_Lisu = G_UNICODE_SCRIPT_LISU,
|
||||
+ ucp_Meetei_Mayek = G_UNICODE_SCRIPT_MEETEI_MAYEK,
|
||||
+ ucp_Old_South_Arabian = G_UNICODE_SCRIPT_OLD_SOUTH_ARABIAN,
|
||||
+ ucp_Old_Turkic = G_UNICODE_SCRIPT_OLD_TURKIC,
|
||||
+ ucp_Samaritan = G_UNICODE_SCRIPT_SAMARITAN,
|
||||
+ ucp_Tai_Tham = G_UNICODE_SCRIPT_TAI_THAM,
|
||||
+ ucp_Tai_Viet = G_UNICODE_SCRIPT_TAI_VIET,
|
||||
/* New for Unicode 6.0.0: */
|
||||
- ucp_Batak,
|
||||
- ucp_Brahmi,
|
||||
- ucp_Mandaic,
|
||||
+ ucp_Batak = G_UNICODE_SCRIPT_BATAK,
|
||||
+ ucp_Brahmi = G_UNICODE_SCRIPT_BRAHMI,
|
||||
+ ucp_Mandaic = G_UNICODE_SCRIPT_MANDAIC,
|
||||
/* New for Unicode 6.1.0: */
|
||||
- ucp_Chakma,
|
||||
- ucp_Meroitic_Cursive,
|
||||
- ucp_Meroitic_Hieroglyphs,
|
||||
- ucp_Miao,
|
||||
- ucp_Sharada,
|
||||
- ucp_Sora_Sompeng,
|
||||
- ucp_Takri
|
||||
+ ucp_Chakma = G_UNICODE_SCRIPT_CHAKMA,
|
||||
+ ucp_Meroitic_Cursive = G_UNICODE_SCRIPT_MEROITIC_CURSIVE,
|
||||
+ ucp_Meroitic_Hieroglyphs = G_UNICODE_SCRIPT_MEROITIC_HIEROGLYPHS,
|
||||
+ ucp_Miao = G_UNICODE_SCRIPT_MIAO,
|
||||
+ ucp_Sharada = G_UNICODE_SCRIPT_SHARADA,
|
||||
+ ucp_Sora_Sompeng = G_UNICODE_SCRIPT_SORA_SOMPENG,
|
||||
+ ucp_Takri = G_UNICODE_SCRIPT_TAKRI,
|
||||
};
|
||||
|
||||
#endif
|
||||
--
|
||||
1.7.5.1.217.g4e3aa.dirty
|
||||
|
@ -1,124 +0,0 @@
|
||||
#! /bin/sh

# Refresh the local copy of PCRE (the pcre/ directory) from an unpacked
# upstream PCRE source tree and apply the GLib-specific patches to it.
#
# Usage: update.sh PCRE-DIR
#
# Must be run from the directory that contains gregex.h. The helper files
# (Makefile.am fragments, patches, make_utt.py) are read from $IN, which is
# resolved relative to the pcre/ directory the script changes into.

set -e

IN="../update-pcre"
PCRE=$1

if [ "x$PCRE" = x ] || [ "x$PCRE" = x--help ] || [ "x$PCRE" = x-h ]; then
    cat >&2 << EOF

$0 PCRE-DIR

Updates the local PCRE copy with a different version of the library,
contained in the directory PCRE-DIR.

This will delete the content of the local pcre directory, copy the
necessary files from PCRE-DIR, and generate other needed files, such
as Makefile.am
EOF
    exit
fi

# Diagnostics go to stderr (the previous "2> /dev/null" printed them on
# stdout and merely discarded an empty stderr).
if [ ! -f gregex.h ]; then
    echo "This script should be executed from the directory containing gregex.c." >&2
    exit 1
fi

if [ ! -f "${PCRE}/Makefile.in" ] || [ ! -f "${PCRE}/pcre_compile.c" ]; then
    echo "'${PCRE}' does not contain a valid PCRE version." >&2
    exit 1
fi

# The script changes into pcre/ below, so a relative PCRE-DIR would stop
# resolving; pin it to an absolute path now that it has been validated.
PCRE=$(cd "${PCRE}" && pwd)


echo "Deleting old PCRE library"
# Preserve Subversion metadata only when it exists, so that "set -e" does
# not abort the run on a checkout without pcre/.svn.
if [ -d pcre/.svn ]; then
    mv pcre/.svn tmp-pcre-svn
fi
rm -Rf pcre
mkdir pcre
cd pcre

# pcre_chartables.c is generated by dftables.
# We do not want to compile and execute dftables.c every time, because
# this could be a problem (e.g. when cross-compiling), so now generate
# the file and then distribute it with GRegex.
echo "Generating pcre_chartables.c"
cp -R "${PCRE}" tmp-build
(
    cd tmp-build || exit 1
    ./configure --enable-utf8 --enable-unicode-properties --disable-cpp > /dev/null
    make pcre_chartables.c > /dev/null
    cat > ../pcre_chartables.c << \EOF
/* This file is autogenerated by ../update-pcre/update.sh during
 * the update of the local copy of PCRE.
 */
EOF
    cat pcre_chartables.c >> ../pcre_chartables.c
)
rm -R tmp-build

# Compiled C files.
# Take the "OBJ = ..." block of Makefile.in (up to the first blank line) and
# strip the "OBJ = " prefix, the ".@OBJEXT@ \" suffixes and the $(POSIX_OBJ)
# reference, leaving bare source base names. The awk program must be a single
# argument: any further arguments are treated by awk as input files.
echo "Generating makefiles"
all_files=$(awk '
    /^OBJ = /, /^[[:space:]]*$/ {
        sub("^OBJ = ", "")
        sub(".@OBJEXT@[[:blank:]]*\\\\", "")
        sub("\\$\\(POSIX_OBJ\\)", "")
        print
    }' "${PCRE}/Makefile.in")

# Headers.
included_files="pcre.h pcre_internal.h ucp.h ucpinternal.h"

# Generate Makefile.am.
cat "$IN/Makefile.am-1" > Makefile.am
for name in $all_files; do
    echo " $name.c \\" >> Makefile.am
    if [ "${name}" != pcre_chartables ]; then
        # pcre_chartables.c is a generated file.
        cp "${PCRE}/${name}.c" .
    fi
done
for f in $included_files; do
    echo " $f \\" >> Makefile.am
    cp "${PCRE}/${f}" .
done
cat "$IN/Makefile.am-2" >> Makefile.am

echo "Patching PCRE"

# Copy the license.
cp "${PCRE}/COPYING" .

# Use glib for memory allocation.
patch > /dev/null < "$IN/memory.patch"

# Copy the modified version of pcre_valid_utf8.c.
cp "$IN/pcre_valid_utf8.c" .

# Copy the modified version of pcre_ucp_searchfuncs.c that uses glib
# for Unicode properties.
cp "$IN/pcre_ucp_searchfuncs.c" .
patch > /dev/null < "$IN/ucp.patch"

# Remove the digitab array in pcre_compile.c.
patch > /dev/null < "$IN/digitab.patch"
sed -i -e 's/(digitab\[\(.*\)\] & ctype_digit)/g_ascii_isdigit(\1)/' pcre_compile.c
sed -i -e 's/(digitab\[\(.*\)\] & ctype_xdigit)/g_ascii_isxdigit(\1)/' pcre_compile.c

# Reduce the number of relocations.
python "$IN/make_utt.py"
patch > /dev/null < "$IN/utt.patch"
patch > /dev/null < "$IN/table-reduction.patch"

# Copy back the old SVN directory, if one was saved above.
if [ -d ../tmp-pcre-svn ]; then
    mv ../tmp-pcre-svn .svn
fi


cat << EOF

Update completed. You now should check that everything is working.
Remember to update the regex syntax doc with the new features
(docs/reference/glib/regex-syntax.sgml) and to run the tests.
EOF
|
||||
|
63
meson.build
63
meson.build
@ -1979,46 +1979,37 @@ else
|
||||
endif
|
||||
endif
|
||||
|
||||
if get_option('internal_pcre')
|
||||
pcre = []
|
||||
use_system_pcre = false
|
||||
else
|
||||
pcre = dependency('libpcre', version: '>= 8.31', required : false) # Should check for Unicode support, too. FIXME
|
||||
if not pcre.found()
|
||||
if cc.get_id() == 'msvc' or cc.get_id() == 'clang-cl'
|
||||
# MSVC: Search for the PCRE library by the configuration, which corresponds
|
||||
# to the output of CMake builds of PCRE. Note that debugoptimized
|
||||
# is really a Release build with .PDB files.
|
||||
if vs_crt == 'debug'
|
||||
pcre = cc.find_library('pcred', required : false)
|
||||
else
|
||||
pcre = cc.find_library('pcre', required : false)
|
||||
endif
|
||||
pcre = dependency('libpcre', version: '>= 8.31', required : false) # Should check for Unicode support, too. FIXME
|
||||
if not pcre.found()
|
||||
if cc.get_id() == 'msvc' or cc.get_id() == 'clang-cl'
|
||||
# MSVC: Search for the PCRE library by the configuration, which corresponds
|
||||
# to the output of CMake builds of PCRE. Note that debugoptimized
|
||||
# is really a Release build with .PDB files.
|
||||
if vs_crt == 'debug'
|
||||
pcre = cc.find_library('pcred', required : false)
|
||||
else
|
||||
pcre = cc.find_library('pcre', required : false)
|
||||
endif
|
||||
endif
|
||||
use_system_pcre = pcre.found()
|
||||
endif
|
||||
glib_conf.set('USE_SYSTEM_PCRE', use_system_pcre)
|
||||
|
||||
use_pcre_static_flag = false
|
||||
|
||||
if host_system == 'windows'
|
||||
if not use_system_pcre
|
||||
use_pcre_static_flag = true
|
||||
else
|
||||
pcre_static = cc.links('''#define PCRE_STATIC
|
||||
#include <pcre.h>
|
||||
int main() {
|
||||
void *p = NULL;
|
||||
pcre_free(p);
|
||||
return 0;
|
||||
}''',
|
||||
dependencies: pcre,
|
||||
name : 'Windows system PCRE is a static build')
|
||||
if pcre_static
|
||||
use_pcre_static_flag = true
|
||||
endif
|
||||
endif
|
||||
# Try again with the fallback
|
||||
if not pcre.found()
|
||||
pcre = dependency('libpcre', required : true, fallback : ['libpcre', 'pcre_dep'])
|
||||
use_pcre_static_flag = true
|
||||
elif host_system == 'windows'
|
||||
pcre_static = cc.links('''#define PCRE_STATIC
|
||||
#include <pcre.h>
|
||||
int main() {
|
||||
void *p = NULL;
|
||||
pcre_free(p);
|
||||
return 0;
|
||||
}''',
|
||||
dependencies: pcre,
|
||||
name : 'Windows system PCRE is a static build')
|
||||
use_pcre_static_flag = pcre_static
|
||||
else
|
||||
use_pcre_static_flag = false
|
||||
endif
|
||||
|
||||
libm = cc.find_library('m', required : false)
|
||||
|
@ -34,11 +34,6 @@ option('libmount',
|
||||
value : 'auto',
|
||||
description : 'build with libmount support')
|
||||
|
||||
option('internal_pcre',
|
||||
type : 'boolean',
|
||||
value : false,
|
||||
description : 'whether to use internal PCRE')
|
||||
|
||||
option('man',
|
||||
type : 'boolean',
|
||||
value : false,
|
||||
|
11
subprojects/libpcre.wrap
Normal file
11
subprojects/libpcre.wrap
Normal file
@ -0,0 +1,11 @@
|
||||
[wrap-file]
|
||||
directory = pcre-8.37
|
||||
source_url = https://ftp.pcre.org/pub/pcre/pcre-8.37.tar.bz2
|
||||
source_filename = pcre-8.37.tar.bz2
|
||||
source_hash = 51679ea8006ce31379fb0860e46dd86665d864b5020fc9cd19e71260eef4789d
|
||||
patch_filename = pcre_8.37-2_patch.zip
|
||||
patch_url = https://wrapdb.mesonbuild.com/v2/pcre_8.37-2/get_patch
|
||||
patch_hash = 6b80f72385e1bf06721e26fbc83aced576e9c0d3182d86a55dd173a04050fe26
|
||||
|
||||
[provide]
|
||||
libpcre = pcre_dep
|
Loading…
Reference in New Issue
Block a user