mirror of
https://gitlab.gnome.org/GNOME/glib.git
synced 2025-08-29 20:34:11 +02:00
Add GRegex for regular expression matching. (#50075)
2007-03-15 Marco Barisione <marco@barisione.org> Add GRegex for regular expression matching. (#50075) * configure.in: Handle GRegex compilation. * glib/gregex.c: * glib/gregex.h: Code for GRegex. * glib/Makefile.am: * glib/makefile.msc.in: Updated makefiles. * glib/pcre/*: Internal copy of PCRE. * glib/update-pcre/*: Stuff to automatically update the internal PCRE to a newer version. * tests/regex-test.c: * tests/Makefile.am: * tests/makefile.msc.in: Add tests for GRegex. svn path=/trunk/; revision=5408
This commit is contained in:
committed by
Marco Barisione
parent
af8671792d
commit
0196d63975
68
glib/pcre/COPYING
Normal file
68
glib/pcre/COPYING
Normal file
@@ -0,0 +1,68 @@
|
||||
PCRE LICENCE
|
||||
------------
|
||||
|
||||
PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Release 7 of PCRE is distributed under the terms of the "BSD" licence, as
|
||||
specified below. The documentation for PCRE, supplied in the "doc"
|
||||
directory, is distributed under the same terms as the software itself.
|
||||
|
||||
The basic library functions are written in C and are freestanding. Also
|
||||
included in the distribution is a set of C++ wrapper functions.
|
||||
|
||||
|
||||
THE BASIC LIBRARY FUNCTIONS
|
||||
---------------------------
|
||||
|
||||
Written by: Philip Hazel
|
||||
Email local part: ph10
|
||||
Email domain: cam.ac.uk
|
||||
|
||||
University of Cambridge Computing Service,
|
||||
Cambridge, England. Phone: +44 1223 334714.
|
||||
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
All rights reserved.
|
||||
|
||||
|
||||
THE C++ WRAPPER FUNCTIONS
|
||||
-------------------------
|
||||
|
||||
Contributed by: Google Inc.
|
||||
|
||||
Copyright (c) 2006, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
|
||||
THE "BSD" LICENCE
|
||||
-----------------
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the name of Google
|
||||
Inc. nor the names of their contributors may be used to endorse or
|
||||
promote products derived from this software without specific prior
|
||||
written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
End
|
62
glib/pcre/Makefile.am
Normal file
62
glib/pcre/Makefile.am
Normal file
@@ -0,0 +1,62 @@
|
||||
INCLUDES = \
|
||||
-DG_LOG_DOMAIN=\"GLib-GRegex\" \
|
||||
-DSUPPORT_UCP \
|
||||
-DSUPPORT_UTF8 \
|
||||
-DNEWLINE=-1 \
|
||||
-DMATCH_LIMIT=10000000 \
|
||||
-DMATCH_LIMIT_RECURSION=10000000 \
|
||||
-DMAX_NAME_SIZE=32 \
|
||||
-DMAX_NAME_COUNT=10000 \
|
||||
-DMAX_DUPLENGTH=30000 \
|
||||
-DLINK_SIZE=2 \
|
||||
-DEBCDIC=0 \
|
||||
-DPOSIX_MALLOC_THRESHOLD=10 \
|
||||
-I$(top_srcdir) \
|
||||
-I$(srcdir) \
|
||||
-I$(top_srcdir)/glib \
|
||||
@GLIB_DEBUG_FLAGS@ \
|
||||
-DG_DISABLE_DEPRECATED \
|
||||
$(DEPRECATED_FLAGS)\
|
||||
$(WARN_CFLAGS) \
|
||||
$(PCRE_WARN_CFLAGS) \
|
||||
$(DEP_CFLAGS)
|
||||
|
||||
noinst_LTLIBRARIES = libpcre.la
|
||||
|
||||
libpcre_headers =
|
||||
|
||||
libpcre_la_SOURCES = \
|
||||
pcre_chartables.c \
|
||||
pcre_compile.c \
|
||||
pcre_config.c \
|
||||
pcre_dfa_exec.c \
|
||||
pcre_exec.c \
|
||||
pcre_fullinfo.c \
|
||||
pcre_get.c \
|
||||
pcre_globals.c \
|
||||
pcre_info.c \
|
||||
pcre_maketables.c \
|
||||
pcre_newline.c \
|
||||
pcre_ord2utf8.c \
|
||||
pcre_refcount.c \
|
||||
pcre_study.c \
|
||||
pcre_tables.c \
|
||||
pcre_try_flipped.c \
|
||||
pcre_ucp_searchfuncs.c \
|
||||
pcre_valid_utf8.c \
|
||||
pcre_version.c \
|
||||
pcre_xclass.c \
|
||||
pcre.h \
|
||||
pcre_internal.h \
|
||||
ucp.h \
|
||||
ucpinternal.h \
|
||||
$(libpcre_headers)
|
||||
|
||||
libpcre_la_LIBADD = $(DEP_LIBS)
|
||||
|
||||
libpcre_la_LDFLAGS = -no-undefined
|
||||
|
||||
EXTRA_DIST = \
|
||||
COPYING \
|
||||
makefile.msc
|
||||
|
49
glib/pcre/makefile.msc
Normal file
49
glib/pcre/makefile.msc
Normal file
@@ -0,0 +1,49 @@
|
||||
!IFDEF DEBUG
|
||||
CRT=-MDd
|
||||
!ELSE
|
||||
CRT=-MD
|
||||
!ENDIF
|
||||
|
||||
CFLAGS = \
|
||||
-I ..\.. \
|
||||
-DHAVE_CONFIG_H \
|
||||
-DHAVE_LONG_LONG_FORMAT \
|
||||
-DSUPPORT_UCP \
|
||||
-DSUPPORT_UTF8 \
|
||||
-DNEWLINE=10 \
|
||||
-DMATCH_LIMIT=10000000 \
|
||||
-DMATCH_LIMIT_RECURSION=10000000 \
|
||||
-DMAX_NAME_SIZE=32 \
|
||||
-DMAX_NAME_COUNT=10000 \
|
||||
-DMAX_DUPLENGTH=30000 \
|
||||
-DLINK_SIZE=2 \
|
||||
-DEBCDIC=0 \
|
||||
-DPOSIX_MALLOC_THRESHOLD=10
|
||||
|
||||
OBJECTS = \
|
||||
pcre_chartables.obj \
|
||||
pcre_compile.obj \
|
||||
pcre_config.obj \
|
||||
pcre_dfa_exec.obj \
|
||||
pcre_exec.obj \
|
||||
pcre_fullinfo.obj \
|
||||
pcre_get.obj \
|
||||
pcre_globals.obj \
|
||||
pcre_info.obj \
|
||||
pcre_maketables.obj \
|
||||
pcre_newline.obj \
|
||||
pcre_ord2utf8.obj \
|
||||
pcre_refcount.obj \
|
||||
pcre_study.obj \
|
||||
pcre_tables.obj \
|
||||
pcre_try_flipped.obj \
|
||||
pcre_ucp_searchfuncs.obj \
|
||||
pcre_valid_utf8.obj \
|
||||
pcre_version.obj \
|
||||
pcre_xclass.obj \
|
||||
|
||||
pcre.lib : $(OBJECTS)
|
||||
lib -out:pcre.lib $(OBJECTS)
|
||||
|
||||
.c.obj:
|
||||
$(CC) $(CRT) $(CFLAGS) -Ox -GD -c $<
|
283
glib/pcre/pcre.h
Normal file
283
glib/pcre/pcre.h
Normal file
@@ -0,0 +1,283 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* This is the public header file for the PCRE library, to be #included by
|
||||
applications that call the PCRE functions.
|
||||
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef _PCRE_H
|
||||
#define _PCRE_H
|
||||
|
||||
/* The current PCRE version information. */
|
||||
|
||||
/* NOTES FOR FUTURE MAINTAINERS: Do not use numbers with leading zeros, because
|
||||
they may be treated as octal constants. The PCRE_PRERELEASE feature is for
|
||||
identifying release candidates. It might be defined as -RC2, for example. In
|
||||
real releases, it should be defined empty. Do not change the alignment of these
|
||||
statments. The code in ./configure greps out the version numbers by using "cut"
|
||||
to get values from column 29 onwards. These are substituted into pcre-config
|
||||
and libpcre.pc. The values are not put into configure.ac and substituted here
|
||||
(which would simplify this issue) because that makes life harder for those who
|
||||
cannot run ./configure. As it now stands, this file need not be edited in that
|
||||
circumstance. */
|
||||
|
||||
#define PCRE_MAJOR 7
|
||||
#define PCRE_MINOR 0
|
||||
#define PCRE_PRERELEASE
|
||||
#define PCRE_DATE 18-Dec-2006
|
||||
|
||||
/* Win32 uses DLL by default; it needs special stuff for exported functions
|
||||
when building PCRE. */
|
||||
|
||||
#ifdef _WIN32
|
||||
# ifdef PCRE_DEFINITION
|
||||
# ifdef DLL_EXPORT
|
||||
# define PCRE_DATA_SCOPE __declspec(dllexport)
|
||||
# endif
|
||||
# else
|
||||
# ifndef PCRE_STATIC
|
||||
# define PCRE_DATA_SCOPE extern __declspec(dllimport)
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* Otherwise, we use the standard "extern". */
|
||||
|
||||
#ifndef PCRE_DATA_SCOPE
|
||||
# ifdef __cplusplus
|
||||
# define PCRE_DATA_SCOPE extern "C"
|
||||
# else
|
||||
# define PCRE_DATA_SCOPE extern
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* Have to include stdlib.h in order to ensure that size_t is defined;
|
||||
it is needed here for malloc. */
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
/* Allow for C++ users */
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Options */
|
||||
|
||||
#define PCRE_CASELESS 0x00000001
|
||||
#define PCRE_MULTILINE 0x00000002
|
||||
#define PCRE_DOTALL 0x00000004
|
||||
#define PCRE_EXTENDED 0x00000008
|
||||
#define PCRE_ANCHORED 0x00000010
|
||||
#define PCRE_DOLLAR_ENDONLY 0x00000020
|
||||
#define PCRE_EXTRA 0x00000040
|
||||
#define PCRE_NOTBOL 0x00000080
|
||||
#define PCRE_NOTEOL 0x00000100
|
||||
#define PCRE_UNGREEDY 0x00000200
|
||||
#define PCRE_NOTEMPTY 0x00000400
|
||||
#define PCRE_UTF8 0x00000800
|
||||
#define PCRE_NO_AUTO_CAPTURE 0x00001000
|
||||
#define PCRE_NO_UTF8_CHECK 0x00002000
|
||||
#define PCRE_AUTO_CALLOUT 0x00004000
|
||||
#define PCRE_PARTIAL 0x00008000
|
||||
#define PCRE_DFA_SHORTEST 0x00010000
|
||||
#define PCRE_DFA_RESTART 0x00020000
|
||||
#define PCRE_FIRSTLINE 0x00040000
|
||||
#define PCRE_DUPNAMES 0x00080000
|
||||
#define PCRE_NEWLINE_CR 0x00100000
|
||||
#define PCRE_NEWLINE_LF 0x00200000
|
||||
#define PCRE_NEWLINE_CRLF 0x00300000
|
||||
#define PCRE_NEWLINE_ANY 0x00400000
|
||||
|
||||
/* Exec-time and get/set-time error codes */
|
||||
|
||||
#define PCRE_ERROR_NOMATCH (-1)
|
||||
#define PCRE_ERROR_NULL (-2)
|
||||
#define PCRE_ERROR_BADOPTION (-3)
|
||||
#define PCRE_ERROR_BADMAGIC (-4)
|
||||
#define PCRE_ERROR_UNKNOWN_OPCODE (-5)
|
||||
#define PCRE_ERROR_UNKNOWN_NODE (-5) /* For backward compatibility */
|
||||
#define PCRE_ERROR_NOMEMORY (-6)
|
||||
#define PCRE_ERROR_NOSUBSTRING (-7)
|
||||
#define PCRE_ERROR_MATCHLIMIT (-8)
|
||||
#define PCRE_ERROR_CALLOUT (-9) /* Never used by PCRE itself */
|
||||
#define PCRE_ERROR_BADUTF8 (-10)
|
||||
#define PCRE_ERROR_BADUTF8_OFFSET (-11)
|
||||
#define PCRE_ERROR_PARTIAL (-12)
|
||||
#define PCRE_ERROR_BADPARTIAL (-13)
|
||||
#define PCRE_ERROR_INTERNAL (-14)
|
||||
#define PCRE_ERROR_BADCOUNT (-15)
|
||||
#define PCRE_ERROR_DFA_UITEM (-16)
|
||||
#define PCRE_ERROR_DFA_UCOND (-17)
|
||||
#define PCRE_ERROR_DFA_UMLIMIT (-18)
|
||||
#define PCRE_ERROR_DFA_WSSIZE (-19)
|
||||
#define PCRE_ERROR_DFA_RECURSE (-20)
|
||||
#define PCRE_ERROR_RECURSIONLIMIT (-21)
|
||||
#define PCRE_ERROR_NULLWSLIMIT (-22)
|
||||
#define PCRE_ERROR_BADNEWLINE (-23)
|
||||
|
||||
/* Request types for pcre_fullinfo() */
|
||||
|
||||
#define PCRE_INFO_OPTIONS 0
|
||||
#define PCRE_INFO_SIZE 1
|
||||
#define PCRE_INFO_CAPTURECOUNT 2
|
||||
#define PCRE_INFO_BACKREFMAX 3
|
||||
#define PCRE_INFO_FIRSTBYTE 4
|
||||
#define PCRE_INFO_FIRSTCHAR 4 /* For backwards compatibility */
|
||||
#define PCRE_INFO_FIRSTTABLE 5
|
||||
#define PCRE_INFO_LASTLITERAL 6
|
||||
#define PCRE_INFO_NAMEENTRYSIZE 7
|
||||
#define PCRE_INFO_NAMECOUNT 8
|
||||
#define PCRE_INFO_NAMETABLE 9
|
||||
#define PCRE_INFO_STUDYSIZE 10
|
||||
#define PCRE_INFO_DEFAULT_TABLES 11
|
||||
|
||||
/* Request types for pcre_config(). Do not re-arrange, in order to remain
|
||||
compatible. */
|
||||
|
||||
#define PCRE_CONFIG_UTF8 0
|
||||
#define PCRE_CONFIG_NEWLINE 1
|
||||
#define PCRE_CONFIG_LINK_SIZE 2
|
||||
#define PCRE_CONFIG_POSIX_MALLOC_THRESHOLD 3
|
||||
#define PCRE_CONFIG_MATCH_LIMIT 4
|
||||
#define PCRE_CONFIG_STACKRECURSE 5
|
||||
#define PCRE_CONFIG_UNICODE_PROPERTIES 6
|
||||
#define PCRE_CONFIG_MATCH_LIMIT_RECURSION 7
|
||||
|
||||
/* Bit flags for the pcre_extra structure. Do not re-arrange or redefine
|
||||
these bits, just add new ones on the end, in order to remain compatible. */
|
||||
|
||||
#define PCRE_EXTRA_STUDY_DATA 0x0001
|
||||
#define PCRE_EXTRA_MATCH_LIMIT 0x0002
|
||||
#define PCRE_EXTRA_CALLOUT_DATA 0x0004
|
||||
#define PCRE_EXTRA_TABLES 0x0008
|
||||
#define PCRE_EXTRA_MATCH_LIMIT_RECURSION 0x0010
|
||||
|
||||
/* Types */
|
||||
|
||||
struct real_pcre; /* declaration; the definition is private */
|
||||
typedef struct real_pcre pcre;
|
||||
|
||||
/* When PCRE is compiled as a C++ library, the subject pointer type can be
|
||||
replaced with a custom type. For conventional use, the public interface is a
|
||||
const char *. */
|
||||
|
||||
#ifndef PCRE_SPTR
|
||||
#define PCRE_SPTR const char *
|
||||
#endif
|
||||
|
||||
/* The structure for passing additional data to pcre_exec(). This is defined in
|
||||
such as way as to be extensible. Always add new fields at the end, in order to
|
||||
remain compatible. */
|
||||
|
||||
typedef struct pcre_extra {
|
||||
unsigned long int flags; /* Bits for which fields are set */
|
||||
void *study_data; /* Opaque data from pcre_study() */
|
||||
unsigned long int match_limit; /* Maximum number of calls to match() */
|
||||
void *callout_data; /* Data passed back in callouts */
|
||||
const unsigned char *tables; /* Pointer to character tables */
|
||||
unsigned long int match_limit_recursion; /* Max recursive calls to match() */
|
||||
} pcre_extra;
|
||||
|
||||
/* The structure for passing out data via the pcre_callout_function. We use a
|
||||
structure so that new fields can be added on the end in future versions,
|
||||
without changing the API of the function, thereby allowing old clients to work
|
||||
without modification. */
|
||||
|
||||
typedef struct pcre_callout_block {
|
||||
int version; /* Identifies version of block */
|
||||
/* ------------------------ Version 0 ------------------------------- */
|
||||
int callout_number; /* Number compiled into pattern */
|
||||
int *offset_vector; /* The offset vector */
|
||||
PCRE_SPTR subject; /* The subject being matched */
|
||||
int subject_length; /* The length of the subject */
|
||||
int start_match; /* Offset to start of this match attempt */
|
||||
int current_position; /* Where we currently are in the subject */
|
||||
int capture_top; /* Max current capture */
|
||||
int capture_last; /* Most recently closed capture */
|
||||
void *callout_data; /* Data passed in with the call */
|
||||
/* ------------------- Added for Version 1 -------------------------- */
|
||||
int pattern_position; /* Offset to next item in the pattern */
|
||||
int next_item_length; /* Length of next item in the pattern */
|
||||
/* ------------------------------------------------------------------ */
|
||||
} pcre_callout_block;
|
||||
|
||||
#include "glib.h"
|
||||
#include "galias.h"
|
||||
|
||||
#define pcre_malloc g_try_malloc
|
||||
#define pcre_free g_free
|
||||
#define pcre_stack_malloc g_try_malloc
|
||||
|
||||
PCRE_DATA_SCOPE int (*pcre_callout)(pcre_callout_block *);
|
||||
|
||||
/* Exported PCRE functions */
|
||||
|
||||
PCRE_DATA_SCOPE pcre *pcre_compile(const char *, int, const char **, int *,
|
||||
const unsigned char *);
|
||||
PCRE_DATA_SCOPE pcre *pcre_compile2(const char *, int, int *, const char **,
|
||||
int *, const unsigned char *);
|
||||
PCRE_DATA_SCOPE int pcre_config(int, void *);
|
||||
PCRE_DATA_SCOPE int pcre_copy_named_substring(const pcre *, const char *,
|
||||
int *, int, const char *, char *, int);
|
||||
PCRE_DATA_SCOPE int pcre_copy_substring(const char *, int *, int, int, char *,
|
||||
int);
|
||||
PCRE_DATA_SCOPE int pcre_dfa_exec(const pcre *, const pcre_extra *,
|
||||
const char *, int, int, int, int *, int , int *, int);
|
||||
PCRE_DATA_SCOPE int pcre_exec(const pcre *, const pcre_extra *, PCRE_SPTR,
|
||||
int, int, int, int *, int);
|
||||
PCRE_DATA_SCOPE void pcre_free_substring(const char *);
|
||||
PCRE_DATA_SCOPE void pcre_free_substring_list(const char **);
|
||||
PCRE_DATA_SCOPE int pcre_fullinfo(const pcre *, const pcre_extra *, int,
|
||||
void *);
|
||||
PCRE_DATA_SCOPE int pcre_get_named_substring(const pcre *, const char *,
|
||||
int *, int, const char *, const char **);
|
||||
PCRE_DATA_SCOPE int pcre_get_stringnumber(const pcre *, const char *);
|
||||
PCRE_DATA_SCOPE int pcre_get_stringtable_entries(const pcre *, const char *,
|
||||
char **, char **);
|
||||
PCRE_DATA_SCOPE int pcre_get_substring(const char *, int *, int, int,
|
||||
const char **);
|
||||
PCRE_DATA_SCOPE int pcre_get_substring_list(const char *, int *, int,
|
||||
const char ***);
|
||||
PCRE_DATA_SCOPE int pcre_info(const pcre *, int *, int *);
|
||||
PCRE_DATA_SCOPE const unsigned char *pcre_maketables(void);
|
||||
PCRE_DATA_SCOPE int pcre_refcount(pcre *, int);
|
||||
PCRE_DATA_SCOPE pcre_extra *pcre_study(const pcre *, int, const char **);
|
||||
PCRE_DATA_SCOPE const char *pcre_version(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* End of pcre.h */
|
195
glib/pcre/pcre_chartables.c
Normal file
195
glib/pcre/pcre_chartables.c
Normal file
@@ -0,0 +1,195 @@
|
||||
/* This file is autogenerated by ../update-pcre/update.sh during
|
||||
* the update of the local copy of PCRE.
|
||||
*/
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* This file is automatically written by the dftables auxiliary
|
||||
program. If you edit it by hand, you might like to edit the Makefile to
|
||||
prevent its ever being regenerated.
|
||||
|
||||
This file contains the default tables for characters with codes less than
|
||||
128 (ASCII characters). These tables are used when no external tables are
|
||||
passed to PCRE.
|
||||
|
||||
The following #include is present because without it gcc 4.x may remove
|
||||
the array definition from the final binary if PCRE is built into a static
|
||||
library and dead code stripping is activated. This leads to link errors.
|
||||
Pulling in the header ensures that the array gets flagged as "someone
|
||||
outside this compilation unit might reference this" and so it will always
|
||||
be supplied to the linker. */
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
const unsigned char _pcre_default_tables[] = {
|
||||
|
||||
/* This table is a lower casing table. */
|
||||
|
||||
0, 1, 2, 3, 4, 5, 6, 7,
|
||||
8, 9, 10, 11, 12, 13, 14, 15,
|
||||
16, 17, 18, 19, 20, 21, 22, 23,
|
||||
24, 25, 26, 27, 28, 29, 30, 31,
|
||||
32, 33, 34, 35, 36, 37, 38, 39,
|
||||
40, 41, 42, 43, 44, 45, 46, 47,
|
||||
48, 49, 50, 51, 52, 53, 54, 55,
|
||||
56, 57, 58, 59, 60, 61, 62, 63,
|
||||
64, 97, 98, 99,100,101,102,103,
|
||||
104,105,106,107,108,109,110,111,
|
||||
112,113,114,115,116,117,118,119,
|
||||
120,121,122, 91, 92, 93, 94, 95,
|
||||
96, 97, 98, 99,100,101,102,103,
|
||||
104,105,106,107,108,109,110,111,
|
||||
112,113,114,115,116,117,118,119,
|
||||
120,121,122,123,124,125,126,127,
|
||||
128,129,130,131,132,133,134,135,
|
||||
136,137,138,139,140,141,142,143,
|
||||
144,145,146,147,148,149,150,151,
|
||||
152,153,154,155,156,157,158,159,
|
||||
160,161,162,163,164,165,166,167,
|
||||
168,169,170,171,172,173,174,175,
|
||||
176,177,178,179,180,181,182,183,
|
||||
184,185,186,187,188,189,190,191,
|
||||
192,193,194,195,196,197,198,199,
|
||||
200,201,202,203,204,205,206,207,
|
||||
208,209,210,211,212,213,214,215,
|
||||
216,217,218,219,220,221,222,223,
|
||||
224,225,226,227,228,229,230,231,
|
||||
232,233,234,235,236,237,238,239,
|
||||
240,241,242,243,244,245,246,247,
|
||||
248,249,250,251,252,253,254,255,
|
||||
|
||||
/* This table is a case flipping table. */
|
||||
|
||||
0, 1, 2, 3, 4, 5, 6, 7,
|
||||
8, 9, 10, 11, 12, 13, 14, 15,
|
||||
16, 17, 18, 19, 20, 21, 22, 23,
|
||||
24, 25, 26, 27, 28, 29, 30, 31,
|
||||
32, 33, 34, 35, 36, 37, 38, 39,
|
||||
40, 41, 42, 43, 44, 45, 46, 47,
|
||||
48, 49, 50, 51, 52, 53, 54, 55,
|
||||
56, 57, 58, 59, 60, 61, 62, 63,
|
||||
64, 97, 98, 99,100,101,102,103,
|
||||
104,105,106,107,108,109,110,111,
|
||||
112,113,114,115,116,117,118,119,
|
||||
120,121,122, 91, 92, 93, 94, 95,
|
||||
96, 65, 66, 67, 68, 69, 70, 71,
|
||||
72, 73, 74, 75, 76, 77, 78, 79,
|
||||
80, 81, 82, 83, 84, 85, 86, 87,
|
||||
88, 89, 90,123,124,125,126,127,
|
||||
128,129,130,131,132,133,134,135,
|
||||
136,137,138,139,140,141,142,143,
|
||||
144,145,146,147,148,149,150,151,
|
||||
152,153,154,155,156,157,158,159,
|
||||
160,161,162,163,164,165,166,167,
|
||||
168,169,170,171,172,173,174,175,
|
||||
176,177,178,179,180,181,182,183,
|
||||
184,185,186,187,188,189,190,191,
|
||||
192,193,194,195,196,197,198,199,
|
||||
200,201,202,203,204,205,206,207,
|
||||
208,209,210,211,212,213,214,215,
|
||||
216,217,218,219,220,221,222,223,
|
||||
224,225,226,227,228,229,230,231,
|
||||
232,233,234,235,236,237,238,239,
|
||||
240,241,242,243,244,245,246,247,
|
||||
248,249,250,251,252,253,254,255,
|
||||
|
||||
/* This table contains bit maps for various character classes.
|
||||
Each map is 32 bytes long and the bits run from the least
|
||||
significant end of each byte. The classes that have their own
|
||||
maps are: space, xdigit, digit, upper, lower, word, graph
|
||||
print, punct, and cntrl. Other classes are built from combinations. */
|
||||
|
||||
0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
|
||||
0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
|
||||
0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
|
||||
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
|
||||
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
|
||||
0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
/* This table identifies various classes of character by individual bits:
|
||||
0x01 white space character
|
||||
0x02 letter
|
||||
0x04 decimal digit
|
||||
0x08 hexadecimal digit
|
||||
0x10 alphanumeric or '_'
|
||||
0x80 regular expression metacharacter or binary zero
|
||||
*/
|
||||
|
||||
0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
|
||||
0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
|
||||
0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
|
||||
0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
|
||||
0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
|
||||
0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
|
||||
0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
|
||||
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
|
||||
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
|
||||
0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
|
||||
0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
|
||||
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
|
||||
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
|
||||
0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
|
||||
|
||||
/* End of chartables.c */
|
5385
glib/pcre/pcre_compile.c
Normal file
5385
glib/pcre/pcre_compile.c
Normal file
File diff suppressed because it is too large
Load Diff
116
glib/pcre/pcre_config.c
Normal file
116
glib/pcre/pcre_config.c
Normal file
@@ -0,0 +1,116 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains the external function pcre_config(). */
|
||||
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Return info about what features are configured *
|
||||
*************************************************/
|
||||
|
||||
/* This function has an extensible interface so that additional items can be
|
||||
added compatibly.
|
||||
|
||||
Arguments:
|
||||
what what information is required
|
||||
where where to put the information
|
||||
|
||||
Returns: 0 if data returned, negative on error
|
||||
*/
|
||||
|
||||
PCRE_DATA_SCOPE int
|
||||
pcre_config(int what, void *where)
|
||||
{
|
||||
switch (what)
|
||||
{
|
||||
case PCRE_CONFIG_UTF8:
|
||||
#ifdef SUPPORT_UTF8
|
||||
*((int *)where) = 1;
|
||||
#else
|
||||
*((int *)where) = 0;
|
||||
#endif
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_UNICODE_PROPERTIES:
|
||||
#ifdef SUPPORT_UCP
|
||||
*((int *)where) = 1;
|
||||
#else
|
||||
*((int *)where) = 0;
|
||||
#endif
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_NEWLINE:
|
||||
*((int *)where) = NEWLINE;
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_LINK_SIZE:
|
||||
*((int *)where) = LINK_SIZE;
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_POSIX_MALLOC_THRESHOLD:
|
||||
*((int *)where) = POSIX_MALLOC_THRESHOLD;
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_MATCH_LIMIT:
|
||||
*((unsigned int *)where) = MATCH_LIMIT;
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_MATCH_LIMIT_RECURSION:
|
||||
*((unsigned int *)where) = MATCH_LIMIT_RECURSION;
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_STACKRECURSE:
|
||||
#ifdef NO_RECURSE
|
||||
*((int *)where) = 0;
|
||||
#else
|
||||
*((int *)where) = 1;
|
||||
#endif
|
||||
break;
|
||||
|
||||
default: return PCRE_ERROR_BADOPTION;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* End of pcre_config.c */
|
2433
glib/pcre/pcre_dfa_exec.c
Normal file
2433
glib/pcre/pcre_dfa_exec.c
Normal file
File diff suppressed because it is too large
Load Diff
4199
glib/pcre/pcre_exec.c
Normal file
4199
glib/pcre/pcre_exec.c
Normal file
File diff suppressed because it is too large
Load Diff
149
glib/pcre/pcre_fullinfo.c
Normal file
149
glib/pcre/pcre_fullinfo.c
Normal file
@@ -0,0 +1,149 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/*PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains the external function pcre_fullinfo(), which returns
|
||||
information about a compiled pattern. */
|
||||
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Return info about compiled pattern *
|
||||
*************************************************/
|
||||
|
||||
/* This is a newer "info" function which has an extensible interface so
|
||||
that additional items can be added compatibly.
|
||||
|
||||
Arguments:
|
||||
argument_re points to compiled code
|
||||
extra_data points extra data, or NULL
|
||||
what what information is required
|
||||
where where to put the information
|
||||
|
||||
Returns: 0 if data returned, negative on error
|
||||
*/
|
||||
|
||||
PCRE_DATA_SCOPE int
|
||||
pcre_fullinfo(const pcre *argument_re, const pcre_extra *extra_data, int what,
|
||||
void *where)
|
||||
{
|
||||
real_pcre internal_re;
|
||||
pcre_study_data internal_study;
|
||||
const real_pcre *re = (const real_pcre *)argument_re;
|
||||
const pcre_study_data *study = NULL;
|
||||
|
||||
if (re == NULL || where == NULL) return PCRE_ERROR_NULL;
|
||||
|
||||
if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_STUDY_DATA) != 0)
|
||||
study = (const pcre_study_data *)extra_data->study_data;
|
||||
|
||||
if (re->magic_number != MAGIC_NUMBER)
|
||||
{
|
||||
re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
|
||||
if (re == NULL) return PCRE_ERROR_BADMAGIC;
|
||||
if (study != NULL) study = &internal_study;
|
||||
}
|
||||
|
||||
switch (what)
|
||||
{
|
||||
case PCRE_INFO_OPTIONS:
|
||||
*((unsigned long int *)where) = re->options & PUBLIC_OPTIONS;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_SIZE:
|
||||
*((size_t *)where) = re->size;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_STUDYSIZE:
|
||||
*((size_t *)where) = (study == NULL)? 0 : study->size;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_CAPTURECOUNT:
|
||||
*((int *)where) = re->top_bracket;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_BACKREFMAX:
|
||||
*((int *)where) = re->top_backref;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_FIRSTBYTE:
|
||||
*((int *)where) =
|
||||
((re->options & PCRE_FIRSTSET) != 0)? re->first_byte :
|
||||
((re->options & PCRE_STARTLINE) != 0)? -1 : -2;
|
||||
break;
|
||||
|
||||
/* Make sure we pass back the pointer to the bit vector in the external
|
||||
block, not the internal copy (with flipped integer fields). */
|
||||
|
||||
case PCRE_INFO_FIRSTTABLE:
|
||||
*((const uschar **)where) =
|
||||
(study != NULL && (study->options & PCRE_STUDY_MAPPED) != 0)?
|
||||
((const pcre_study_data *)extra_data->study_data)->start_bits : NULL;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_LASTLITERAL:
|
||||
*((int *)where) =
|
||||
((re->options & PCRE_REQCHSET) != 0)? re->req_byte : -1;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_NAMEENTRYSIZE:
|
||||
*((int *)where) = re->name_entry_size;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_NAMECOUNT:
|
||||
*((int *)where) = re->name_count;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_NAMETABLE:
|
||||
*((const uschar **)where) = (const uschar *)re + re->name_table_offset;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_DEFAULT_TABLES:
|
||||
*((const uschar **)where) = (const uschar *)(_pcre_default_tables);
|
||||
break;
|
||||
|
||||
default: return PCRE_ERROR_BADOPTION;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* End of pcre_fullinfo.c */
|
461
glib/pcre/pcre_get.c
Normal file
461
glib/pcre/pcre_get.c
Normal file
@@ -0,0 +1,461 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains some convenience functions for extracting substrings
|
||||
from the subject string after a regex match has succeeded. The original idea
|
||||
for these functions came from Scott Wimer. */
|
||||
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find number for named string *
|
||||
*************************************************/
|
||||
|
||||
/* This function is used by the get_first_set() function below, as well
|
||||
as being generally available. It assumes that names are unique.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
stringname the name whose number is required
|
||||
|
||||
Returns: the number of the named parentheses, or a negative number
|
||||
(PCRE_ERROR_NOSUBSTRING) if not found
|
||||
*/
|
||||
|
||||
int
|
||||
pcre_get_stringnumber(const pcre *code, const char *stringname)
|
||||
{
|
||||
int rc;
|
||||
int entrysize;
|
||||
int top, bot;
|
||||
uschar *nametable;
|
||||
|
||||
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
|
||||
return rc;
|
||||
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
|
||||
|
||||
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
|
||||
return rc;
|
||||
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
|
||||
return rc;
|
||||
|
||||
bot = 0;
|
||||
while (top > bot)
|
||||
{
|
||||
int mid = (top + bot) / 2;
|
||||
uschar *entry = nametable + entrysize*mid;
|
||||
int c = strcmp(stringname, (char *)(entry + 2));
|
||||
if (c == 0) return (entry[0] << 8) + entry[1];
|
||||
if (c > 0) bot = mid + 1; else top = mid;
|
||||
}
|
||||
|
||||
return PCRE_ERROR_NOSUBSTRING;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find (multiple) entries for named string *
|
||||
*************************************************/
|
||||
|
||||
/* This is used by the get_first_set() function below, as well as being
|
||||
generally available. It is used when duplicated names are permitted.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
stringname the name whose entries required
|
||||
firstptr where to put the pointer to the first entry
|
||||
lastptr where to put the pointer to the last entry
|
||||
|
||||
Returns: the length of each entry, or a negative number
|
||||
(PCRE_ERROR_NOSUBSTRING) if not found
|
||||
*/
|
||||
|
||||
int
|
||||
pcre_get_stringtable_entries(const pcre *code, const char *stringname,
|
||||
char **firstptr, char **lastptr)
|
||||
{
|
||||
int rc;
|
||||
int entrysize;
|
||||
int top, bot;
|
||||
uschar *nametable, *lastentry;
|
||||
|
||||
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
|
||||
return rc;
|
||||
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
|
||||
|
||||
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
|
||||
return rc;
|
||||
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
|
||||
return rc;
|
||||
|
||||
lastentry = nametable + entrysize * (top - 1);
|
||||
bot = 0;
|
||||
while (top > bot)
|
||||
{
|
||||
int mid = (top + bot) / 2;
|
||||
uschar *entry = nametable + entrysize*mid;
|
||||
int c = strcmp(stringname, (char *)(entry + 2));
|
||||
if (c == 0)
|
||||
{
|
||||
uschar *first = entry;
|
||||
uschar *last = entry;
|
||||
while (first > nametable)
|
||||
{
|
||||
if (strcmp(stringname, (char *)(first - entrysize + 2)) != 0) break;
|
||||
first -= entrysize;
|
||||
}
|
||||
while (last < lastentry)
|
||||
{
|
||||
if (strcmp(stringname, (char *)(last + entrysize + 2)) != 0) break;
|
||||
last += entrysize;
|
||||
}
|
||||
*firstptr = (char *)first;
|
||||
*lastptr = (char *)last;
|
||||
return entrysize;
|
||||
}
|
||||
if (c > 0) bot = mid + 1; else top = mid;
|
||||
}
|
||||
|
||||
return PCRE_ERROR_NOSUBSTRING;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find first set of multiple named strings *
|
||||
*************************************************/
|
||||
|
||||
/* This function allows for duplicate names in the table of named substrings.
|
||||
It returns the number of the first one that was set in a pattern match.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
stringname the name of the capturing substring
|
||||
ovector the vector of matched substrings
|
||||
|
||||
Returns: the number of the first that is set,
|
||||
or the number of the last one if none are set,
|
||||
or a negative number on error
|
||||
*/
|
||||
|
||||
static int
|
||||
get_first_set(const pcre *code, const char *stringname, int *ovector)
|
||||
{
|
||||
const real_pcre *re = (const real_pcre *)code;
|
||||
int entrysize;
|
||||
char *first, *last;
|
||||
uschar *entry;
|
||||
if ((re->options & (PCRE_DUPNAMES | PCRE_JCHANGED)) == 0)
|
||||
return pcre_get_stringnumber(code, stringname);
|
||||
entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
|
||||
if (entrysize <= 0) return entrysize;
|
||||
for (entry = (uschar *)first; entry <= (uschar *)last; entry += entrysize)
|
||||
{
|
||||
int n = (entry[0] << 8) + entry[1];
|
||||
if (ovector[n*2] >= 0) return n;
|
||||
}
|
||||
return (first[0] << 8) + first[1];
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Copy captured string to given buffer *
|
||||
*************************************************/
|
||||
|
||||
/* This function copies a single captured substring into a given buffer.
|
||||
Note that we use memcpy() rather than strncpy() in case there are binary zeros
|
||||
in the string.
|
||||
|
||||
Arguments:
|
||||
subject the subject string that was matched
|
||||
ovector pointer to the offsets table
|
||||
stringcount the number of substrings that were captured
|
||||
(i.e. the yield of the pcre_exec call, unless
|
||||
that was zero, in which case it should be 1/3
|
||||
of the offset table size)
|
||||
stringnumber the number of the required substring
|
||||
buffer where to put the substring
|
||||
size the size of the buffer
|
||||
|
||||
Returns: if successful:
|
||||
the length of the copied string, not including the zero
|
||||
that is put on the end; can be zero
|
||||
if not successful:
|
||||
PCRE_ERROR_NOMEMORY (-6) buffer too small
|
||||
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
|
||||
*/
|
||||
|
||||
int
|
||||
pcre_copy_substring(const char *subject, int *ovector, int stringcount,
|
||||
int stringnumber, char *buffer, int size)
|
||||
{
|
||||
int yield;
|
||||
if (stringnumber < 0 || stringnumber >= stringcount)
|
||||
return PCRE_ERROR_NOSUBSTRING;
|
||||
stringnumber *= 2;
|
||||
yield = ovector[stringnumber+1] - ovector[stringnumber];
|
||||
if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
|
||||
memcpy(buffer, subject + ovector[stringnumber], yield);
|
||||
buffer[yield] = 0;
|
||||
return yield;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Copy named captured string to given buffer *
|
||||
*************************************************/
|
||||
|
||||
/* This function copies a single captured substring into a given buffer,
|
||||
identifying it by name. If the regex permits duplicate names, the first
|
||||
substring that is set is chosen.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
subject the subject string that was matched
|
||||
ovector pointer to the offsets table
|
||||
stringcount the number of substrings that were captured
|
||||
(i.e. the yield of the pcre_exec call, unless
|
||||
that was zero, in which case it should be 1/3
|
||||
of the offset table size)
|
||||
stringname the name of the required substring
|
||||
buffer where to put the substring
|
||||
size the size of the buffer
|
||||
|
||||
Returns: if successful:
|
||||
the length of the copied string, not including the zero
|
||||
that is put on the end; can be zero
|
||||
if not successful:
|
||||
PCRE_ERROR_NOMEMORY (-6) buffer too small
|
||||
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
|
||||
*/
|
||||
|
||||
int
|
||||
pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
|
||||
int stringcount, const char *stringname, char *buffer, int size)
|
||||
{
|
||||
int n = get_first_set(code, stringname, ovector);
|
||||
if (n <= 0) return n;
|
||||
return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Copy all captured strings to new store *
|
||||
*************************************************/
|
||||
|
||||
/* This function gets one chunk of store and builds a list of pointers and all
|
||||
of the captured substrings in it. A NULL pointer is put on the end of the list.
|
||||
|
||||
Arguments:
|
||||
subject the subject string that was matched
|
||||
ovector pointer to the offsets table
|
||||
stringcount the number of substrings that were captured
|
||||
(i.e. the yield of the pcre_exec call, unless
|
||||
that was zero, in which case it should be 1/3
|
||||
of the offset table size)
|
||||
listptr set to point to the list of pointers
|
||||
|
||||
Returns: if successful: 0
|
||||
if not successful:
|
||||
PCRE_ERROR_NOMEMORY (-6) failed to get store
|
||||
*/
|
||||
|
||||
int
|
||||
pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
|
||||
const char ***listptr)
|
||||
{
|
||||
int i;
|
||||
int size = sizeof(char *);
|
||||
int double_count = stringcount * 2;
|
||||
char **stringlist;
|
||||
char *p;
|
||||
|
||||
for (i = 0; i < double_count; i += 2)
|
||||
size += sizeof(char *) + ovector[i+1] - ovector[i] + 1;
|
||||
|
||||
stringlist = (char **)(pcre_malloc)(size);
|
||||
if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
|
||||
|
||||
*listptr = (const char **)stringlist;
|
||||
p = (char *)(stringlist + stringcount + 1);
|
||||
|
||||
for (i = 0; i < double_count; i += 2)
|
||||
{
|
||||
int len = ovector[i+1] - ovector[i];
|
||||
memcpy(p, subject + ovector[i], len);
|
||||
*stringlist++ = p;
|
||||
p += len;
|
||||
*p++ = 0;
|
||||
}
|
||||
|
||||
*stringlist = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free store obtained by get_substring_list *
|
||||
*************************************************/
|
||||
|
||||
/* This function exists for the benefit of people calling PCRE from non-C
|
||||
programs that can call its functions, but not free() or (pcre_free)() directly.
|
||||
|
||||
Argument: the result of a previous pcre_get_substring_list()
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
void
|
||||
pcre_free_substring_list(const char **pointer)
|
||||
{
|
||||
(pcre_free)((void *)pointer);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Copy captured string to new store *
|
||||
*************************************************/
|
||||
|
||||
/* This function copies a single captured substring into a piece of new
|
||||
store
|
||||
|
||||
Arguments:
|
||||
subject the subject string that was matched
|
||||
ovector pointer to the offsets table
|
||||
stringcount the number of substrings that were captured
|
||||
(i.e. the yield of the pcre_exec call, unless
|
||||
that was zero, in which case it should be 1/3
|
||||
of the offset table size)
|
||||
stringnumber the number of the required substring
|
||||
stringptr where to put a pointer to the substring
|
||||
|
||||
Returns: if successful:
|
||||
the length of the string, not including the zero that
|
||||
is put on the end; can be zero
|
||||
if not successful:
|
||||
PCRE_ERROR_NOMEMORY (-6) failed to get store
|
||||
PCRE_ERROR_NOSUBSTRING (-7) substring not present
|
||||
*/
|
||||
|
||||
int
|
||||
pcre_get_substring(const char *subject, int *ovector, int stringcount,
|
||||
int stringnumber, const char **stringptr)
|
||||
{
|
||||
int yield;
|
||||
char *substring;
|
||||
if (stringnumber < 0 || stringnumber >= stringcount)
|
||||
return PCRE_ERROR_NOSUBSTRING;
|
||||
stringnumber *= 2;
|
||||
yield = ovector[stringnumber+1] - ovector[stringnumber];
|
||||
substring = (char *)(pcre_malloc)(yield + 1);
|
||||
if (substring == NULL) return PCRE_ERROR_NOMEMORY;
|
||||
memcpy(substring, subject + ovector[stringnumber], yield);
|
||||
substring[yield] = 0;
|
||||
*stringptr = substring;
|
||||
return yield;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Copy named captured string to new store *
|
||||
*************************************************/
|
||||
|
||||
/* This function copies a single captured substring, identified by name, into
|
||||
new store. If the regex permits duplicate names, the first substring that is
|
||||
set is chosen.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
subject the subject string that was matched
|
||||
ovector pointer to the offsets table
|
||||
stringcount the number of substrings that were captured
|
||||
(i.e. the yield of the pcre_exec call, unless
|
||||
that was zero, in which case it should be 1/3
|
||||
of the offset table size)
|
||||
stringname the name of the required substring
|
||||
stringptr where to put the pointer
|
||||
|
||||
Returns: if successful:
|
||||
the length of the copied string, not including the zero
|
||||
that is put on the end; can be zero
|
||||
if not successful:
|
||||
PCRE_ERROR_NOMEMORY (-6) couldn't get memory
|
||||
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
|
||||
*/
|
||||
|
||||
int
|
||||
pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
|
||||
int stringcount, const char *stringname, const char **stringptr)
|
||||
{
|
||||
int n = get_first_set(code, stringname, ovector);
|
||||
if (n <= 0) return n;
|
||||
return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free store obtained by get_substring *
|
||||
*************************************************/
|
||||
|
||||
/* This function exists for the benefit of people calling PCRE from non-C
|
||||
programs that can call its functions, but not free() or (pcre_free)() directly.
|
||||
|
||||
Argument: the result of a previous pcre_get_substring()
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
void
|
||||
pcre_free_substring(const char *pointer)
|
||||
{
|
||||
(pcre_free)((void *)pointer);
|
||||
}
|
||||
|
||||
/* End of pcre_get.c */
|
59
glib/pcre/pcre_globals.c
Normal file
59
glib/pcre/pcre_globals.c
Normal file
@@ -0,0 +1,59 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains global variables that are exported by the PCRE library.
|
||||
PCRE is thread-clean and doesn't use any global variables in the normal sense.
|
||||
However, it calls memory allocation and freeing functions via the four
|
||||
indirections below, and it can optionally do callouts, using the fifth
|
||||
indirection. These values can be changed by the caller, but are shared between
|
||||
all threads. However, when compiling for Virtual Pascal, things are done
|
||||
differently, and global variables are not used (see pcre.in). */
|
||||
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" int (*pcre_callout)(pcre_callout_block *) = NULL;
|
||||
#else
|
||||
int (*pcre_callout)(pcre_callout_block *) = NULL;
|
||||
#endif
|
||||
|
||||
/* End of pcre_globals.c */
|
89
glib/pcre/pcre_info.c
Normal file
89
glib/pcre/pcre_info.c
Normal file
@@ -0,0 +1,89 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains the external function pcre_info(), which gives some
|
||||
information about a compiled pattern. However, use of this function is now
|
||||
deprecated, as it has been superseded by pcre_fullinfo(). */
|
||||
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* (Obsolete) Return info about compiled pattern *
|
||||
*************************************************/
|
||||
|
||||
/* This is the original "info" function. It picks potentially useful data out
|
||||
of the private structure, but its interface was too rigid. It remains for
|
||||
backwards compatibility. The public options are passed back in an int - though
|
||||
the re->options field has been expanded to a long int, all the public options
|
||||
at the low end of it, and so even on 16-bit systems this will still be OK.
|
||||
Therefore, I haven't changed the API for pcre_info().
|
||||
|
||||
Arguments:
|
||||
argument_re points to compiled code
|
||||
optptr where to pass back the options
|
||||
first_byte where to pass back the first character,
|
||||
or -1 if multiline and all branches start ^,
|
||||
or -2 otherwise
|
||||
|
||||
Returns: number of capturing subpatterns
|
||||
or negative values on error
|
||||
*/
|
||||
|
||||
PCRE_DATA_SCOPE int
|
||||
pcre_info(const pcre *argument_re, int *optptr, int *first_byte)
|
||||
{
|
||||
real_pcre internal_re;
|
||||
const real_pcre *re = (const real_pcre *)argument_re;
|
||||
if (re == NULL) return PCRE_ERROR_NULL;
|
||||
if (re->magic_number != MAGIC_NUMBER)
|
||||
{
|
||||
re = _pcre_try_flipped(re, &internal_re, NULL, NULL);
|
||||
if (re == NULL) return PCRE_ERROR_BADMAGIC;
|
||||
}
|
||||
if (optptr != NULL) *optptr = (int)(re->options & PUBLIC_OPTIONS);
|
||||
if (first_byte != NULL)
|
||||
*first_byte = ((re->options & PCRE_FIRSTSET) != 0)? re->first_byte :
|
||||
((re->options & PCRE_STARTLINE) != 0)? -1 : -2;
|
||||
return re->top_bracket;
|
||||
}
|
||||
|
||||
/* End of pcre_info.c */
|
1041
glib/pcre/pcre_internal.h
Normal file
1041
glib/pcre/pcre_internal.h
Normal file
File diff suppressed because it is too large
Load Diff
140
glib/pcre/pcre_maketables.c
Normal file
140
glib/pcre/pcre_maketables.c
Normal file
@@ -0,0 +1,140 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains the external function pcre_maketables(), which builds
|
||||
character tables for PCRE in the current locale. The file is compiled on its
|
||||
own as part of the PCRE library. However, it is also included in the
|
||||
compilation of dftables.c, in which case the macro DFTABLES is defined. */
|
||||
|
||||
|
||||
#ifndef DFTABLES
|
||||
#include "pcre_internal.h"
|
||||
#endif
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Create PCRE character tables *
|
||||
*************************************************/
|
||||
|
||||
/* This function builds a set of character tables for use by PCRE and returns
|
||||
a pointer to them. They are build using the ctype functions, and consequently
|
||||
their contents will depend upon the current locale setting. When compiled as
|
||||
part of the library, the store is obtained via pcre_malloc(), but when compiled
|
||||
inside dftables, use malloc().
|
||||
|
||||
Arguments: none
|
||||
Returns: pointer to the contiguous block of data
|
||||
*/
|
||||
|
||||
const unsigned char *
|
||||
pcre_maketables(void)
|
||||
{
|
||||
unsigned char *yield, *p;
|
||||
int i;
|
||||
|
||||
#ifndef DFTABLES
|
||||
yield = (unsigned char*)(pcre_malloc)(tables_length);
|
||||
#else
|
||||
yield = (unsigned char*)malloc(tables_length);
|
||||
#endif
|
||||
|
||||
if (yield == NULL) return NULL;
|
||||
p = yield;
|
||||
|
||||
/* First comes the lower casing table */
|
||||
|
||||
for (i = 0; i < 256; i++) *p++ = tolower(i);
|
||||
|
||||
/* Next the case-flipping table */
|
||||
|
||||
for (i = 0; i < 256; i++) *p++ = islower(i)? toupper(i) : tolower(i);
|
||||
|
||||
/* Then the character class tables. Don't try to be clever and save effort on
|
||||
exclusive ones - in some locales things may be different. Note that the table
|
||||
for "space" includes everything "isspace" gives, including VT in the default
|
||||
locale. This makes it work for the POSIX class [:space:]. Note also that it is
|
||||
possible for a character to be alnum or alpha without being lower or upper,
|
||||
such as "male and female ordinals" (\xAA and \xBA) in the fr_FR locale (at
|
||||
least under Debian Linux's locales as of 12/2005). So we must test for alnum
|
||||
specially. */
|
||||
|
||||
memset(p, 0, cbit_length);
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
if (isdigit(i)) p[cbit_digit + i/8] |= 1 << (i&7);
|
||||
if (isupper(i)) p[cbit_upper + i/8] |= 1 << (i&7);
|
||||
if (islower(i)) p[cbit_lower + i/8] |= 1 << (i&7);
|
||||
if (isalnum(i)) p[cbit_word + i/8] |= 1 << (i&7);
|
||||
if (i == '_') p[cbit_word + i/8] |= 1 << (i&7);
|
||||
if (isspace(i)) p[cbit_space + i/8] |= 1 << (i&7);
|
||||
if (isxdigit(i))p[cbit_xdigit + i/8] |= 1 << (i&7);
|
||||
if (isgraph(i)) p[cbit_graph + i/8] |= 1 << (i&7);
|
||||
if (isprint(i)) p[cbit_print + i/8] |= 1 << (i&7);
|
||||
if (ispunct(i)) p[cbit_punct + i/8] |= 1 << (i&7);
|
||||
if (iscntrl(i)) p[cbit_cntrl + i/8] |= 1 << (i&7);
|
||||
}
|
||||
p += cbit_length;
|
||||
|
||||
/* Finally, the character type table. In this, we exclude VT from the white
|
||||
space chars, because Perl doesn't recognize it as such for \s and for comments
|
||||
within regexes. */
|
||||
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
int x = 0;
|
||||
if (i != 0x0b && isspace(i)) x += ctype_space;
|
||||
if (isalpha(i)) x += ctype_letter;
|
||||
if (isdigit(i)) x += ctype_digit;
|
||||
if (isxdigit(i)) x += ctype_xdigit;
|
||||
if (isalnum(i) || i == '_') x += ctype_word;
|
||||
|
||||
/* Note: strchr includes the terminating zero in the characters it considers.
|
||||
In this instance, that is ok because we want binary zero to be flagged as a
|
||||
meta-character, which in this sense is any character that terminates a run
|
||||
of data characters. */
|
||||
|
||||
if (strchr("\\*+?{^.$|()[", i) != 0) x += ctype_meta;
|
||||
*p++ = x;
|
||||
}
|
||||
|
||||
return yield;
|
||||
}
|
||||
|
||||
/* End of pcre_maketables.c */
|
135
glib/pcre/pcre_newline.c
Normal file
135
glib/pcre/pcre_newline.c
Normal file
@@ -0,0 +1,135 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains internal functions for testing newlines when more than
|
||||
one kind of newline is to be recognized. When a newline is found, its length is
|
||||
returned. In principle, we could implement several newline "types", each
|
||||
referring to a different set of newline characters. At present, PCRE supports
|
||||
only NLTYPE_FIXED, which gets handled without these functions, and NLTYPE_ALL,
|
||||
so for now the type isn't passed into the functions. It can easily be added
|
||||
later if required. The full list of Unicode newline characters is taken from
|
||||
http://unicode.org/unicode/reports/tr18/. */
|
||||
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Check for newline at given position *
|
||||
*************************************************/
|
||||
|
||||
/* It is guaranteed that the initial value of ptr is less than the end of the
|
||||
string that is being processed.
|
||||
|
||||
Arguments:
|
||||
ptr pointer to possible newline
|
||||
endptr pointer to the end of the string
|
||||
lenptr where to return the length
|
||||
utf8 TRUE if in utf8 mode
|
||||
|
||||
Returns: TRUE or FALSE
|
||||
*/
|
||||
|
||||
BOOL
|
||||
_pcre_is_newline(const uschar *ptr, const uschar *endptr, int *lenptr,
|
||||
BOOL utf8)
|
||||
{
|
||||
int c;
|
||||
if (utf8) { GETCHAR(c, ptr); } else c = *ptr;
|
||||
switch(c)
|
||||
{
|
||||
case 0x000a: /* LF */
|
||||
case 0x000b: /* VT */
|
||||
case 0x000c: *lenptr = 1; return TRUE; /* FF */
|
||||
case 0x000d: *lenptr = (ptr < endptr - 1 && ptr[1] == 0x0a)? 2 : 1;
|
||||
return TRUE; /* CR */
|
||||
case 0x0085: *lenptr = utf8? 2 : 1; return TRUE; /* NEL */
|
||||
case 0x2028: /* LS */
|
||||
case 0x2029: *lenptr = 3; return TRUE; /* PS */
|
||||
default: return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Check for newline at previous position *
|
||||
*************************************************/
|
||||
|
||||
/* It is guaranteed that the initial value of ptr is greater than the start of
|
||||
the string that is being processed.
|
||||
|
||||
Arguments:
|
||||
ptr pointer to possible newline
|
||||
startptr pointer to the start of the string
|
||||
lenptr where to return the length
|
||||
utf8 TRUE if in utf8 mode
|
||||
|
||||
Returns: TRUE or FALSE
|
||||
*/
|
||||
|
||||
BOOL
|
||||
_pcre_was_newline(const uschar *ptr, const uschar *startptr, int *lenptr,
|
||||
BOOL utf8)
|
||||
{
|
||||
int c;
|
||||
ptr--;
|
||||
if (utf8)
|
||||
{
|
||||
BACKCHAR(ptr);
|
||||
GETCHAR(c, ptr);
|
||||
}
|
||||
else c = *ptr;
|
||||
switch(c)
|
||||
{
|
||||
case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1;
|
||||
return TRUE; /* LF */
|
||||
case 0x000b: /* VT */
|
||||
case 0x000c: /* FF */
|
||||
case 0x000d: *lenptr = 1; return TRUE; /* CR */
|
||||
case 0x0085: *lenptr = utf8? 2 : 1; return TRUE; /* NEL */
|
||||
case 0x2028: /* LS */
|
||||
case 0x2029: *lenptr = 3; return TRUE; /* PS */
|
||||
default: return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
/* End of pcre_newline.c */
|
78
glib/pcre/pcre_ord2utf8.c
Normal file
78
glib/pcre/pcre_ord2utf8.c
Normal file
@@ -0,0 +1,78 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This file contains a private PCRE function that converts an ordinal
|
||||
character value into a UTF8 string. */
|
||||
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Convert character value to UTF-8 *
|
||||
*************************************************/
|
||||
|
||||
/* This function takes an integer value in the range 0 - 0x7fffffff
|
||||
and encodes it as a UTF-8 character in 0 to 6 bytes.
|
||||
|
||||
Arguments:
|
||||
cvalue the character value
|
||||
buffer pointer to buffer for result - at least 6 bytes long
|
||||
|
||||
Returns: number of characters placed in the buffer
|
||||
*/
|
||||
|
||||
int
|
||||
_pcre_ord2utf8(int cvalue, uschar *buffer)
|
||||
{
|
||||
register int i, j;
|
||||
for (i = 0; i < _pcre_utf8_table1_size; i++)
|
||||
if (cvalue <= _pcre_utf8_table1[i]) break;
|
||||
buffer += i;
|
||||
for (j = i; j > 0; j--)
|
||||
{
|
||||
*buffer-- = 0x80 | (cvalue & 0x3f);
|
||||
cvalue >>= 6;
|
||||
}
|
||||
*buffer = _pcre_utf8_table2[i] | cvalue;
|
||||
return i + 1;
|
||||
}
|
||||
|
||||
/* End of pcre_ord2utf8.c */
|
77
glib/pcre/pcre_refcount.c
Normal file
77
glib/pcre/pcre_refcount.c
Normal file
@@ -0,0 +1,77 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains the external function pcre_refcount(), which is an
|
||||
auxiliary function that can be used to maintain a reference count in a compiled
|
||||
pattern data block. This might be helpful in applications where the block is
|
||||
shared by different users. */
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Maintain reference count *
|
||||
*************************************************/
|
||||
|
||||
/* The reference count is a 16-bit field, initialized to zero. It is not
|
||||
possible to transfer a non-zero count from one host to a different host that
|
||||
has a different byte order - though I can't see why anyone in their right mind
|
||||
would ever want to do that!
|
||||
|
||||
Arguments:
|
||||
argument_re points to compiled code
|
||||
adjust value to add to the count
|
||||
|
||||
Returns: the (possibly updated) count value (a non-negative number), or
|
||||
a negative error number
|
||||
*/
|
||||
|
||||
PCRE_DATA_SCOPE int
|
||||
pcre_refcount(pcre *argument_re, int adjust)
|
||||
{
|
||||
real_pcre *re = (real_pcre *)argument_re;
|
||||
if (re == NULL) return PCRE_ERROR_NULL;
|
||||
re->ref_count = (-adjust > re->ref_count)? 0 :
|
||||
(adjust + re->ref_count > 65535)? 65535 :
|
||||
re->ref_count + adjust;
|
||||
return re->ref_count;
|
||||
}
|
||||
|
||||
/* End of pcre_refcount.c */
|
570
glib/pcre/pcre_study.c
Normal file
570
glib/pcre/pcre_study.c
Normal file
@@ -0,0 +1,570 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains the external function pcre_study(), along with local
|
||||
supporting functions. */
|
||||
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
/* Returns from set_start_bits() */
|
||||
|
||||
enum { SSB_FAIL, SSB_DONE, SSB_CONTINUE };
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Set a bit and maybe its alternate case *
|
||||
*************************************************/
|
||||
|
||||
/* Given a character, set its bit in the table, and also the bit for the other
|
||||
version of a letter if we are caseless.
|
||||
|
||||
Arguments:
|
||||
start_bits points to the bit map
|
||||
c is the character
|
||||
caseless the caseless flag
|
||||
cd the block with char table pointers
|
||||
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
static void
|
||||
set_bit(uschar *start_bits, unsigned int c, BOOL caseless, compile_data *cd)
|
||||
{
|
||||
start_bits[c/8] |= (1 << (c&7));
|
||||
if (caseless && (cd->ctypes[c] & ctype_letter) != 0)
|
||||
start_bits[cd->fcc[c]/8] |= (1 << (cd->fcc[c]&7));
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Create bitmap of starting bytes *
|
||||
*************************************************/
|
||||
|
||||
/* This function scans a compiled unanchored expression recursively and
|
||||
attempts to build a bitmap of the set of possible starting bytes. As time goes
|
||||
by, we may be able to get more clever at doing this. The SSB_CONTINUE return is
|
||||
useful for parenthesized groups in patterns such as (a*)b where the group
|
||||
provides some optional starting bytes but scanning must continue at the outer
|
||||
level to find at least one mandatory byte. At the outermost level, this
|
||||
function fails unless the result is SSB_DONE.
|
||||
|
||||
Arguments:
|
||||
code points to an expression
|
||||
start_bits points to a 32-byte table, initialized to 0
|
||||
caseless the current state of the caseless flag
|
||||
utf8 TRUE if in UTF-8 mode
|
||||
cd the block with char table pointers
|
||||
|
||||
Returns: SSB_FAIL => Failed to find any starting bytes
|
||||
SSB_DONE => Found mandatory starting bytes
|
||||
SSB_CONTINUE => Found optional starting bytes
|
||||
*/
|
||||
|
||||
static int
|
||||
set_start_bits(const uschar *code, uschar *start_bits, BOOL caseless,
|
||||
BOOL utf8, compile_data *cd)
|
||||
{
|
||||
register int c;
|
||||
int yield = SSB_DONE;
|
||||
|
||||
#if 0
|
||||
/* ========================================================================= */
|
||||
/* The following comment and code was inserted in January 1999. In May 2006,
|
||||
when it was observed to cause compiler warnings about unused values, I took it
|
||||
out again. If anybody is still using OS/2, they will have to put it back
|
||||
manually. */
|
||||
|
||||
/* This next statement and the later reference to dummy are here in order to
|
||||
trick the optimizer of the IBM C compiler for OS/2 into generating correct
|
||||
code. Apparently IBM isn't going to fix the problem, and we would rather not
|
||||
disable optimization (in this module it actually makes a big difference, and
|
||||
the pcre module can use all the optimization it can get). */
|
||||
|
||||
volatile int dummy;
|
||||
/* ========================================================================= */
|
||||
#endif
|
||||
|
||||
do
|
||||
{
|
||||
const uschar *tcode = code + (((int)*code == OP_CBRA)? 3:1) + LINK_SIZE;
|
||||
BOOL try_next = TRUE;
|
||||
|
||||
while (try_next) /* Loop for items in this branch */
|
||||
{
|
||||
int rc;
|
||||
switch(*tcode)
|
||||
{
|
||||
/* Fail if we reach something we don't understand */
|
||||
|
||||
default:
|
||||
return SSB_FAIL;
|
||||
|
||||
/* If we hit a bracket or a positive lookahead assertion, recurse to set
|
||||
bits from within the subpattern. If it can't find anything, we have to
|
||||
give up. If it finds some mandatory character(s), we are done for this
|
||||
branch. Otherwise, carry on scanning after the subpattern. */
|
||||
|
||||
case OP_BRA:
|
||||
case OP_SBRA:
|
||||
case OP_CBRA:
|
||||
case OP_SCBRA:
|
||||
case OP_ONCE:
|
||||
case OP_ASSERT:
|
||||
rc = set_start_bits(tcode, start_bits, caseless, utf8, cd);
|
||||
if (rc == SSB_FAIL) return SSB_FAIL;
|
||||
if (rc == SSB_DONE) try_next = FALSE; else
|
||||
{
|
||||
do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
|
||||
tcode += 1 + LINK_SIZE;
|
||||
}
|
||||
break;
|
||||
|
||||
/* If we hit ALT or KET, it means we haven't found anything mandatory in
|
||||
this branch, though we might have found something optional. For ALT, we
|
||||
continue with the next alternative, but we have to arrange that the final
|
||||
result from subpattern is SSB_CONTINUE rather than SSB_DONE. For KET,
|
||||
return SSB_CONTINUE: if this is the top level, that indicates failure,
|
||||
but after a nested subpattern, it causes scanning to continue. */
|
||||
|
||||
case OP_ALT:
|
||||
yield = SSB_CONTINUE;
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
case OP_KET:
|
||||
case OP_KETRMAX:
|
||||
case OP_KETRMIN:
|
||||
return SSB_CONTINUE;
|
||||
|
||||
/* Skip over callout */
|
||||
|
||||
case OP_CALLOUT:
|
||||
tcode += 2 + 2*LINK_SIZE;
|
||||
break;
|
||||
|
||||
/* Skip over lookbehind and negative lookahead assertions */
|
||||
|
||||
case OP_ASSERT_NOT:
|
||||
case OP_ASSERTBACK:
|
||||
case OP_ASSERTBACK_NOT:
|
||||
do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
|
||||
tcode += 1 + LINK_SIZE;
|
||||
break;
|
||||
|
||||
/* Skip over an option setting, changing the caseless flag */
|
||||
|
||||
case OP_OPT:
|
||||
caseless = (tcode[1] & PCRE_CASELESS) != 0;
|
||||
tcode += 2;
|
||||
break;
|
||||
|
||||
/* BRAZERO does the bracket, but carries on. */
|
||||
|
||||
case OP_BRAZERO:
|
||||
case OP_BRAMINZERO:
|
||||
if (set_start_bits(++tcode, start_bits, caseless, utf8, cd) == SSB_FAIL)
|
||||
return SSB_FAIL;
|
||||
/* =========================================================================
|
||||
See the comment at the head of this function concerning the next line,
|
||||
which was an old fudge for the benefit of OS/2.
|
||||
dummy = 1;
|
||||
========================================================================= */
|
||||
do tcode += GET(tcode,1); while (*tcode == OP_ALT);
|
||||
tcode += 1 + LINK_SIZE;
|
||||
break;
|
||||
|
||||
/* Single-char * or ? sets the bit and tries the next item */
|
||||
|
||||
case OP_STAR:
|
||||
case OP_MINSTAR:
|
||||
case OP_POSSTAR:
|
||||
case OP_QUERY:
|
||||
case OP_MINQUERY:
|
||||
case OP_POSQUERY:
|
||||
set_bit(start_bits, tcode[1], caseless, cd);
|
||||
tcode += 2;
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8 && tcode[-1] >= 0xc0)
|
||||
tcode += _pcre_utf8_table4[tcode[-1] & 0x3f];
|
||||
#endif
|
||||
break;
|
||||
|
||||
/* Single-char upto sets the bit and tries the next */
|
||||
|
||||
case OP_UPTO:
|
||||
case OP_MINUPTO:
|
||||
case OP_POSUPTO:
|
||||
set_bit(start_bits, tcode[3], caseless, cd);
|
||||
tcode += 4;
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8 && tcode[-1] >= 0xc0)
|
||||
tcode += _pcre_utf8_table4[tcode[-1] & 0x3f];
|
||||
#endif
|
||||
break;
|
||||
|
||||
/* At least one single char sets the bit and stops */
|
||||
|
||||
case OP_EXACT: /* Fall through */
|
||||
tcode += 2;
|
||||
|
||||
case OP_CHAR:
|
||||
case OP_CHARNC:
|
||||
case OP_PLUS:
|
||||
case OP_MINPLUS:
|
||||
case OP_POSPLUS:
|
||||
set_bit(start_bits, tcode[1], caseless, cd);
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
/* Single character type sets the bits and stops */
|
||||
|
||||
case OP_NOT_DIGIT:
|
||||
for (c = 0; c < 32; c++)
|
||||
start_bits[c] |= ~cd->cbits[c+cbit_digit];
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
case OP_DIGIT:
|
||||
for (c = 0; c < 32; c++)
|
||||
start_bits[c] |= cd->cbits[c+cbit_digit];
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
/* The cbit_space table has vertical tab as whitespace; we have to
|
||||
discard it. */
|
||||
|
||||
case OP_NOT_WHITESPACE:
|
||||
for (c = 0; c < 32; c++)
|
||||
{
|
||||
int d = cd->cbits[c+cbit_space];
|
||||
if (c == 1) d &= ~0x08;
|
||||
start_bits[c] |= ~d;
|
||||
}
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
/* The cbit_space table has vertical tab as whitespace; we have to
|
||||
discard it. */
|
||||
|
||||
case OP_WHITESPACE:
|
||||
for (c = 0; c < 32; c++)
|
||||
{
|
||||
int d = cd->cbits[c+cbit_space];
|
||||
if (c == 1) d &= ~0x08;
|
||||
start_bits[c] |= d;
|
||||
}
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
case OP_NOT_WORDCHAR:
|
||||
for (c = 0; c < 32; c++)
|
||||
start_bits[c] |= ~cd->cbits[c+cbit_word];
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
case OP_WORDCHAR:
|
||||
for (c = 0; c < 32; c++)
|
||||
start_bits[c] |= cd->cbits[c+cbit_word];
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
/* One or more character type fudges the pointer and restarts, knowing
|
||||
it will hit a single character type and stop there. */
|
||||
|
||||
case OP_TYPEPLUS:
|
||||
case OP_TYPEMINPLUS:
|
||||
tcode++;
|
||||
break;
|
||||
|
||||
case OP_TYPEEXACT:
|
||||
tcode += 3;
|
||||
break;
|
||||
|
||||
/* Zero or more repeats of character types set the bits and then
|
||||
try again. */
|
||||
|
||||
case OP_TYPEUPTO:
|
||||
case OP_TYPEMINUPTO:
|
||||
case OP_TYPEPOSUPTO:
|
||||
tcode += 2; /* Fall through */
|
||||
|
||||
case OP_TYPESTAR:
|
||||
case OP_TYPEMINSTAR:
|
||||
case OP_TYPEPOSSTAR:
|
||||
case OP_TYPEQUERY:
|
||||
case OP_TYPEMINQUERY:
|
||||
case OP_TYPEPOSQUERY:
|
||||
switch(tcode[1])
|
||||
{
|
||||
case OP_ANY:
|
||||
return SSB_FAIL;
|
||||
|
||||
case OP_NOT_DIGIT:
|
||||
for (c = 0; c < 32; c++)
|
||||
start_bits[c] |= ~cd->cbits[c+cbit_digit];
|
||||
break;
|
||||
|
||||
case OP_DIGIT:
|
||||
for (c = 0; c < 32; c++)
|
||||
start_bits[c] |= cd->cbits[c+cbit_digit];
|
||||
break;
|
||||
|
||||
/* The cbit_space table has vertical tab as whitespace; we have to
|
||||
discard it. */
|
||||
|
||||
case OP_NOT_WHITESPACE:
|
||||
for (c = 0; c < 32; c++)
|
||||
{
|
||||
int d = cd->cbits[c+cbit_space];
|
||||
if (c == 1) d &= ~0x08;
|
||||
start_bits[c] |= ~d;
|
||||
}
|
||||
break;
|
||||
|
||||
/* The cbit_space table has vertical tab as whitespace; we have to
|
||||
discard it. */
|
||||
|
||||
case OP_WHITESPACE:
|
||||
for (c = 0; c < 32; c++)
|
||||
{
|
||||
int d = cd->cbits[c+cbit_space];
|
||||
if (c == 1) d &= ~0x08;
|
||||
start_bits[c] |= d;
|
||||
}
|
||||
break;
|
||||
|
||||
case OP_NOT_WORDCHAR:
|
||||
for (c = 0; c < 32; c++)
|
||||
start_bits[c] |= ~cd->cbits[c+cbit_word];
|
||||
break;
|
||||
|
||||
case OP_WORDCHAR:
|
||||
for (c = 0; c < 32; c++)
|
||||
start_bits[c] |= cd->cbits[c+cbit_word];
|
||||
break;
|
||||
}
|
||||
|
||||
tcode += 2;
|
||||
break;
|
||||
|
||||
/* Character class where all the information is in a bit map: set the
|
||||
bits and either carry on or not, according to the repeat count. If it was
|
||||
a negative class, and we are operating with UTF-8 characters, any byte
|
||||
with a value >= 0xc4 is a potentially valid starter because it starts a
|
||||
character with a value > 255. */
|
||||
|
||||
case OP_NCLASS:
|
||||
if (utf8)
|
||||
{
|
||||
start_bits[24] |= 0xf0; /* Bits for 0xc4 - 0xc8 */
|
||||
memset(start_bits+25, 0xff, 7); /* Bits for 0xc9 - 0xff */
|
||||
}
|
||||
/* Fall through */
|
||||
|
||||
case OP_CLASS:
|
||||
{
|
||||
tcode++;
|
||||
|
||||
/* In UTF-8 mode, the bits in a bit map correspond to character
|
||||
values, not to byte values. However, the bit map we are constructing is
|
||||
for byte values. So we have to do a conversion for characters whose
|
||||
value is > 127. In fact, there are only two possible starting bytes for
|
||||
characters in the range 128 - 255. */
|
||||
|
||||
if (utf8)
|
||||
{
|
||||
for (c = 0; c < 16; c++) start_bits[c] |= tcode[c];
|
||||
for (c = 128; c < 256; c++)
|
||||
{
|
||||
if ((tcode[c/8] && (1 << (c&7))) != 0)
|
||||
{
|
||||
int d = (c >> 6) | 0xc0; /* Set bit for this starter */
|
||||
start_bits[d/8] |= (1 << (d&7)); /* and then skip on to the */
|
||||
c = (c & 0xc0) + 0x40 - 1; /* next relevant character. */
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* In non-UTF-8 mode, the two bit maps are completely compatible. */
|
||||
|
||||
else
|
||||
{
|
||||
for (c = 0; c < 32; c++) start_bits[c] |= tcode[c];
|
||||
}
|
||||
|
||||
/* Advance past the bit map, and act on what follows */
|
||||
|
||||
tcode += 32;
|
||||
switch (*tcode)
|
||||
{
|
||||
case OP_CRSTAR:
|
||||
case OP_CRMINSTAR:
|
||||
case OP_CRQUERY:
|
||||
case OP_CRMINQUERY:
|
||||
tcode++;
|
||||
break;
|
||||
|
||||
case OP_CRRANGE:
|
||||
case OP_CRMINRANGE:
|
||||
if (((tcode[1] << 8) + tcode[2]) == 0) tcode += 5;
|
||||
else try_next = FALSE;
|
||||
break;
|
||||
|
||||
default:
|
||||
try_next = FALSE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
break; /* End of bitmap class handling */
|
||||
|
||||
} /* End of switch */
|
||||
} /* End of try_next loop */
|
||||
|
||||
code += GET(code, 1); /* Advance to next branch */
|
||||
}
|
||||
while (*code == OP_ALT);
|
||||
return yield;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Study a compiled expression *
|
||||
*************************************************/
|
||||
|
||||
/* This function is handed a compiled expression that it must study to produce
|
||||
information that will speed up the matching. It returns a pcre_extra block
|
||||
which then gets handed back to pcre_exec().
|
||||
|
||||
Arguments:
|
||||
re points to the compiled expression
|
||||
options contains option bits
|
||||
errorptr points to where to place error messages;
|
||||
set NULL unless error
|
||||
|
||||
Returns: pointer to a pcre_extra block, with study_data filled in and the
|
||||
appropriate flag set;
|
||||
NULL on error or if no optimization possible
|
||||
*/
|
||||
|
||||
PCRE_DATA_SCOPE pcre_extra *
|
||||
pcre_study(const pcre *external_re, int options, const char **errorptr)
|
||||
{
|
||||
uschar start_bits[32];
|
||||
pcre_extra *extra;
|
||||
pcre_study_data *study;
|
||||
const uschar *tables;
|
||||
uschar *code;
|
||||
compile_data compile_block;
|
||||
const real_pcre *re = (const real_pcre *)external_re;
|
||||
|
||||
*errorptr = NULL;
|
||||
|
||||
if (re == NULL || re->magic_number != MAGIC_NUMBER)
|
||||
{
|
||||
*errorptr = "argument is not a compiled regular expression";
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if ((options & ~PUBLIC_STUDY_OPTIONS) != 0)
|
||||
{
|
||||
*errorptr = "unknown or incorrect option bit(s) set";
|
||||
return NULL;
|
||||
}
|
||||
|
||||
code = (uschar *)re + re->name_table_offset +
|
||||
(re->name_count * re->name_entry_size);
|
||||
|
||||
/* For an anchored pattern, or an unanchored pattern that has a first char, or
|
||||
a multiline pattern that matches only at "line starts", no further processing
|
||||
at present. */
|
||||
|
||||
if ((re->options & (PCRE_ANCHORED|PCRE_FIRSTSET|PCRE_STARTLINE)) != 0)
|
||||
return NULL;
|
||||
|
||||
/* Set the character tables in the block that is passed around */
|
||||
|
||||
tables = re->tables;
|
||||
if (tables == NULL)
|
||||
(void)pcre_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,
|
||||
(void *)(&tables));
|
||||
|
||||
compile_block.lcc = tables + lcc_offset;
|
||||
compile_block.fcc = tables + fcc_offset;
|
||||
compile_block.cbits = tables + cbits_offset;
|
||||
compile_block.ctypes = tables + ctypes_offset;
|
||||
|
||||
/* See if we can find a fixed set of initial characters for the pattern. */
|
||||
|
||||
memset(start_bits, 0, 32 * sizeof(uschar));
|
||||
if (set_start_bits(code, start_bits, (re->options & PCRE_CASELESS) != 0,
|
||||
(re->options & PCRE_UTF8) != 0, &compile_block) != SSB_DONE) return NULL;
|
||||
|
||||
/* Get a pcre_extra block and a pcre_study_data block. The study data is put in
|
||||
the latter, which is pointed to by the former, which may also get additional
|
||||
data set later by the calling program. At the moment, the size of
|
||||
pcre_study_data is fixed. We nevertheless save it in a field for returning via
|
||||
the pcre_fullinfo() function so that if it becomes variable in the future, we
|
||||
don't have to change that code. */
|
||||
|
||||
extra = (pcre_extra *)(pcre_malloc)
|
||||
(sizeof(pcre_extra) + sizeof(pcre_study_data));
|
||||
|
||||
if (extra == NULL)
|
||||
{
|
||||
*errorptr = "failed to get memory";
|
||||
return NULL;
|
||||
}
|
||||
|
||||
study = (pcre_study_data *)((char *)extra + sizeof(pcre_extra));
|
||||
extra->flags = PCRE_EXTRA_STUDY_DATA;
|
||||
extra->study_data = study;
|
||||
|
||||
study->size = sizeof(pcre_study_data);
|
||||
study->options = PCRE_STUDY_MAPPED;
|
||||
memcpy(study->start_bits, start_bits, sizeof(start_bits));
|
||||
|
||||
return extra;
|
||||
}
|
||||
|
||||
/* End of pcre_study.c */
|
304
glib/pcre/pcre_tables.c
Normal file
304
glib/pcre/pcre_tables.c
Normal file
@@ -0,0 +1,304 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains some fixed tables that are used by more than one of the
|
||||
PCRE code modules. The tables are also #included by the pcretest program, which
|
||||
uses macros to change their names from _pcre_xxx to xxxx, thereby avoiding name
|
||||
clashes with the library. */
|
||||
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
|
||||
the definition is next to the definition of the opcodes in pcre_internal.h. */
|
||||
|
||||
const uschar _pcre_OP_lengths[] = { OP_LENGTHS };
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Tables for UTF-8 support *
|
||||
*************************************************/
|
||||
|
||||
/* These are the breakpoints for different numbers of bytes in a UTF-8
|
||||
character. */
|
||||
|
||||
const int _pcre_utf8_table1[] =
|
||||
{ 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};
|
||||
|
||||
const int _pcre_utf8_table1_size = sizeof(_pcre_utf8_table1)/sizeof(int);
|
||||
|
||||
/* These are the indicator bits and the mask for the data bits to set in the
|
||||
first byte of a character, indexed by the number of additional bytes. */
|
||||
|
||||
const int _pcre_utf8_table2[] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
|
||||
const int _pcre_utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
|
||||
|
||||
/* Table of the number of extra bytes, indexed by the first byte masked with
|
||||
0x3f. The highest number for a valid UTF-8 first byte is in fact 0x3d. */
|
||||
|
||||
const uschar _pcre_utf8_table4[] = {
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
||||
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
|
||||
3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
|
||||
|
||||
/* This table translates Unicode property names into type and code values. It
|
||||
is searched by binary chop, so must be in collating sequence of name. */
|
||||
|
||||
const char _pcre_ucp_names[] =
|
||||
"Any\0"
|
||||
"Arabic\0"
|
||||
"Armenian\0"
|
||||
"Balinese\0"
|
||||
"Bengali\0"
|
||||
"Bopomofo\0"
|
||||
"Braille\0"
|
||||
"Buginese\0"
|
||||
"Buhid\0"
|
||||
"C\0"
|
||||
"Canadian_Aboriginal\0"
|
||||
"Cc\0"
|
||||
"Cf\0"
|
||||
"Cherokee\0"
|
||||
"Cn\0"
|
||||
"Co\0"
|
||||
"Common\0"
|
||||
"Coptic\0"
|
||||
"Cs\0"
|
||||
"Cuneiform\0"
|
||||
"Cypriot\0"
|
||||
"Cyrillic\0"
|
||||
"Deseret\0"
|
||||
"Devanagari\0"
|
||||
"Ethiopic\0"
|
||||
"Georgian\0"
|
||||
"Glagolitic\0"
|
||||
"Gothic\0"
|
||||
"Greek\0"
|
||||
"Gujarati\0"
|
||||
"Gurmukhi\0"
|
||||
"Han\0"
|
||||
"Hangul\0"
|
||||
"Hanunoo\0"
|
||||
"Hebrew\0"
|
||||
"Hiragana\0"
|
||||
"Inherited\0"
|
||||
"Kannada\0"
|
||||
"Katakana\0"
|
||||
"Kharoshthi\0"
|
||||
"Khmer\0"
|
||||
"L\0"
|
||||
"L&\0"
|
||||
"Lao\0"
|
||||
"Latin\0"
|
||||
"Limbu\0"
|
||||
"Linear_B\0"
|
||||
"Ll\0"
|
||||
"Lm\0"
|
||||
"Lo\0"
|
||||
"Lt\0"
|
||||
"Lu\0"
|
||||
"M\0"
|
||||
"Malayalam\0"
|
||||
"Mc\0"
|
||||
"Me\0"
|
||||
"Mn\0"
|
||||
"Mongolian\0"
|
||||
"Myanmar\0"
|
||||
"N\0"
|
||||
"Nd\0"
|
||||
"New_Tai_Lue\0"
|
||||
"Nko\0"
|
||||
"Nl\0"
|
||||
"No\0"
|
||||
"Ogham\0"
|
||||
"Old_Italic\0"
|
||||
"Old_Persian\0"
|
||||
"Oriya\0"
|
||||
"Osmanya\0"
|
||||
"P\0"
|
||||
"Pc\0"
|
||||
"Pd\0"
|
||||
"Pe\0"
|
||||
"Pf\0"
|
||||
"Phags_Pa\0"
|
||||
"Phoenician\0"
|
||||
"Pi\0"
|
||||
"Po\0"
|
||||
"Ps\0"
|
||||
"Runic\0"
|
||||
"S\0"
|
||||
"Sc\0"
|
||||
"Shavian\0"
|
||||
"Sinhala\0"
|
||||
"Sk\0"
|
||||
"Sm\0"
|
||||
"So\0"
|
||||
"Syloti_Nagri\0"
|
||||
"Syriac\0"
|
||||
"Tagalog\0"
|
||||
"Tagbanwa\0"
|
||||
"Tai_Le\0"
|
||||
"Tamil\0"
|
||||
"Telugu\0"
|
||||
"Thaana\0"
|
||||
"Thai\0"
|
||||
"Tibetan\0"
|
||||
"Tifinagh\0"
|
||||
"Ugaritic\0"
|
||||
"Yi\0"
|
||||
"Z\0"
|
||||
"Zl\0"
|
||||
"Zp\0"
|
||||
"Zs\0";
|
||||
|
||||
const ucp_type_table _pcre_utt[] = {
|
||||
{ 0, PT_ANY, 0 },
|
||||
{ 4, PT_SC, ucp_Arabic },
|
||||
{ 11, PT_SC, ucp_Armenian },
|
||||
{ 20, PT_SC, ucp_Balinese },
|
||||
{ 29, PT_SC, ucp_Bengali },
|
||||
{ 37, PT_SC, ucp_Bopomofo },
|
||||
{ 46, PT_SC, ucp_Braille },
|
||||
{ 54, PT_SC, ucp_Buginese },
|
||||
{ 63, PT_SC, ucp_Buhid },
|
||||
{ 69, PT_GC, ucp_C },
|
||||
{ 71, PT_SC, ucp_Canadian_Aboriginal },
|
||||
{ 91, PT_PC, ucp_Cc },
|
||||
{ 94, PT_PC, ucp_Cf },
|
||||
{ 97, PT_SC, ucp_Cherokee },
|
||||
{ 106, PT_PC, ucp_Cn },
|
||||
{ 109, PT_PC, ucp_Co },
|
||||
{ 112, PT_SC, ucp_Common },
|
||||
{ 119, PT_SC, ucp_Coptic },
|
||||
{ 126, PT_PC, ucp_Cs },
|
||||
{ 129, PT_SC, ucp_Cuneiform },
|
||||
{ 139, PT_SC, ucp_Cypriot },
|
||||
{ 147, PT_SC, ucp_Cyrillic },
|
||||
{ 156, PT_SC, ucp_Deseret },
|
||||
{ 164, PT_SC, ucp_Devanagari },
|
||||
{ 175, PT_SC, ucp_Ethiopic },
|
||||
{ 184, PT_SC, ucp_Georgian },
|
||||
{ 193, PT_SC, ucp_Glagolitic },
|
||||
{ 204, PT_SC, ucp_Gothic },
|
||||
{ 211, PT_SC, ucp_Greek },
|
||||
{ 217, PT_SC, ucp_Gujarati },
|
||||
{ 226, PT_SC, ucp_Gurmukhi },
|
||||
{ 235, PT_SC, ucp_Han },
|
||||
{ 239, PT_SC, ucp_Hangul },
|
||||
{ 246, PT_SC, ucp_Hanunoo },
|
||||
{ 254, PT_SC, ucp_Hebrew },
|
||||
{ 261, PT_SC, ucp_Hiragana },
|
||||
{ 270, PT_SC, ucp_Inherited },
|
||||
{ 280, PT_SC, ucp_Kannada },
|
||||
{ 288, PT_SC, ucp_Katakana },
|
||||
{ 297, PT_SC, ucp_Kharoshthi },
|
||||
{ 308, PT_SC, ucp_Khmer },
|
||||
{ 314, PT_GC, ucp_L },
|
||||
{ 316, PT_LAMP, 0 },
|
||||
{ 319, PT_SC, ucp_Lao },
|
||||
{ 323, PT_SC, ucp_Latin },
|
||||
{ 329, PT_SC, ucp_Limbu },
|
||||
{ 335, PT_SC, ucp_Linear_B },
|
||||
{ 344, PT_PC, ucp_Ll },
|
||||
{ 347, PT_PC, ucp_Lm },
|
||||
{ 350, PT_PC, ucp_Lo },
|
||||
{ 353, PT_PC, ucp_Lt },
|
||||
{ 356, PT_PC, ucp_Lu },
|
||||
{ 359, PT_GC, ucp_M },
|
||||
{ 361, PT_SC, ucp_Malayalam },
|
||||
{ 371, PT_PC, ucp_Mc },
|
||||
{ 374, PT_PC, ucp_Me },
|
||||
{ 377, PT_PC, ucp_Mn },
|
||||
{ 380, PT_SC, ucp_Mongolian },
|
||||
{ 390, PT_SC, ucp_Myanmar },
|
||||
{ 398, PT_GC, ucp_N },
|
||||
{ 400, PT_PC, ucp_Nd },
|
||||
{ 403, PT_SC, ucp_New_Tai_Lue },
|
||||
{ 415, PT_SC, ucp_Nko },
|
||||
{ 419, PT_PC, ucp_Nl },
|
||||
{ 422, PT_PC, ucp_No },
|
||||
{ 425, PT_SC, ucp_Ogham },
|
||||
{ 431, PT_SC, ucp_Old_Italic },
|
||||
{ 442, PT_SC, ucp_Old_Persian },
|
||||
{ 454, PT_SC, ucp_Oriya },
|
||||
{ 460, PT_SC, ucp_Osmanya },
|
||||
{ 468, PT_GC, ucp_P },
|
||||
{ 470, PT_PC, ucp_Pc },
|
||||
{ 473, PT_PC, ucp_Pd },
|
||||
{ 476, PT_PC, ucp_Pe },
|
||||
{ 479, PT_PC, ucp_Pf },
|
||||
{ 482, PT_SC, ucp_Phags_Pa },
|
||||
{ 491, PT_SC, ucp_Phoenician },
|
||||
{ 502, PT_PC, ucp_Pi },
|
||||
{ 505, PT_PC, ucp_Po },
|
||||
{ 508, PT_PC, ucp_Ps },
|
||||
{ 511, PT_SC, ucp_Runic },
|
||||
{ 517, PT_GC, ucp_S },
|
||||
{ 519, PT_PC, ucp_Sc },
|
||||
{ 522, PT_SC, ucp_Shavian },
|
||||
{ 530, PT_SC, ucp_Sinhala },
|
||||
{ 538, PT_PC, ucp_Sk },
|
||||
{ 541, PT_PC, ucp_Sm },
|
||||
{ 544, PT_PC, ucp_So },
|
||||
{ 547, PT_SC, ucp_Syloti_Nagri },
|
||||
{ 560, PT_SC, ucp_Syriac },
|
||||
{ 567, PT_SC, ucp_Tagalog },
|
||||
{ 575, PT_SC, ucp_Tagbanwa },
|
||||
{ 584, PT_SC, ucp_Tai_Le },
|
||||
{ 591, PT_SC, ucp_Tamil },
|
||||
{ 597, PT_SC, ucp_Telugu },
|
||||
{ 604, PT_SC, ucp_Thaana },
|
||||
{ 611, PT_SC, ucp_Thai },
|
||||
{ 616, PT_SC, ucp_Tibetan },
|
||||
{ 624, PT_SC, ucp_Tifinagh },
|
||||
{ 633, PT_SC, ucp_Ugaritic },
|
||||
{ 642, PT_SC, ucp_Yi },
|
||||
{ 645, PT_GC, ucp_Z },
|
||||
{ 647, PT_PC, ucp_Zl },
|
||||
{ 650, PT_PC, ucp_Zp },
|
||||
{ 653, PT_PC, ucp_Zs }
|
||||
};
|
||||
|
||||
const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table);
|
||||
|
||||
/* End of pcre_tables.c */
|
132
glib/pcre/pcre_try_flipped.c
Normal file
132
glib/pcre/pcre_try_flipped.c
Normal file
@@ -0,0 +1,132 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains an internal function that tests a compiled pattern to
|
||||
see if it was compiled with the opposite endianness. If so, it uses an
|
||||
auxiliary local function to flip the appropriate bytes. */
|
||||
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Flip bytes in an integer *
|
||||
*************************************************/
|
||||
|
||||
/* This function is called when the magic number in a regex doesn't match, in
|
||||
order to flip its bytes to see if we are dealing with a pattern that was
|
||||
compiled on a host of different endianness. If so, this function is used to
|
||||
flip other byte values.
|
||||
|
||||
Arguments:
|
||||
value the number to flip
|
||||
n the number of bytes to flip (assumed to be 2 or 4)
|
||||
|
||||
Returns: the flipped value
|
||||
*/
|
||||
|
||||
static unsigned long int
|
||||
byteflip(unsigned long int value, int n)
|
||||
{
|
||||
if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
|
||||
return ((value & 0x000000ff) << 24) |
|
||||
((value & 0x0000ff00) << 8) |
|
||||
((value & 0x00ff0000) >> 8) |
|
||||
((value & 0xff000000) >> 24);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Test for a byte-flipped compiled regex *
|
||||
*************************************************/
|
||||
|
||||
/* This function is called from pcre_exec(), pcre_dfa_exec(), and also from
|
||||
pcre_fullinfo(). Its job is to test whether the regex is byte-flipped - that
|
||||
is, it was compiled on a system of opposite endianness. The function is called
|
||||
only when the native MAGIC_NUMBER test fails. If the regex is indeed flipped,
|
||||
we flip all the relevant values into a different data block, and return it.
|
||||
|
||||
Arguments:
|
||||
re points to the regex
|
||||
study points to study data, or NULL
|
||||
internal_re points to a new regex block
|
||||
internal_study points to a new study block
|
||||
|
||||
Returns: the new block if is is indeed a byte-flipped regex
|
||||
NULL if it is not
|
||||
*/
|
||||
|
||||
real_pcre *
|
||||
_pcre_try_flipped(const real_pcre *re, real_pcre *internal_re,
|
||||
const pcre_study_data *study, pcre_study_data *internal_study)
|
||||
{
|
||||
if (byteflip(re->magic_number, sizeof(re->magic_number)) != MAGIC_NUMBER)
|
||||
return NULL;
|
||||
|
||||
*internal_re = *re; /* To copy other fields */
|
||||
internal_re->size = byteflip(re->size, sizeof(re->size));
|
||||
internal_re->options = byteflip(re->options, sizeof(re->options));
|
||||
internal_re->top_bracket =
|
||||
(pcre_uint16)byteflip(re->top_bracket, sizeof(re->top_bracket));
|
||||
internal_re->top_backref =
|
||||
(pcre_uint16)byteflip(re->top_backref, sizeof(re->top_backref));
|
||||
internal_re->first_byte =
|
||||
(pcre_uint16)byteflip(re->first_byte, sizeof(re->first_byte));
|
||||
internal_re->req_byte =
|
||||
(pcre_uint16)byteflip(re->req_byte, sizeof(re->req_byte));
|
||||
internal_re->name_table_offset =
|
||||
(pcre_uint16)byteflip(re->name_table_offset, sizeof(re->name_table_offset));
|
||||
internal_re->name_entry_size =
|
||||
(pcre_uint16)byteflip(re->name_entry_size, sizeof(re->name_entry_size));
|
||||
internal_re->name_count =
|
||||
(pcre_uint16)byteflip(re->name_count, sizeof(re->name_count));
|
||||
|
||||
if (study != NULL)
|
||||
{
|
||||
*internal_study = *study; /* To copy other fields */
|
||||
internal_study->size = byteflip(study->size, sizeof(study->size));
|
||||
internal_study->options = byteflip(study->options, sizeof(study->options));
|
||||
}
|
||||
|
||||
return internal_re;
|
||||
}
|
||||
|
||||
/* End of pcre_tryflipped.c */
|
126
glib/pcre/pcre_ucp_searchfuncs.c
Normal file
126
glib/pcre/pcre_ucp_searchfuncs.c
Normal file
@@ -0,0 +1,126 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/* This file has been modified to use glib instead of the internal table
|
||||
* in ucptable.c -- Marco Barisione */
|
||||
|
||||
/* This module contains code for searching the table of Unicode character
|
||||
properties. */
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
#include "ucp.h" /* Category definitions */
|
||||
#include "ucpinternal.h" /* Internal table details */
|
||||
|
||||
|
||||
/* Table to translate from particular type value to the general value. */
|
||||
|
||||
static int ucp_gentype[] = {
|
||||
ucp_C, ucp_C, ucp_C, ucp_C, ucp_C, /* Cc, Cf, Cn, Co, Cs */
|
||||
ucp_L, ucp_L, ucp_L, ucp_L, ucp_L, /* Ll, Lu, Lm, Lo, Lt */
|
||||
ucp_M, ucp_M, ucp_M, /* Mc, Me, Mn */
|
||||
ucp_N, ucp_N, ucp_N, /* Nd, Nl, No */
|
||||
ucp_P, ucp_P, ucp_P, ucp_P, ucp_P, /* Pc, Pd, Pe, Pf, Pi */
|
||||
ucp_P, ucp_P, /* Ps, Po */
|
||||
ucp_S, ucp_S, ucp_S, ucp_S, /* Sc, Sk, Sm, So */
|
||||
ucp_Z, ucp_Z, ucp_Z /* Zl, Zp, Zs */
|
||||
};
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Search table and return type *
|
||||
*************************************************/
|
||||
|
||||
/* Three values are returned: the category is ucp_C, ucp_L, etc. The detailed
|
||||
character type is ucp_Lu, ucp_Nd, etc. The script is ucp_Latin, etc.
|
||||
|
||||
Arguments:
|
||||
c the character value
|
||||
type_ptr the detailed character type is returned here
|
||||
script_ptr the script is returned here
|
||||
|
||||
Returns: the character type category
|
||||
*/
|
||||
|
||||
int
|
||||
_pcre_ucp_findprop(const unsigned int c, int *type_ptr, int *script_ptr)
|
||||
{
|
||||
/* Note that the Unicode types have the same values in glib and in
|
||||
* PCRE, so ucp_Ll == G_UNICODE_LOWERCASE_LETTER,
|
||||
* ucp_Zs == G_UNICODE_SPACE_SEPARATOR, and so on. */
|
||||
*type_ptr = g_unichar_type(c);
|
||||
*script_ptr = g_unichar_get_script(c);
|
||||
return ucp_gentype[*type_ptr];
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Search table and return other case *
|
||||
*************************************************/
|
||||
|
||||
/* If the given character is a letter, and there is another case for the
|
||||
letter, return the other case. Otherwise, return -1.
|
||||
|
||||
Arguments:
|
||||
c the character value
|
||||
|
||||
Returns: the other case or NOTACHAR if none
|
||||
*/
|
||||
|
||||
unsigned int
|
||||
_pcre_ucp_othercase(const unsigned int c)
|
||||
{
|
||||
int other_case = NOTACHAR;
|
||||
|
||||
if (g_unichar_islower(c))
|
||||
other_case = g_unichar_toupper(c);
|
||||
else if (g_unichar_isupper(c))
|
||||
other_case = g_unichar_tolower(c);
|
||||
|
||||
if (other_case == c)
|
||||
other_case = NOTACHAR;
|
||||
|
||||
return other_case;
|
||||
}
|
||||
|
||||
|
||||
/* End of pcre_ucp_searchfuncs.c */
|
13
glib/pcre/pcre_valid_utf8.c
Normal file
13
glib/pcre/pcre_valid_utf8.c
Normal file
@@ -0,0 +1,13 @@
|
||||
#include "pcre_internal.h"
|
||||
|
||||
/*
|
||||
* This function is not needed by GRegex, so print an error and
|
||||
* return always -1, that is the string is a valid UTF-8 encoded
|
||||
* string.
|
||||
*/
|
||||
int
|
||||
_pcre_valid_utf8(const uschar *string, int length)
|
||||
{
|
||||
g_warning ("%s: this function should not be called", G_STRLOC);
|
||||
return -1;
|
||||
}
|
86
glib/pcre/pcre_version.c
Normal file
86
glib/pcre/pcre_version.c
Normal file
@@ -0,0 +1,86 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains the external function pcre_version(), which returns a
|
||||
string that identifies the PCRE version that is in use. */
|
||||
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Return version string *
|
||||
*************************************************/
|
||||
|
||||
/* These macros are the standard way of turning unquoted text into C strings.
|
||||
They allow macros like PCRE_MAJOR to be defined without quotes, which is
|
||||
convenient for user programs that want to test its value. */
|
||||
|
||||
#define STRING(a) # a
|
||||
#define XSTRING(s) STRING(s)
|
||||
|
||||
/* A problem turned up with PCRE_PRERELEASE, which is defined empty for
|
||||
production releases. Originally, it was used naively in this code:
|
||||
|
||||
return XSTRING(PCRE_MAJOR)
|
||||
"." XSTRING(PCRE_MINOR)
|
||||
XSTRING(PCRE_PRERELEASE)
|
||||
" " XSTRING(PCRE_DATE);
|
||||
|
||||
However, when PCRE_PRERELEASE is empty, this leads to an attempted expansion of
|
||||
STRING(). The C standard states: "If (before argument substitution) any
|
||||
argument consists of no preprocessing tokens, the behavior is undefined." It
|
||||
turns out the gcc treats this case as a single empty string - which is what we
|
||||
really want - but Visual C grumbles about the lack of an argument for the
|
||||
macro. Unfortunately, both are within their rights. To cope with both ways of
|
||||
handling this, I had resort to some messy hackery that does a test at run time.
|
||||
I could find no way of detecting that a macro is defined as an empty string at
|
||||
pre-processor time. This hack uses a standard trick for avoiding calling
|
||||
the STRING macro with an empty argument when doing the test. */
|
||||
|
||||
PCRE_DATA_SCOPE const char *
|
||||
pcre_version(void)
|
||||
{
|
||||
return (XSTRING(Z PCRE_PRERELEASE)[1] == 0)?
|
||||
XSTRING(PCRE_MAJOR.PCRE_MINOR PCRE_DATE) :
|
||||
XSTRING(PCRE_MAJOR.PCRE_MINOR) XSTRING(PCRE_PRERELEASE PCRE_DATE);
|
||||
}
|
||||
|
||||
/* End of pcre_version.c */
|
144
glib/pcre/pcre_xclass.c
Normal file
144
glib/pcre/pcre_xclass.c
Normal file
@@ -0,0 +1,144 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains an internal function that is used to match an extended
|
||||
class (one that contains characters whose values are > 255). It is used by both
|
||||
pcre_exec() and pcre_def_exec(). */
|
||||
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Match character against an XCLASS *
|
||||
*************************************************/
|
||||
|
||||
/* This function is called to match a character against an extended class that
|
||||
might contain values > 255.
|
||||
|
||||
Arguments:
|
||||
c the character
|
||||
data points to the flag byte of the XCLASS data
|
||||
|
||||
Returns: TRUE if character matches, else FALSE
|
||||
*/
|
||||
|
||||
BOOL
|
||||
_pcre_xclass(int c, const uschar *data)
|
||||
{
|
||||
int t;
|
||||
BOOL negated = (*data & XCL_NOT) != 0;
|
||||
|
||||
/* Character values < 256 are matched against a bitmap, if one is present. If
|
||||
not, we still carry on, because there may be ranges that start below 256 in the
|
||||
additional data. */
|
||||
|
||||
if (c < 256)
|
||||
{
|
||||
if ((*data & XCL_MAP) != 0 && (data[1 + c/8] & (1 << (c&7))) != 0)
|
||||
return !negated; /* char found */
|
||||
}
|
||||
|
||||
/* First skip the bit map if present. Then match against the list of Unicode
|
||||
properties or large chars or ranges that end with a large char. We won't ever
|
||||
encounter XCL_PROP or XCL_NOTPROP when UCP support is not compiled. */
|
||||
|
||||
if ((*data++ & XCL_MAP) != 0) data += 32;
|
||||
|
||||
while ((t = *data++) != XCL_END)
|
||||
{
|
||||
int x, y;
|
||||
if (t == XCL_SINGLE)
|
||||
{
|
||||
GETCHARINC(x, data);
|
||||
if (c == x) return !negated;
|
||||
}
|
||||
else if (t == XCL_RANGE)
|
||||
{
|
||||
GETCHARINC(x, data);
|
||||
GETCHARINC(y, data);
|
||||
if (c >= x && c <= y) return !negated;
|
||||
}
|
||||
|
||||
#ifdef SUPPORT_UCP
|
||||
else /* XCL_PROP & XCL_NOTPROP */
|
||||
{
|
||||
int chartype, script;
|
||||
int category = _pcre_ucp_findprop(c, &chartype, &script);
|
||||
|
||||
switch(*data)
|
||||
{
|
||||
case PT_ANY:
|
||||
if (t == XCL_PROP) return !negated;
|
||||
break;
|
||||
|
||||
case PT_LAMP:
|
||||
if ((chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt) ==
|
||||
(t == XCL_PROP)) return !negated;
|
||||
break;
|
||||
|
||||
case PT_GC:
|
||||
if ((data[1] == category) == (t == XCL_PROP)) return !negated;
|
||||
break;
|
||||
|
||||
case PT_PC:
|
||||
if ((data[1] == chartype) == (t == XCL_PROP)) return !negated;
|
||||
break;
|
||||
|
||||
case PT_SC:
|
||||
if ((data[1] == script) == (t == XCL_PROP)) return !negated;
|
||||
break;
|
||||
|
||||
/* This should never occur, but compilers may mutter if there is no
|
||||
default. */
|
||||
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
data += 2;
|
||||
}
|
||||
#endif /* SUPPORT_UCP */
|
||||
}
|
||||
|
||||
return negated; /* char did not match */
|
||||
}
|
||||
|
||||
/* End of pcre_xclass.c */
|
133
glib/pcre/ucp.h
Normal file
133
glib/pcre/ucp.h
Normal file
@@ -0,0 +1,133 @@
|
||||
/*************************************************
|
||||
* Unicode Property Table handler *
|
||||
*************************************************/
|
||||
|
||||
#ifndef _UCP_H
|
||||
#define _UCP_H
|
||||
|
||||
/* This file contains definitions of the property values that are returned by
|
||||
the function _pcre_ucp_findprop(). New values that are added for new releases
|
||||
of Unicode should always be at the end of each enum, for backwards
|
||||
compatibility. */
|
||||
|
||||
/* These are the general character categories. */
|
||||
|
||||
enum {
|
||||
ucp_C, /* Other */
|
||||
ucp_L, /* Letter */
|
||||
ucp_M, /* Mark */
|
||||
ucp_N, /* Number */
|
||||
ucp_P, /* Punctuation */
|
||||
ucp_S, /* Symbol */
|
||||
ucp_Z /* Separator */
|
||||
};
|
||||
|
||||
/* These are the particular character types. */
|
||||
|
||||
enum {
|
||||
ucp_Cc, /* Control */
|
||||
ucp_Cf, /* Format */
|
||||
ucp_Cn, /* Unassigned */
|
||||
ucp_Co, /* Private use */
|
||||
ucp_Cs, /* Surrogate */
|
||||
ucp_Ll, /* Lower case letter */
|
||||
ucp_Lm, /* Modifier letter */
|
||||
ucp_Lo, /* Other letter */
|
||||
ucp_Lt, /* Title case letter */
|
||||
ucp_Lu, /* Upper case letter */
|
||||
ucp_Mc, /* Spacing mark */
|
||||
ucp_Me, /* Enclosing mark */
|
||||
ucp_Mn, /* Non-spacing mark */
|
||||
ucp_Nd, /* Decimal number */
|
||||
ucp_Nl, /* Letter number */
|
||||
ucp_No, /* Other number */
|
||||
ucp_Pc, /* Connector punctuation */
|
||||
ucp_Pd, /* Dash punctuation */
|
||||
ucp_Pe, /* Close punctuation */
|
||||
ucp_Pf, /* Final punctuation */
|
||||
ucp_Pi, /* Initial punctuation */
|
||||
ucp_Po, /* Other punctuation */
|
||||
ucp_Ps, /* Open punctuation */
|
||||
ucp_Sc, /* Currency symbol */
|
||||
ucp_Sk, /* Modifier symbol */
|
||||
ucp_Sm, /* Mathematical symbol */
|
||||
ucp_So, /* Other symbol */
|
||||
ucp_Zl, /* Line separator */
|
||||
ucp_Zp, /* Paragraph separator */
|
||||
ucp_Zs /* Space separator */
|
||||
};
|
||||
|
||||
/* These are the script identifications. */
|
||||
|
||||
enum {
|
||||
ucp_Arabic = G_UNICODE_SCRIPT_ARABIC,
|
||||
ucp_Armenian = G_UNICODE_SCRIPT_ARMENIAN,
|
||||
ucp_Bengali = G_UNICODE_SCRIPT_BENGALI,
|
||||
ucp_Bopomofo = G_UNICODE_SCRIPT_BOPOMOFO,
|
||||
ucp_Braille = G_UNICODE_SCRIPT_BRAILLE,
|
||||
ucp_Buginese = G_UNICODE_SCRIPT_BUGINESE,
|
||||
ucp_Buhid = G_UNICODE_SCRIPT_BUHID,
|
||||
ucp_Canadian_Aboriginal = G_UNICODE_SCRIPT_CANADIAN_ABORIGINAL,
|
||||
ucp_Cherokee = G_UNICODE_SCRIPT_CHEROKEE,
|
||||
ucp_Common = G_UNICODE_SCRIPT_COMMON,
|
||||
ucp_Coptic = G_UNICODE_SCRIPT_COPTIC,
|
||||
ucp_Cypriot = G_UNICODE_SCRIPT_CYPRIOT,
|
||||
ucp_Cyrillic = G_UNICODE_SCRIPT_CYRILLIC,
|
||||
ucp_Deseret = G_UNICODE_SCRIPT_DESERET,
|
||||
ucp_Devanagari = G_UNICODE_SCRIPT_DEVANAGARI,
|
||||
ucp_Ethiopic = G_UNICODE_SCRIPT_ETHIOPIC,
|
||||
ucp_Georgian = G_UNICODE_SCRIPT_GEORGIAN,
|
||||
ucp_Glagolitic = G_UNICODE_SCRIPT_GLAGOLITIC,
|
||||
ucp_Gothic = G_UNICODE_SCRIPT_GOTHIC,
|
||||
ucp_Greek = G_UNICODE_SCRIPT_GREEK,
|
||||
ucp_Gujarati = G_UNICODE_SCRIPT_GUJARATI,
|
||||
ucp_Gurmukhi = G_UNICODE_SCRIPT_GURMUKHI,
|
||||
ucp_Han = G_UNICODE_SCRIPT_HAN,
|
||||
ucp_Hangul = G_UNICODE_SCRIPT_HANGUL,
|
||||
ucp_Hanunoo = G_UNICODE_SCRIPT_HANUNOO,
|
||||
ucp_Hebrew = G_UNICODE_SCRIPT_HEBREW,
|
||||
ucp_Hiragana = G_UNICODE_SCRIPT_HIRAGANA,
|
||||
ucp_Inherited = G_UNICODE_SCRIPT_INHERITED,
|
||||
ucp_Kannada = G_UNICODE_SCRIPT_KANNADA,
|
||||
ucp_Katakana = G_UNICODE_SCRIPT_KATAKANA,
|
||||
ucp_Kharoshthi = G_UNICODE_SCRIPT_KHAROSHTHI,
|
||||
ucp_Khmer = G_UNICODE_SCRIPT_KHMER,
|
||||
ucp_Lao = G_UNICODE_SCRIPT_LAO,
|
||||
ucp_Latin = G_UNICODE_SCRIPT_LATIN,
|
||||
ucp_Limbu = G_UNICODE_SCRIPT_LIMBU,
|
||||
ucp_Linear_B = G_UNICODE_SCRIPT_LINEAR_B,
|
||||
ucp_Malayalam = G_UNICODE_SCRIPT_MALAYALAM,
|
||||
ucp_Mongolian = G_UNICODE_SCRIPT_MONGOLIAN,
|
||||
ucp_Myanmar = G_UNICODE_SCRIPT_MYANMAR,
|
||||
ucp_New_Tai_Lue = G_UNICODE_SCRIPT_NEW_TAI_LUE,
|
||||
ucp_Ogham = G_UNICODE_SCRIPT_OGHAM,
|
||||
ucp_Old_Italic = G_UNICODE_SCRIPT_OLD_ITALIC,
|
||||
ucp_Old_Persian = G_UNICODE_SCRIPT_OLD_PERSIAN,
|
||||
ucp_Oriya = G_UNICODE_SCRIPT_ORIYA,
|
||||
ucp_Osmanya = G_UNICODE_SCRIPT_OSMANYA,
|
||||
ucp_Runic = G_UNICODE_SCRIPT_RUNIC,
|
||||
ucp_Shavian = G_UNICODE_SCRIPT_SHAVIAN,
|
||||
ucp_Sinhala = G_UNICODE_SCRIPT_SINHALA,
|
||||
ucp_Syloti_Nagri = G_UNICODE_SCRIPT_SYLOTI_NAGRI,
|
||||
ucp_Syriac = G_UNICODE_SCRIPT_SYRIAC,
|
||||
ucp_Tagalog = G_UNICODE_SCRIPT_TAGALOG,
|
||||
ucp_Tagbanwa = G_UNICODE_SCRIPT_TAGBANWA,
|
||||
ucp_Tai_Le = G_UNICODE_SCRIPT_TAI_LE,
|
||||
ucp_Tamil = G_UNICODE_SCRIPT_TAMIL,
|
||||
ucp_Telugu = G_UNICODE_SCRIPT_TELUGU,
|
||||
ucp_Thaana = G_UNICODE_SCRIPT_THAANA,
|
||||
ucp_Thai = G_UNICODE_SCRIPT_THAI,
|
||||
ucp_Tibetan = G_UNICODE_SCRIPT_TIBETAN,
|
||||
ucp_Tifinagh = G_UNICODE_SCRIPT_TIFINAGH,
|
||||
ucp_Ugaritic = G_UNICODE_SCRIPT_UGARITIC,
|
||||
ucp_Yi = G_UNICODE_SCRIPT_YI,
|
||||
ucp_Balinese = G_UNICODE_SCRIPT_BALINESE, /* New for Unicode 5.0.0 */
|
||||
ucp_Cuneiform = G_UNICODE_SCRIPT_CUNEIFORM, /* New for Unicode 5.0.0 */
|
||||
ucp_Nko = G_UNICODE_SCRIPT_NKO, /* New for Unicode 5.0.0 */
|
||||
ucp_Phags_Pa = G_UNICODE_SCRIPT_PHAGS_PA, /* New for Unicode 5.0.0 */
|
||||
ucp_Phoenician = G_UNICODE_SCRIPT_PHOENICIAN /* New for Unicode 5.0.0 */
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
/* End of ucp.h */
|
92
glib/pcre/ucpinternal.h
Normal file
92
glib/pcre/ucpinternal.h
Normal file
@@ -0,0 +1,92 @@
|
||||
/*************************************************
|
||||
* Unicode Property Table handler *
|
||||
*************************************************/
|
||||
|
||||
#ifndef _UCPINTERNAL_H
|
||||
#define _UCPINTERNAL_H
|
||||
|
||||
/* Internal header file defining the layout of the bits in each pair of 32-bit
|
||||
words that form a data item in the table. */
|
||||
|
||||
typedef struct cnode {
|
||||
pcre_uint32 f0;
|
||||
pcre_uint32 f1;
|
||||
} cnode;
|
||||
|
||||
/* Things for the f0 field */
|
||||
|
||||
#define f0_scriptmask 0xff000000 /* Mask for script field */
|
||||
#define f0_scriptshift 24 /* Shift for script value */
|
||||
#define f0_rangeflag 0x00f00000 /* Flag for a range item */
|
||||
#define f0_charmask 0x001fffff /* Mask for code point value */
|
||||
|
||||
/* Things for the f1 field */
|
||||
|
||||
#define f1_typemask 0xfc000000 /* Mask for char type field */
|
||||
#define f1_typeshift 26 /* Shift for the type field */
|
||||
#define f1_rangemask 0x0000ffff /* Mask for a range offset */
|
||||
#define f1_casemask 0x0000ffff /* Mask for a case offset */
|
||||
#define f1_caseneg 0xffff8000 /* Bits for negation */
|
||||
|
||||
/* The data consists of a vector of structures of type cnode. The two unsigned
|
||||
32-bit integers are used as follows:
|
||||
|
||||
(f0) (1) The most significant byte holds the script number. The numbers are
|
||||
defined by the enum in ucp.h.
|
||||
|
||||
(2) The 0x00800000 bit is set if this entry defines a range of characters.
|
||||
It is not set if this entry defines a single character
|
||||
|
||||
(3) The 0x00600000 bits are spare.
|
||||
|
||||
(4) The 0x001fffff bits contain the code point. No Unicode code point will
|
||||
ever be greater than 0x0010ffff, so this should be OK for ever.
|
||||
|
||||
(f1) (1) The 0xfc000000 bits contain the character type number. The numbers are
|
||||
defined by an enum in ucp.h.
|
||||
|
||||
(2) The 0x03ff0000 bits are spare.
|
||||
|
||||
(3) The 0x0000ffff bits contain EITHER the unsigned offset to the top of
|
||||
range if this entry defines a range, OR the *signed* offset to the
|
||||
character's "other case" partner if this entry defines a single
|
||||
character. There is no partner if the value is zero.
|
||||
|
||||
-------------------------------------------------------------------------------
|
||||
| script (8) |.|.|.| codepoint (21) || type (6) |.|.| spare (8) | offset (16) |
|
||||
-------------------------------------------------------------------------------
|
||||
| | | | |
|
||||
| | |-> spare | |-> spare
|
||||
| | |
|
||||
| |-> spare |-> spare
|
||||
|
|
||||
|-> range flag
|
||||
|
||||
The upper/lower casing information is set only for characters that come in
|
||||
pairs. The non-one-to-one mappings in the Unicode data are ignored.
|
||||
|
||||
When searching the data, proceed as follows:
|
||||
|
||||
(1) Set up for a binary chop search.
|
||||
|
||||
(2) If the top is not greater than the bottom, the character is not in the
|
||||
table. Its type must therefore be "Cn" ("Undefined").
|
||||
|
||||
(3) Find the middle vector element.
|
||||
|
||||
(4) Extract the code point and compare. If equal, we are done.
|
||||
|
||||
(5) If the test character is smaller, set the top to the current point, and
|
||||
goto (2).
|
||||
|
||||
(6) If the current entry defines a range, compute the last character by adding
|
||||
the offset, and see if the test character is within the range. If it is,
|
||||
we are done.
|
||||
|
||||
(7) Otherwise, set the bottom to one element past the current point and goto
|
||||
(2).
|
||||
*/
|
||||
|
||||
#endif /* _UCPINTERNAL_H */
|
||||
|
||||
/* End of ucpinternal.h */
|
Reference in New Issue
Block a user