mirror of
https://gitlab.gnome.org/GNOME/glib.git
synced 2025-01-12 07:26:15 +01:00
Update to PCRE 7.2
svn path=/trunk/; revision=5659
This commit is contained in:
parent
4067475919
commit
d966e93faf
@ -1,3 +1,7 @@
|
||||
2007-07-31 Matthias Clasen <mclasen@redhat.com>
|
||||
|
||||
* glib/pcre/*: Update the internal PCRE to 7.2
|
||||
|
||||
2007-07-31 Matthias Clasen <mclasen@redhat.com>
|
||||
|
||||
* glib/pltcheck.sh: Fix some glitches
|
||||
|
@ -1,68 +1,5 @@
|
||||
PCRE LICENCE
|
||||
------------
|
||||
|
||||
PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Release 7 of PCRE is distributed under the terms of the "BSD" licence, as
|
||||
specified below. The documentation for PCRE, supplied in the "doc"
|
||||
directory, is distributed under the same terms as the software itself.
|
||||
|
||||
The basic library functions are written in C and are freestanding. Also
|
||||
included in the distribution is a set of C++ wrapper functions.
|
||||
|
||||
|
||||
THE BASIC LIBRARY FUNCTIONS
|
||||
---------------------------
|
||||
|
||||
Written by: Philip Hazel
|
||||
Email local part: ph10
|
||||
Email domain: cam.ac.uk
|
||||
|
||||
University of Cambridge Computing Service,
|
||||
Cambridge, England. Phone: +44 1223 334714.
|
||||
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
All rights reserved.
|
||||
|
||||
|
||||
THE C++ WRAPPER FUNCTIONS
|
||||
-------------------------
|
||||
|
||||
Contributed by: Google Inc.
|
||||
|
||||
Copyright (c) 2006, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
|
||||
THE "BSD" LICENCE
|
||||
-----------------
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the name of Google
|
||||
Inc. nor the names of their contributors may be used to endorse or
|
||||
promote products derived from this software without specific prior
|
||||
written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
Please see the file LICENCE in the PCRE distribution for licensing details.
|
||||
|
||||
End
|
||||
|
@ -9,7 +9,7 @@ INCLUDES = \
|
||||
-DMAX_NAME_COUNT=10000 \
|
||||
-DMAX_DUPLENGTH=30000 \
|
||||
-DLINK_SIZE=2 \
|
||||
-DEBCDIC=0 \
|
||||
-UEBCDIC \
|
||||
-DPOSIX_MALLOC_THRESHOLD=10 \
|
||||
-I$(top_srcdir) \
|
||||
-I$(srcdir) \
|
||||
|
@ -5,7 +5,7 @@
|
||||
/* This is the public header file for the PCRE library, to be #included by
|
||||
applications that call the PCRE functions.
|
||||
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -41,47 +41,31 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
/* The current PCRE version information. */
|
||||
|
||||
/* NOTES FOR FUTURE MAINTAINERS: Do not use numbers with leading zeros, because
|
||||
they may be treated as octal constants. The PCRE_PRERELEASE feature is for
|
||||
identifying release candidates. It might be defined as -RC2, for example. In
|
||||
real releases, it should be defined empty. Do not change the alignment of these
|
||||
statements. The code in ./configure greps out the version numbers by using "cut"
|
||||
to get values from column 29 onwards. These are substituted into pcre-config
|
||||
and libpcre.pc. The values are not put into configure.ac and substituted here
|
||||
(which would simplify this issue) because that makes life harder for those who
|
||||
cannot run ./configure. As it now stands, this file need not be edited in that
|
||||
circumstance. */
|
||||
|
||||
#define PCRE_MAJOR 7
|
||||
#define PCRE_MINOR 0
|
||||
#define PCRE_PRERELEASE
|
||||
#define PCRE_DATE 18-Dec-2006
|
||||
#define PCRE_MINOR 2
|
||||
#define PCRE_PRERELEASE
|
||||
#define PCRE_DATE 2007-06-19
|
||||
|
||||
/* Win32 uses DLL by default; it needs special stuff for exported functions
|
||||
when building PCRE. */
|
||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||
imported have to be identified as such. When building PCRE, the appropriate
|
||||
export setting is defined in pcre_internal.h, which includes this file. So we
|
||||
don't change an existing definition of PCRE_EXP_DECL. */
|
||||
|
||||
/* But don't do that when building as part of GLib */
|
||||
#if 0
|
||||
#ifdef _WIN32
|
||||
# ifdef PCRE_DEFINITION
|
||||
# ifdef DLL_EXPORT
|
||||
# define PCRE_DATA_SCOPE __declspec(dllexport)
|
||||
# endif
|
||||
# else
|
||||
#ifndef PCRE_EXP_DECL
|
||||
# ifdef _WIN32
|
||||
# ifndef PCRE_STATIC
|
||||
# define PCRE_DATA_SCOPE extern __declspec(dllimport)
|
||||
# define PCRE_EXP_DECL extern __declspec(dllimport)
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* Otherwise, we use the standard "extern". */
|
||||
/* By default, we use the standard "extern" declarations. */
|
||||
|
||||
#ifndef PCRE_DATA_SCOPE
|
||||
#ifndef PCRE_EXP_DECL
|
||||
# ifdef __cplusplus
|
||||
# define PCRE_DATA_SCOPE extern "C"
|
||||
# define PCRE_EXP_DECL extern "C"
|
||||
# else
|
||||
# define PCRE_DATA_SCOPE extern
|
||||
# define PCRE_EXP_DECL extern
|
||||
# endif
|
||||
#endif
|
||||
|
||||
@ -122,6 +106,7 @@ extern "C" {
|
||||
#define PCRE_NEWLINE_LF 0x00200000
|
||||
#define PCRE_NEWLINE_CRLF 0x00300000
|
||||
#define PCRE_NEWLINE_ANY 0x00400000
|
||||
#define PCRE_NEWLINE_ANYCRLF 0x00500000
|
||||
|
||||
/* Exec-time and get/set-time error codes */
|
||||
|
||||
@ -165,6 +150,8 @@ extern "C" {
|
||||
#define PCRE_INFO_NAMETABLE 9
|
||||
#define PCRE_INFO_STUDYSIZE 10
|
||||
#define PCRE_INFO_DEFAULT_TABLES 11
|
||||
#define PCRE_INFO_OKPARTIAL 12
|
||||
#define PCRE_INFO_JCHANGED 13
|
||||
|
||||
/* Request types for pcre_config(). Do not re-arrange, in order to remain
|
||||
compatible. */
|
||||
@ -243,41 +230,41 @@ typedef struct pcre_callout_block {
|
||||
#define pcre_free g_free
|
||||
#define pcre_stack_malloc g_try_malloc
|
||||
|
||||
PCRE_DATA_SCOPE int (*pcre_callout)(pcre_callout_block *);
|
||||
PCRE_EXP_DECL int (*pcre_callout)(pcre_callout_block *);
|
||||
|
||||
/* Exported PCRE functions */
|
||||
|
||||
PCRE_DATA_SCOPE pcre *pcre_compile(const char *, int, const char **, int *,
|
||||
PCRE_EXP_DECL pcre *pcre_compile(const char *, int, const char **, int *,
|
||||
const unsigned char *);
|
||||
PCRE_DATA_SCOPE pcre *pcre_compile2(const char *, int, int *, const char **,
|
||||
PCRE_EXP_DECL pcre *pcre_compile2(const char *, int, int *, const char **,
|
||||
int *, const unsigned char *);
|
||||
PCRE_DATA_SCOPE int pcre_config(int, void *);
|
||||
PCRE_DATA_SCOPE int pcre_copy_named_substring(const pcre *, const char *,
|
||||
PCRE_EXP_DECL int pcre_config(int, void *);
|
||||
PCRE_EXP_DECL int pcre_copy_named_substring(const pcre *, const char *,
|
||||
int *, int, const char *, char *, int);
|
||||
PCRE_DATA_SCOPE int pcre_copy_substring(const char *, int *, int, int, char *,
|
||||
PCRE_EXP_DECL int pcre_copy_substring(const char *, int *, int, int, char *,
|
||||
int);
|
||||
PCRE_DATA_SCOPE int pcre_dfa_exec(const pcre *, const pcre_extra *,
|
||||
PCRE_EXP_DECL int pcre_dfa_exec(const pcre *, const pcre_extra *,
|
||||
const char *, int, int, int, int *, int , int *, int);
|
||||
PCRE_DATA_SCOPE int pcre_exec(const pcre *, const pcre_extra *, PCRE_SPTR,
|
||||
PCRE_EXP_DECL int pcre_exec(const pcre *, const pcre_extra *, PCRE_SPTR,
|
||||
int, int, int, int *, int);
|
||||
PCRE_DATA_SCOPE void pcre_free_substring(const char *);
|
||||
PCRE_DATA_SCOPE void pcre_free_substring_list(const char **);
|
||||
PCRE_DATA_SCOPE int pcre_fullinfo(const pcre *, const pcre_extra *, int,
|
||||
PCRE_EXP_DECL void pcre_free_substring(const char *);
|
||||
PCRE_EXP_DECL void pcre_free_substring_list(const char **);
|
||||
PCRE_EXP_DECL int pcre_fullinfo(const pcre *, const pcre_extra *, int,
|
||||
void *);
|
||||
PCRE_DATA_SCOPE int pcre_get_named_substring(const pcre *, const char *,
|
||||
PCRE_EXP_DECL int pcre_get_named_substring(const pcre *, const char *,
|
||||
int *, int, const char *, const char **);
|
||||
PCRE_DATA_SCOPE int pcre_get_stringnumber(const pcre *, const char *);
|
||||
PCRE_DATA_SCOPE int pcre_get_stringtable_entries(const pcre *, const char *,
|
||||
PCRE_EXP_DECL int pcre_get_stringnumber(const pcre *, const char *);
|
||||
PCRE_EXP_DECL int pcre_get_stringtable_entries(const pcre *, const char *,
|
||||
char **, char **);
|
||||
PCRE_DATA_SCOPE int pcre_get_substring(const char *, int *, int, int,
|
||||
PCRE_EXP_DECL int pcre_get_substring(const char *, int *, int, int,
|
||||
const char **);
|
||||
PCRE_DATA_SCOPE int pcre_get_substring_list(const char *, int *, int,
|
||||
PCRE_EXP_DECL int pcre_get_substring_list(const char *, int *, int,
|
||||
const char ***);
|
||||
PCRE_DATA_SCOPE int pcre_info(const pcre *, int *, int *);
|
||||
PCRE_DATA_SCOPE const unsigned char *pcre_maketables(void);
|
||||
PCRE_DATA_SCOPE int pcre_refcount(pcre *, int);
|
||||
PCRE_DATA_SCOPE pcre_extra *pcre_study(const pcre *, int, const char **);
|
||||
PCRE_DATA_SCOPE const char *pcre_version(void);
|
||||
PCRE_EXP_DECL int pcre_info(const pcre *, int *, int *);
|
||||
PCRE_EXP_DECL const unsigned char *pcre_maketables(void);
|
||||
PCRE_EXP_DECL int pcre_refcount(pcre *, int);
|
||||
PCRE_EXP_DECL pcre_extra *pcre_study(const pcre *, int, const char **);
|
||||
PCRE_EXP_DECL const char *pcre_version(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
|
@ -1,24 +1,24 @@
|
||||
/* This file is autogenerated by ../update-pcre/update.sh during
|
||||
* the update of the local copy of PCRE.
|
||||
*/
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* This file is automatically written by the dftables auxiliary
|
||||
program. If you edit it by hand, you might like to edit the Makefile to
|
||||
prevent its ever being regenerated.
|
||||
/* This file contains character tables that are used when no external tables
|
||||
are passed to PCRE by the application that calls it. The tables are used only
|
||||
for characters whose code values are less than 256.
|
||||
|
||||
This file contains the default tables for characters with codes less than
|
||||
128 (ASCII characters). These tables are used when no external tables are
|
||||
passed to PCRE.
|
||||
This is a default version of the tables that assumes ASCII encoding. A program
|
||||
called dftables (which is distributed with PCRE) can be used to build
|
||||
alternative versions of this file. This is necessary if you are running in an
|
||||
EBCDIC environment, or if you want to default to a different encoding, for
|
||||
example ISO-8859-1. When dftables is run, it creates these tables in the
|
||||
current locale. If PCRE is configured with --enable-rebuild-chartables, this
|
||||
happens automatically.
|
||||
|
||||
The following #include is present because without it gcc 4.x may remove
|
||||
the array definition from the final binary if PCRE is built into a static
|
||||
library and dead code stripping is activated. This leads to link errors.
|
||||
Pulling in the header ensures that the array gets flagged as "someone
|
||||
outside this compilation unit might reference this" and so it will always
|
||||
be supplied to the linker. */
|
||||
The following #include is present because without it gcc 4.x may remove the
|
||||
array definition from the final binary if PCRE is built into a static library
|
||||
and dead code stripping is activated. This leads to link errors. Pulling in the
|
||||
header ensures that the array gets flagged as "someone outside this compilation
|
||||
unit might reference this" and so it will always be supplied to the linker. */
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
@ -94,11 +94,10 @@ const unsigned char _pcre_default_tables[] = {
|
||||
240,241,242,243,244,245,246,247,
|
||||
248,249,250,251,252,253,254,255,
|
||||
|
||||
/* This table contains bit maps for various character classes.
|
||||
Each map is 32 bytes long and the bits run from the least
|
||||
significant end of each byte. The classes that have their own
|
||||
maps are: space, xdigit, digit, upper, lower, word, graph
|
||||
print, punct, and cntrl. Other classes are built from combinations. */
|
||||
/* This table contains bit maps for various character classes. Each map is 32
|
||||
bytes long and the bits run from the least significant end of each byte. The
|
||||
classes that have their own maps are: space, xdigit, digit, upper, lower, word,
|
||||
graph, print, punct, and cntrl. Other classes are built from combinations. */
|
||||
|
||||
0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
@ -192,4 +191,4 @@ print, punct, and cntrl. Other classes are built from combinations. */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
|
||||
|
||||
/* End of chartables.c */
|
||||
/* End of pcre_chartables.c */
|
||||
|
@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -58,6 +58,11 @@ used by pcretest. DEBUG is not defined when building a production library. */
|
||||
#endif
|
||||
|
||||
|
||||
/* Macro for setting individual bits in class bitmaps. */
|
||||
|
||||
/* SETBIT(a,b) sets bit number b in the byte-array bitmap a: byte b/8, bit b%8
counting from the least significant end. Both arguments and the whole expansion
are parenthesized so that expression arguments (for example "base + 1") select
the intended byte and bit. Note that b is evaluated twice, so it must not have
side effects. */

#define SETBIT(a,b) ((a)[(b)/8] |= (1 << ((b)%8)))
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Code parameters and static tables *
|
||||
*************************************************/
|
||||
@ -82,21 +87,21 @@ are simple data values; negative values are for special things like \d and so
|
||||
on. Zero means further processing is needed (for things like \x), or the escape
|
||||
is invalid. */
|
||||
|
||||
#if !EBCDIC /* This is the "normal" table for ASCII systems */
|
||||
#ifndef EBCDIC /* This is the "normal" table for ASCII systems */
|
||||
static const short int escapes[] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0, /* 0 - 7 */
|
||||
0, 0, ':', ';', '<', '=', '>', '?', /* 8 - ? */
|
||||
'@', -ESC_A, -ESC_B, -ESC_C, -ESC_D, -ESC_E, 0, -ESC_G, /* @ - G */
|
||||
0, 0, 0, 0, 0, 0, 0, 0, /* H - O */
|
||||
-ESC_P, -ESC_Q, -ESC_R, -ESC_S, 0, 0, 0, -ESC_W, /* P - W */
|
||||
-ESC_H, 0, 0, -ESC_K, 0, 0, 0, 0, /* H - O */
|
||||
-ESC_P, -ESC_Q, -ESC_R, -ESC_S, 0, 0, -ESC_V, -ESC_W, /* P - W */
|
||||
-ESC_X, 0, -ESC_Z, '[', '\\', ']', '^', '_', /* X - _ */
|
||||
'`', 7, -ESC_b, 0, -ESC_d, ESC_e, ESC_f, 0, /* ` - g */
|
||||
0, 0, 0, -ESC_k, 0, 0, ESC_n, 0, /* h - o */
|
||||
-ESC_p, 0, ESC_r, -ESC_s, ESC_tee, 0, 0, -ESC_w, /* p - w */
|
||||
-ESC_h, 0, 0, -ESC_k, 0, 0, ESC_n, 0, /* h - o */
|
||||
-ESC_p, 0, ESC_r, -ESC_s, ESC_tee, 0, -ESC_v, -ESC_w, /* p - w */
|
||||
0, 0, -ESC_z /* x - z */
|
||||
};
|
||||
|
||||
#else /* This is the "abnormal" table for EBCDIC systems */
|
||||
#else /* This is the "abnormal" table for EBCDIC systems */
|
||||
static const short int escapes[] = {
|
||||
/* 48 */ 0, 0, 0, '.', '<', '(', '+', '|',
|
||||
/* 50 */ '&', 0, 0, 0, 0, 0, 0, 0,
|
||||
@ -106,18 +111,18 @@ static const short int escapes[] = {
|
||||
/* 70 */ 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
/* 78 */ 0, '`', ':', '#', '@', '\'', '=', '"',
|
||||
/* 80 */ 0, 7, -ESC_b, 0, -ESC_d, ESC_e, ESC_f, 0,
|
||||
/* 88 */ 0, 0, 0, '{', 0, 0, 0, 0,
|
||||
/* 88 */-ESC_h, 0, 0, '{', 0, 0, 0, 0,
|
||||
/* 90 */ 0, 0, -ESC_k, 'l', 0, ESC_n, 0, -ESC_p,
|
||||
/* 98 */ 0, ESC_r, 0, '}', 0, 0, 0, 0,
|
||||
/* A0 */ 0, '~', -ESC_s, ESC_tee, 0, 0, -ESC_w, 0,
|
||||
/* A0 */ 0, '~', -ESC_s, ESC_tee, 0,-ESC_v, -ESC_w, 0,
|
||||
/* A8 */ 0,-ESC_z, 0, 0, 0, '[', 0, 0,
|
||||
/* B0 */ 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
/* B8 */ 0, 0, 0, 0, 0, ']', '=', '-',
|
||||
/* C0 */ '{',-ESC_A, -ESC_B, -ESC_C, -ESC_D,-ESC_E, 0, -ESC_G,
|
||||
/* C8 */ 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
/* C8 */-ESC_H, 0, 0, 0, 0, 0, 0, 0,
|
||||
/* D0 */ '}', 0, 0, 0, 0, 0, 0, -ESC_P,
|
||||
/* D8 */-ESC_Q,-ESC_R, 0, 0, 0, 0, 0, 0,
|
||||
/* E0 */ '\\', 0, -ESC_S, 0, 0, 0, -ESC_W, -ESC_X,
|
||||
/* E0 */ '\\', 0, -ESC_S, 0, 0,-ESC_V, -ESC_W, -ESC_X,
|
||||
/* E8 */ 0,-ESC_Z, 0, 0, 0, 0, 0, 0,
|
||||
/* F0 */ 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
/* F8 */ 0, 0, 0, 0, 0, 0, 0, 0
|
||||
@ -186,7 +191,7 @@ are no longer used. */
|
||||
|
||||
#define DEAD(s) "\0"
|
||||
|
||||
static const char error_texts[] =
|
||||
static const char error_texts[] =
|
||||
"no error\0"
|
||||
"\\ at end of pattern\0"
|
||||
"\\c at end of pattern\0"
|
||||
@ -221,7 +226,7 @@ static const char error_texts[] =
|
||||
"malformed number or name after (?(\0"
|
||||
"conditional group contains more than two branches\0"
|
||||
"assertion expected after (?(\0"
|
||||
"(?R or (?digits must be followed by )\0"
|
||||
"(?R or (?[+-]digits must be followed by )\0"
|
||||
/* 30 */
|
||||
"unknown POSIX class name\0"
|
||||
"POSIX collating elements are not supported\0"
|
||||
@ -255,7 +260,8 @@ static const char error_texts[] =
|
||||
/* 55 */
|
||||
"repeating a DEFINE group is not allowed\0"
|
||||
"inconsistent NEWLINE options\0"
|
||||
"\\g is not followed by an (optionally braced) non-zero number";
|
||||
"\\g is not followed by a braced name or an optionally braced non-zero number\0"
|
||||
"(?+ or (?- or (?(+ or (?(- must be followed by a non-zero number";
|
||||
|
||||
static const int error_texts_offsets[] = {
|
||||
0,
|
||||
@ -315,15 +321,14 @@ static const int error_texts_offsets[] = {
|
||||
1796,
|
||||
1839,
|
||||
1879,
|
||||
1908
|
||||
1908,
|
||||
1984
|
||||
};
|
||||
|
||||
|
||||
/* Definition to allow mutual recursion */
|
||||
|
||||
static BOOL
|
||||
compile_regex(int, int, uschar **, const uschar **, int *, BOOL, int, int *,
|
||||
int *, branch_chain *, compile_data *, int *);
|
||||
compile_regex(int, int, uschar **, const uschar **, int *, BOOL, BOOL, int,
|
||||
int *, int *, branch_chain *, compile_data *, int *);
|
||||
|
||||
|
||||
|
||||
@ -370,11 +375,11 @@ if (c == 0) *errorcodeptr = ERR1;
|
||||
a table. A non-zero result is something that can be returned immediately.
|
||||
Otherwise further processing may be required. */
|
||||
|
||||
#if !EBCDIC /* ASCII coding */
|
||||
#ifndef EBCDIC /* ASCII coding */
|
||||
else if (c < '0' || c > 'z') {} /* Not alphameric */
|
||||
else if ((i = escapes[c - '0']) != 0) c = i;
|
||||
|
||||
#else /* EBCDIC coding */
|
||||
#else /* EBCDIC coding */
|
||||
else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {} /* Not alphameric */
|
||||
else if ((i = escapes[c - 0x48]) != 0) c = i;
|
||||
#endif
|
||||
@ -401,11 +406,22 @@ else
|
||||
|
||||
/* \g must be followed by a number, either plain or braced. If positive, it
|
||||
is an absolute backreference. If negative, it is a relative backreference.
|
||||
This is a Perl 5.10 feature. */
|
||||
This is a Perl 5.10 feature. Perl 5.10 also supports \g{name} as a
|
||||
reference to a named group. This is part of Perl's movement towards a
|
||||
unified syntax for back references. As this is synonymous with \k{name}, we
|
||||
fudge it up by pretending it really was \k. */
|
||||
|
||||
case 'g':
|
||||
if (ptr[1] == '{')
|
||||
{
|
||||
const uschar *p;
|
||||
for (p = ptr+2; *p != 0 && *p != '}'; p++)
|
||||
if (*p != '-' && g_ascii_isdigit(*p) == 0) break;
|
||||
if (*p != 0 && *p != '}')
|
||||
{
|
||||
c = -ESC_k;
|
||||
break;
|
||||
}
|
||||
braced = TRUE;
|
||||
ptr++;
|
||||
}
|
||||
@ -511,10 +527,10 @@ else
|
||||
if (c == 0 && cc == '0') continue; /* Leading zeroes */
|
||||
count++;
|
||||
|
||||
#if !EBCDIC /* ASCII coding */
|
||||
#ifndef EBCDIC /* ASCII coding */
|
||||
if (cc >= 'a') cc -= 32; /* Convert to upper case */
|
||||
c = (c << 4) + cc - ((cc < 'A')? '0' : ('A' - 10));
|
||||
#else /* EBCDIC coding */
|
||||
#else /* EBCDIC coding */
|
||||
if (cc >= 'a' && cc <= 'z') cc += 64; /* Convert to upper case */
|
||||
c = (c << 4) + cc - ((cc >= '0')? '0' : ('A' - 10));
|
||||
#endif
|
||||
@ -538,10 +554,10 @@ else
|
||||
{
|
||||
int cc; /* Some compilers don't like ++ */
|
||||
cc = *(++ptr); /* in initializers */
|
||||
#if !EBCDIC /* ASCII coding */
|
||||
#ifndef EBCDIC /* ASCII coding */
|
||||
if (cc >= 'a') cc -= 32; /* Convert to upper case */
|
||||
c = c * 16 + cc - ((cc < 'A')? '0' : ('A' - 10));
|
||||
#else /* EBCDIC coding */
|
||||
#else /* EBCDIC coding */
|
||||
if (cc <= 'z') cc += 64; /* Convert to upper case */
|
||||
c = c * 16 + cc - ((cc >= '0')? '0' : ('A' - 10));
|
||||
#endif
|
||||
@ -560,10 +576,10 @@ else
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if !EBCDIC /* ASCII coding */
|
||||
#ifndef EBCDIC /* ASCII coding */
|
||||
if (c >= 'a' && c <= 'z') c -= 32;
|
||||
c ^= 0x40;
|
||||
#else /* EBCDIC coding */
|
||||
#else /* EBCDIC coding */
|
||||
if (c >= 'a' && c <= 'z') c += 64;
|
||||
c ^= 0xC0;
|
||||
#endif
|
||||
@ -1195,6 +1211,7 @@ for (;;)
|
||||
else
|
||||
{
|
||||
code += _pcre_OP_lengths[c];
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8) switch(c)
|
||||
{
|
||||
case OP_CHAR:
|
||||
@ -1215,6 +1232,7 @@ for (;;)
|
||||
if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1258,6 +1276,7 @@ for (;;)
|
||||
else
|
||||
{
|
||||
code += _pcre_OP_lengths[c];
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8) switch(c)
|
||||
{
|
||||
case OP_CHAR:
|
||||
@ -1278,6 +1297,7 @@ for (;;)
|
||||
if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1315,6 +1335,18 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE
|
||||
|
||||
c = *code;
|
||||
|
||||
/* Groups with zero repeats can of course be empty; skip them. */
|
||||
|
||||
if (c == OP_BRAZERO || c == OP_BRAMINZERO)
|
||||
{
|
||||
code += _pcre_OP_lengths[c];
|
||||
do code += GET(code, 1); while (*code == OP_ALT);
|
||||
c = *code;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* For other groups, scan the branches. */
|
||||
|
||||
if (c == OP_BRA || c == OP_CBRA || c == OP_ONCE)
|
||||
{
|
||||
BOOL empty_branch;
|
||||
@ -1331,12 +1363,7 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE
|
||||
}
|
||||
while (*code == OP_ALT);
|
||||
if (!empty_branch) return FALSE; /* All branches are non-empty */
|
||||
|
||||
/* Move past the KET and fudge things so that the increment in the "for"
|
||||
above has no effect. */
|
||||
|
||||
c = OP_END;
|
||||
code += 1 + LINK_SIZE - _pcre_OP_lengths[c];
|
||||
c = *code;
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -1530,8 +1557,8 @@ check_posix_name(const uschar *ptr, int len)
|
||||
int yield = 0;
|
||||
while (posix_name_lengths[yield] != 0)
|
||||
{
|
||||
if (len == posix_name_lengths[yield] &&
|
||||
strcmp((const char *)ptr, posix_names + offset) == 0) return yield;
|
||||
if (len == posix_name_lengths[yield] &&
|
||||
strcmp((const char *)ptr, posix_names + offset) == 0) return yield;
|
||||
offset += posix_name_lengths[yield] + 1;
|
||||
yield++;
|
||||
}
|
||||
@ -1872,6 +1899,50 @@ if (next >= 0) switch(op_code)
|
||||
case OP_NOT_WORDCHAR:
|
||||
return next <= 127 && (cd->ctypes[next] & ctype_word) != 0;
|
||||
|
||||
case OP_HSPACE:
|
||||
case OP_NOT_HSPACE:
|
||||
switch(next)
|
||||
{
|
||||
case 0x09:
|
||||
case 0x20:
|
||||
case 0xa0:
|
||||
case 0x1680:
|
||||
case 0x180e:
|
||||
case 0x2000:
|
||||
case 0x2001:
|
||||
case 0x2002:
|
||||
case 0x2003:
|
||||
case 0x2004:
|
||||
case 0x2005:
|
||||
case 0x2006:
|
||||
case 0x2007:
|
||||
case 0x2008:
|
||||
case 0x2009:
|
||||
case 0x200A:
|
||||
case 0x202f:
|
||||
case 0x205f:
|
||||
case 0x3000:
|
||||
return op_code != OP_HSPACE;
|
||||
default:
|
||||
return op_code == OP_HSPACE;
|
||||
}
|
||||
|
||||
case OP_VSPACE:
|
||||
case OP_NOT_VSPACE:
|
||||
switch(next)
|
||||
{
|
||||
case 0x0a:
|
||||
case 0x0b:
|
||||
case 0x0c:
|
||||
case 0x0d:
|
||||
case 0x85:
|
||||
case 0x2028:
|
||||
case 0x2029:
|
||||
return op_code != OP_VSPACE;
|
||||
default:
|
||||
return op_code == OP_VSPACE;
|
||||
}
|
||||
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
@ -1906,12 +1977,57 @@ switch(op_code)
|
||||
case ESC_W:
|
||||
return item <= 127 && (cd->ctypes[item] & ctype_word) != 0;
|
||||
|
||||
case ESC_h:
|
||||
case ESC_H:
|
||||
switch(item)
|
||||
{
|
||||
case 0x09:
|
||||
case 0x20:
|
||||
case 0xa0:
|
||||
case 0x1680:
|
||||
case 0x180e:
|
||||
case 0x2000:
|
||||
case 0x2001:
|
||||
case 0x2002:
|
||||
case 0x2003:
|
||||
case 0x2004:
|
||||
case 0x2005:
|
||||
case 0x2006:
|
||||
case 0x2007:
|
||||
case 0x2008:
|
||||
case 0x2009:
|
||||
case 0x200A:
|
||||
case 0x202f:
|
||||
case 0x205f:
|
||||
case 0x3000:
|
||||
return -next != ESC_h;
|
||||
default:
|
||||
return -next == ESC_h;
|
||||
}
|
||||
|
||||
case ESC_v:
|
||||
case ESC_V:
|
||||
switch(item)
|
||||
{
|
||||
case 0x0a:
|
||||
case 0x0b:
|
||||
case 0x0c:
|
||||
case 0x0d:
|
||||
case 0x85:
|
||||
case 0x2028:
|
||||
case 0x2029:
|
||||
return -next != ESC_v;
|
||||
default:
|
||||
return -next == ESC_v;
|
||||
}
|
||||
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
case OP_DIGIT:
|
||||
return next == -ESC_D || next == -ESC_s || next == -ESC_W;
|
||||
return next == -ESC_D || next == -ESC_s || next == -ESC_W ||
|
||||
next == -ESC_h || next == -ESC_v;
|
||||
|
||||
case OP_NOT_DIGIT:
|
||||
return next == -ESC_d;
|
||||
@ -1920,10 +2036,23 @@ switch(op_code)
|
||||
return next == -ESC_S || next == -ESC_d || next == -ESC_w;
|
||||
|
||||
case OP_NOT_WHITESPACE:
|
||||
return next == -ESC_s;
|
||||
return next == -ESC_s || next == -ESC_h || next == -ESC_v;
|
||||
|
||||
case OP_HSPACE:
|
||||
return next == -ESC_S || next == -ESC_H || next == -ESC_d || next == -ESC_w;
|
||||
|
||||
case OP_NOT_HSPACE:
|
||||
return next == -ESC_h;
|
||||
|
||||
/* Can't have \S in here because VT matches \S (Perl anomaly) */
|
||||
case OP_VSPACE:
|
||||
return next == -ESC_V || next == -ESC_d || next == -ESC_w;
|
||||
|
||||
case OP_NOT_VSPACE:
|
||||
return next == -ESC_v;
|
||||
|
||||
case OP_WORDCHAR:
|
||||
return next == -ESC_W || next == -ESC_s;
|
||||
return next == -ESC_W || next == -ESC_s || next == -ESC_h || next == -ESC_v;
|
||||
|
||||
case OP_NOT_WORDCHAR:
|
||||
return next == -ESC_w || next == -ESC_d;
|
||||
@ -2038,10 +2167,12 @@ for (;; ptr++)
|
||||
BOOL possessive_quantifier;
|
||||
BOOL is_quantifier;
|
||||
BOOL is_recurse;
|
||||
BOOL reset_bracount;
|
||||
int class_charcount;
|
||||
int class_lastchar;
|
||||
int newoptions;
|
||||
int recno;
|
||||
int refsign;
|
||||
int skipbytes;
|
||||
int subreqbyte;
|
||||
int subfirstbyte;
|
||||
@ -2466,6 +2597,133 @@ for (;; ptr++)
|
||||
else if (c == -ESC_d || c == -ESC_D || c == -ESC_w ||
|
||||
c == -ESC_W || c == -ESC_s || c == -ESC_S) continue;
|
||||
|
||||
/* We need to deal with \H, \h, \V, and \v in both phases because
|
||||
they use extra memory. */
|
||||
|
||||
if (-c == ESC_h)
|
||||
{
|
||||
SETBIT(classbits, 0x09); /* HT */
|
||||
SETBIT(classbits, 0x20); /* SPACE */
|
||||
SETBIT(classbits, 0xa0); /* NBSP */
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8)
|
||||
{
|
||||
class_utf8 = TRUE;
|
||||
*class_utf8data++ = XCL_SINGLE;
|
||||
class_utf8data += _pcre_ord2utf8(0x1680, class_utf8data);
|
||||
*class_utf8data++ = XCL_SINGLE;
|
||||
class_utf8data += _pcre_ord2utf8(0x180e, class_utf8data);
|
||||
*class_utf8data++ = XCL_RANGE;
|
||||
class_utf8data += _pcre_ord2utf8(0x2000, class_utf8data);
|
||||
class_utf8data += _pcre_ord2utf8(0x200A, class_utf8data);
|
||||
*class_utf8data++ = XCL_SINGLE;
|
||||
class_utf8data += _pcre_ord2utf8(0x202f, class_utf8data);
|
||||
*class_utf8data++ = XCL_SINGLE;
|
||||
class_utf8data += _pcre_ord2utf8(0x205f, class_utf8data);
|
||||
*class_utf8data++ = XCL_SINGLE;
|
||||
class_utf8data += _pcre_ord2utf8(0x3000, class_utf8data);
|
||||
}
|
||||
#endif
|
||||
continue;
|
||||
}
|
||||
|
||||
if (-c == ESC_H)
|
||||
{
|
||||
for (c = 0; c < 32; c++)
|
||||
{
|
||||
int x = 0xff;
|
||||
switch (c)
|
||||
{
|
||||
case 0x09/8: x ^= 1 << (0x09%8); break;
|
||||
case 0x20/8: x ^= 1 << (0x20%8); break;
|
||||
case 0xa0/8: x ^= 1 << (0xa0%8); break;
|
||||
default: break;
|
||||
}
|
||||
classbits[c] |= x;
|
||||
}
|
||||
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8)
|
||||
{
|
||||
class_utf8 = TRUE;
|
||||
*class_utf8data++ = XCL_RANGE;
|
||||
class_utf8data += _pcre_ord2utf8(0x0100, class_utf8data);
|
||||
class_utf8data += _pcre_ord2utf8(0x167f, class_utf8data);
|
||||
*class_utf8data++ = XCL_RANGE;
|
||||
class_utf8data += _pcre_ord2utf8(0x1681, class_utf8data);
|
||||
class_utf8data += _pcre_ord2utf8(0x180d, class_utf8data);
|
||||
*class_utf8data++ = XCL_RANGE;
|
||||
class_utf8data += _pcre_ord2utf8(0x180f, class_utf8data);
|
||||
class_utf8data += _pcre_ord2utf8(0x1fff, class_utf8data);
|
||||
*class_utf8data++ = XCL_RANGE;
|
||||
class_utf8data += _pcre_ord2utf8(0x200B, class_utf8data);
|
||||
class_utf8data += _pcre_ord2utf8(0x202e, class_utf8data);
|
||||
*class_utf8data++ = XCL_RANGE;
|
||||
class_utf8data += _pcre_ord2utf8(0x2030, class_utf8data);
|
||||
class_utf8data += _pcre_ord2utf8(0x205e, class_utf8data);
|
||||
*class_utf8data++ = XCL_RANGE;
|
||||
class_utf8data += _pcre_ord2utf8(0x2060, class_utf8data);
|
||||
class_utf8data += _pcre_ord2utf8(0x2fff, class_utf8data);
|
||||
*class_utf8data++ = XCL_RANGE;
|
||||
class_utf8data += _pcre_ord2utf8(0x3001, class_utf8data);
|
||||
class_utf8data += _pcre_ord2utf8(0x7fffffff, class_utf8data);
|
||||
}
|
||||
#endif
|
||||
continue;
|
||||
}
|
||||
|
||||
if (-c == ESC_v)
|
||||
{
|
||||
SETBIT(classbits, 0x0a); /* LF */
|
||||
SETBIT(classbits, 0x0b); /* VT */
|
||||
SETBIT(classbits, 0x0c); /* FF */
|
||||
SETBIT(classbits, 0x0d); /* CR */
|
||||
SETBIT(classbits, 0x85); /* NEL */
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8)
|
||||
{
|
||||
class_utf8 = TRUE;
|
||||
*class_utf8data++ = XCL_RANGE;
|
||||
class_utf8data += _pcre_ord2utf8(0x2028, class_utf8data);
|
||||
class_utf8data += _pcre_ord2utf8(0x2029, class_utf8data);
|
||||
}
|
||||
#endif
|
||||
continue;
|
||||
}
|
||||
|
||||
if (-c == ESC_V)
|
||||
{
|
||||
for (c = 0; c < 32; c++)
|
||||
{
|
||||
int x = 0xff;
|
||||
switch (c)
|
||||
{
|
||||
case 0x0a/8: x ^= 1 << (0x0a%8);
|
||||
x ^= 1 << (0x0b%8);
|
||||
x ^= 1 << (0x0c%8);
|
||||
x ^= 1 << (0x0d%8);
|
||||
break;
|
||||
case 0x85/8: x ^= 1 << (0x85%8); break;
|
||||
default: break;
|
||||
}
|
||||
classbits[c] |= x;
|
||||
}
|
||||
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8)
|
||||
{
|
||||
class_utf8 = TRUE;
|
||||
*class_utf8data++ = XCL_RANGE;
|
||||
class_utf8data += _pcre_ord2utf8(0x0100, class_utf8data);
|
||||
class_utf8data += _pcre_ord2utf8(0x2027, class_utf8data);
|
||||
*class_utf8data++ = XCL_RANGE;
|
||||
class_utf8data += _pcre_ord2utf8(0x2029, class_utf8data);
|
||||
class_utf8data += _pcre_ord2utf8(0x7fffffff, class_utf8data);
|
||||
}
|
||||
#endif
|
||||
continue;
|
||||
}
|
||||
|
||||
/* We need to deal with \P and \p in both phases. */
|
||||
|
||||
#ifdef SUPPORT_UCP
|
||||
@ -2606,14 +2864,18 @@ for (;; ptr++)
|
||||
unsigned int origd = d;
|
||||
while (get_othercase_range(&cc, origd, &occ, &ocd))
|
||||
{
|
||||
if (occ >= c && ocd <= d) continue; /* Skip embedded ranges */
|
||||
if (occ >= (unsigned int)c &&
|
||||
ocd <= (unsigned int)d)
|
||||
continue; /* Skip embedded ranges */
|
||||
|
||||
if (occ < c && ocd >= c - 1) /* Extend the basic range */
|
||||
if (occ < (unsigned int)c &&
|
||||
ocd >= (unsigned int)c - 1) /* Extend the basic range */
|
||||
{ /* if there is overlap, */
|
||||
c = occ; /* noting that if occ < c */
|
||||
continue; /* we can't have ocd > d */
|
||||
} /* because a subrange is */
|
||||
if (ocd > d && occ <= d + 1) /* always shorter than */
|
||||
if (ocd > (unsigned int)d &&
|
||||
occ <= (unsigned int)d + 1) /* always shorter than */
|
||||
{ /* the basic range. */
|
||||
d = ocd;
|
||||
continue;
|
||||
@ -3511,6 +3773,7 @@ for (;; ptr++)
|
||||
skipbytes = 0;
|
||||
bravalue = OP_CBRA;
|
||||
save_hwm = cd->hwm;
|
||||
reset_bracount = FALSE;
|
||||
|
||||
if (*(++ptr) == '?')
|
||||
{
|
||||
@ -3532,6 +3795,11 @@ for (;; ptr++)
|
||||
continue;
|
||||
|
||||
|
||||
/* ------------------------------------------------------------ */
|
||||
case '|': /* Reset capture count for each branch */
|
||||
reset_bracount = TRUE;
|
||||
/* Fall through */
|
||||
|
||||
/* ------------------------------------------------------------ */
|
||||
case ':': /* Non-capturing bracket */
|
||||
bravalue = OP_BRA;
|
||||
@ -3568,6 +3836,7 @@ for (;; ptr++)
|
||||
|
||||
code[1+LINK_SIZE] = OP_CREF;
|
||||
skipbytes = 3;
|
||||
refsign = -1;
|
||||
|
||||
/* Check for a test for recursion in a named group. */
|
||||
|
||||
@ -3591,7 +3860,11 @@ for (;; ptr++)
|
||||
terminator = '\'';
|
||||
ptr++;
|
||||
}
|
||||
else terminator = 0;
|
||||
else
|
||||
{
|
||||
terminator = 0;
|
||||
if (ptr[1] == '-' || ptr[1] == '+') refsign = *(++ptr);
|
||||
}
|
||||
|
||||
/* We now expect to read a name; anything else is an error */
|
||||
|
||||
@ -3627,7 +3900,32 @@ for (;; ptr++)
|
||||
if (lengthptr != NULL) break;
|
||||
|
||||
/* In the real compile we do the work of looking for the actual
|
||||
reference. */
|
||||
reference. If the string started with "+" or "-" we require the rest to
|
||||
be digits, in which case recno will be set. */
|
||||
|
||||
if (refsign > 0)
|
||||
{
|
||||
if (recno <= 0)
|
||||
{
|
||||
*errorcodeptr = ERR58;
|
||||
goto FAILED;
|
||||
}
|
||||
if (refsign == '-')
|
||||
{
|
||||
recno = cd->bracount - recno + 1;
|
||||
if (recno <= 0)
|
||||
{
|
||||
*errorcodeptr = ERR15;
|
||||
goto FAILED;
|
||||
}
|
||||
}
|
||||
else recno += cd->bracount;
|
||||
PUT2(code, 2+LINK_SIZE, recno);
|
||||
break;
|
||||
}
|
||||
|
||||
/* Otherwise (did not start with "+" or "-"), start by looking for the
|
||||
name. */
|
||||
|
||||
slot = cd->name_table;
|
||||
for (i = 0; i < cd->names_found; i++)
|
||||
@ -3946,19 +4244,54 @@ for (;; ptr++)
|
||||
|
||||
|
||||
/* ------------------------------------------------------------ */
|
||||
case '-': case '+':
|
||||
case '0': case '1': case '2': case '3': case '4': /* Recursion or */
|
||||
case '5': case '6': case '7': case '8': case '9': /* subroutine */
|
||||
{
|
||||
const uschar *called;
|
||||
|
||||
if ((refsign = *ptr) == '+') ptr++;
|
||||
else if (refsign == '-')
|
||||
{
|
||||
if (g_ascii_isdigit(ptr[1]) == 0)
|
||||
goto OTHER_CHAR_AFTER_QUERY;
|
||||
ptr++;
|
||||
}
|
||||
|
||||
recno = 0;
|
||||
while(g_ascii_isdigit(*ptr) != 0)
|
||||
recno = recno * 10 + *ptr++ - '0';
|
||||
|
||||
if (*ptr != ')')
|
||||
{
|
||||
*errorcodeptr = ERR29;
|
||||
goto FAILED;
|
||||
}
|
||||
|
||||
if (refsign == '-')
|
||||
{
|
||||
if (recno == 0)
|
||||
{
|
||||
*errorcodeptr = ERR58;
|
||||
goto FAILED;
|
||||
}
|
||||
recno = cd->bracount - recno + 1;
|
||||
if (recno <= 0)
|
||||
{
|
||||
*errorcodeptr = ERR15;
|
||||
goto FAILED;
|
||||
}
|
||||
}
|
||||
else if (refsign == '+')
|
||||
{
|
||||
if (recno == 0)
|
||||
{
|
||||
*errorcodeptr = ERR58;
|
||||
goto FAILED;
|
||||
}
|
||||
recno += cd->bracount;
|
||||
}
|
||||
|
||||
/* Come here from code above that handles a named recursion */
|
||||
|
||||
HANDLE_RECURSION:
|
||||
@ -4031,6 +4364,7 @@ for (;; ptr++)
|
||||
|
||||
/* ------------------------------------------------------------ */
|
||||
default: /* Other characters: check option setting */
|
||||
OTHER_CHAR_AFTER_QUERY:
|
||||
set = unset = 0;
|
||||
optset = &set;
|
||||
|
||||
@ -4165,6 +4499,7 @@ for (;; ptr++)
|
||||
errorcodeptr, /* Where to put an error message */
|
||||
(bravalue == OP_ASSERTBACK ||
|
||||
bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */
|
||||
reset_bracount, /* True if (?| group */
|
||||
skipbytes, /* Skip over bracket number */
|
||||
&subfirstbyte, /* For possible first char */
|
||||
&subreqbyte, /* For possible last char */
|
||||
@ -4181,9 +4516,11 @@ for (;; ptr++)
|
||||
is on the bracket. */
|
||||
|
||||
/* If this is a conditional bracket, check that there are no more than
|
||||
two branches in the group, or just one if it's a DEFINE group. */
|
||||
two branches in the group, or just one if it's a DEFINE group. We do this
|
||||
in the real compile phase, not in the pre-pass, where the whole group may
|
||||
not be available. */
|
||||
|
||||
if (bravalue == OP_COND)
|
||||
if (bravalue == OP_COND && lengthptr == NULL)
|
||||
{
|
||||
uschar *tc = code;
|
||||
int condcount = 0;
|
||||
@ -4343,12 +4680,13 @@ for (;; ptr++)
|
||||
zerofirstbyte = firstbyte;
|
||||
zeroreqbyte = reqbyte;
|
||||
|
||||
/* \k<name> or \k'name' is a back reference by name (Perl syntax) */
|
||||
/* \k<name> or \k'name' is a back reference by name (Perl syntax).
|
||||
We also support \k{name} (.NET syntax) */
|
||||
|
||||
if (-c == ESC_k && (ptr[1] == '<' || ptr[1] == '\''))
|
||||
if (-c == ESC_k && (ptr[1] == '<' || ptr[1] == '\'' || ptr[1] == '{'))
|
||||
{
|
||||
is_recurse = FALSE;
|
||||
terminator = (*(++ptr) == '<')? '>' : '\'';
|
||||
terminator = (*(++ptr) == '<')? '>' : (*ptr == '\'')? '\'' : '}';
|
||||
goto NAMED_REF_OR_RECURSE;
|
||||
}
|
||||
|
||||
@ -4514,13 +4852,14 @@ This function is used during the pre-compile phase when we are trying to find
|
||||
out the amount of memory needed, as well as during the real compile phase. The
|
||||
value of lengthptr distinguishes the two phases.
|
||||
|
||||
Argument:
|
||||
Arguments:
|
||||
options option bits, including any changes for this subpattern
|
||||
oldims previous settings of ims option bits
|
||||
codeptr -> the address of the current code pointer
|
||||
ptrptr -> the address of the current pattern pointer
|
||||
errorcodeptr -> pointer to error code variable
|
||||
lookbehind TRUE if this is a lookbehind assertion
|
||||
reset_bracount TRUE to reset the count for each branch
|
||||
skipbytes skip this many bytes at start (for brackets and OP_COND)
|
||||
firstbyteptr place to put the first required character, or a negative number
|
||||
reqbyteptr place to put the last required character, or a negative number
|
||||
@ -4534,8 +4873,9 @@ Returns: TRUE on success
|
||||
|
||||
static BOOL
|
||||
compile_regex(int options, int oldims, uschar **codeptr, const uschar **ptrptr,
|
||||
int *errorcodeptr, BOOL lookbehind, int skipbytes, int *firstbyteptr,
|
||||
int *reqbyteptr, branch_chain *bcptr, compile_data *cd, int *lengthptr)
|
||||
int *errorcodeptr, BOOL lookbehind, BOOL reset_bracount, int skipbytes,
|
||||
int *firstbyteptr, int *reqbyteptr, branch_chain *bcptr, compile_data *cd,
|
||||
int *lengthptr)
|
||||
{
|
||||
const uschar *ptr = *ptrptr;
|
||||
uschar *code = *codeptr;
|
||||
@ -4545,6 +4885,8 @@ uschar *reverse_count = NULL;
|
||||
int firstbyte, reqbyte;
|
||||
int branchfirstbyte, branchreqbyte;
|
||||
int length;
|
||||
int orig_bracount;
|
||||
int max_bracount;
|
||||
branch_chain bc;
|
||||
|
||||
bc.outer = bcptr;
|
||||
@ -4573,8 +4915,14 @@ code += 1 + LINK_SIZE + skipbytes;
|
||||
|
||||
/* Loop for each alternative branch */
|
||||
|
||||
orig_bracount = max_bracount = cd->bracount;
|
||||
for (;;)
|
||||
{
|
||||
/* For a (?| group, reset the capturing bracket count so that each branch
|
||||
uses the same numbers. */
|
||||
|
||||
if (reset_bracount) cd->bracount = orig_bracount;
|
||||
|
||||
/* Handle a change of ims options at the start of the branch */
|
||||
|
||||
if ((options & PCRE_IMS) != oldims)
|
||||
@ -4604,6 +4952,11 @@ for (;;)
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* Keep the highest bracket count in case (?| was used and some branch
|
||||
has fewer than the rest. */
|
||||
|
||||
if (cd->bracount > max_bracount) max_bracount = cd->bracount;
|
||||
|
||||
/* In the real compile phase, there is some post-processing to be done. */
|
||||
|
||||
if (lengthptr == NULL)
|
||||
@ -4667,26 +5020,29 @@ for (;;)
|
||||
}
|
||||
}
|
||||
|
||||
/* Reached end of expression, either ')' or end of pattern. Go back through
|
||||
the alternative branches and reverse the chain of offsets, with the field in
|
||||
the BRA item now becoming an offset to the first alternative. If there are
|
||||
no alternatives, it points to the end of the group. The length in the
|
||||
terminating ket is always the length of the whole bracketed item. If any of
|
||||
the ims options were changed inside the group, compile a resetting op-code
|
||||
following, except at the very end of the pattern. Return leaving the pointer
|
||||
at the terminating char. */
|
||||
/* Reached end of expression, either ')' or end of pattern. In the real
|
||||
compile phase, go back through the alternative branches and reverse the chain
|
||||
of offsets, with the field in the BRA item now becoming an offset to the
|
||||
first alternative. If there are no alternatives, it points to the end of the
|
||||
group. The length in the terminating ket is always the length of the whole
|
||||
bracketed item. If any of the ims options were changed inside the group,
|
||||
compile a resetting op-code following, except at the very end of the pattern.
|
||||
Return leaving the pointer at the terminating char. */
|
||||
|
||||
if (*ptr != '|')
|
||||
{
|
||||
int branch_length = code - last_branch;
|
||||
do
|
||||
if (lengthptr == NULL)
|
||||
{
|
||||
int prev_length = GET(last_branch, 1);
|
||||
PUT(last_branch, 1, branch_length);
|
||||
branch_length = prev_length;
|
||||
last_branch -= branch_length;
|
||||
int branch_length = code - last_branch;
|
||||
do
|
||||
{
|
||||
int prev_length = GET(last_branch, 1);
|
||||
PUT(last_branch, 1, branch_length);
|
||||
branch_length = prev_length;
|
||||
last_branch -= branch_length;
|
||||
}
|
||||
while (branch_length > 0);
|
||||
}
|
||||
while (branch_length > 0);
|
||||
|
||||
/* Fill in the ket */
|
||||
|
||||
@ -4703,6 +5059,10 @@ for (;;)
|
||||
length += 2;
|
||||
}
|
||||
|
||||
/* Retain the highest bracket number, in case resetting was used. */
|
||||
|
||||
cd->bracount = max_bracount;
|
||||
|
||||
/* Set values to pass back */
|
||||
|
||||
*codeptr = code;
|
||||
@ -4713,17 +5073,29 @@ for (;;)
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/* Another branch follows; insert an "or" node. Its length field points back
|
||||
/* Another branch follows. In the pre-compile phase, we can move the code
|
||||
pointer back to where it was for the start of the first branch. (That is,
|
||||
pretend that each branch is the only one.)
|
||||
|
||||
In the real compile phase, insert an ALT node. Its length field points back
|
||||
to the previous branch while the bracket remains open. At the end the chain
|
||||
is reversed. It's done like this so that the start of the bracket has a
|
||||
zero offset until it is closed, making it possible to detect recursion. */
|
||||
|
||||
*code = OP_ALT;
|
||||
PUT(code, 1, code - last_branch);
|
||||
bc.current = last_branch = code;
|
||||
code += 1 + LINK_SIZE;
|
||||
if (lengthptr != NULL)
|
||||
{
|
||||
code = *codeptr + 1 + LINK_SIZE + skipbytes;
|
||||
length += 1 + LINK_SIZE;
|
||||
}
|
||||
else
|
||||
{
|
||||
*code = OP_ALT;
|
||||
PUT(code, 1, code - last_branch);
|
||||
bc.current = last_branch = code;
|
||||
code += 1 + LINK_SIZE;
|
||||
}
|
||||
|
||||
ptr++;
|
||||
length += 1 + LINK_SIZE;
|
||||
}
|
||||
/* Control never reaches here */
|
||||
}
|
||||
@ -4990,7 +5362,7 @@ Returns: pointer to compiled data block, or NULL on error,
|
||||
with errorptr and erroroffset set
|
||||
*/
|
||||
|
||||
PCRE_DATA_SCOPE pcre *
|
||||
PCRE_EXP_DEFN pcre *
|
||||
pcre_compile(const char *pattern, int options, const char **errorptr,
|
||||
int *erroroffset, const unsigned char *tables)
|
||||
{
|
||||
@ -4998,7 +5370,7 @@ return pcre_compile2(pattern, options, NULL, errorptr, erroroffset, tables);
|
||||
}
|
||||
|
||||
|
||||
PCRE_DATA_SCOPE pcre *
|
||||
PCRE_EXP_DEFN pcre *
|
||||
pcre_compile2(const char *pattern, int options, int *errorcodeptr,
|
||||
const char **errorptr, int *erroroffset, const unsigned char *tables)
|
||||
{
|
||||
@ -5047,7 +5419,7 @@ if (errorcodeptr != NULL) *errorcodeptr = ERR0;
|
||||
if (erroroffset == NULL)
|
||||
{
|
||||
errorcode = ERR16;
|
||||
goto PCRE_EARLY_ERROR_RETURN;
|
||||
goto PCRE_EARLY_ERROR_RETURN2;
|
||||
}
|
||||
|
||||
*erroroffset = 0;
|
||||
@ -5060,7 +5432,7 @@ if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 &&
|
||||
(*erroroffset = _pcre_valid_utf8((uschar *)pattern, -1)) >= 0)
|
||||
{
|
||||
errorcode = ERR44;
|
||||
goto PCRE_UTF8_ERROR_RETURN;
|
||||
goto PCRE_EARLY_ERROR_RETURN2;
|
||||
}
|
||||
#else
|
||||
if ((options & PCRE_UTF8) != 0)
|
||||
@ -5085,7 +5457,8 @@ cd->cbits = tables + cbits_offset;
|
||||
cd->ctypes = tables + ctypes_offset;
|
||||
|
||||
/* Handle different types of newline. The three bits give seven cases. The
|
||||
current code allows for fixed one- or two-byte sequences, plus "any". */
|
||||
current code allows for fixed one- or two-byte sequences, plus "any" and
|
||||
"anycrlf". */
|
||||
|
||||
switch (options & (PCRE_NEWLINE_CRLF | PCRE_NEWLINE_ANY))
|
||||
{
|
||||
@ -5095,10 +5468,15 @@ switch (options & (PCRE_NEWLINE_CRLF | PCRE_NEWLINE_ANY))
|
||||
case PCRE_NEWLINE_CR+
|
||||
PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
|
||||
case PCRE_NEWLINE_ANY: newline = -1; break;
|
||||
case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
|
||||
default: errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN;
|
||||
}
|
||||
|
||||
if (newline < 0)
|
||||
if (newline == -2)
|
||||
{
|
||||
cd->nltype = NLTYPE_ANYCRLF;
|
||||
}
|
||||
else if (newline < 0)
|
||||
{
|
||||
cd->nltype = NLTYPE_ANY;
|
||||
}
|
||||
@ -5159,7 +5537,8 @@ outside can help speed up starting point checks. */
|
||||
code = cworkspace;
|
||||
*code = OP_BRA;
|
||||
(void)compile_regex(cd->external_options, cd->external_options & PCRE_IMS,
|
||||
&code, &ptr, &errorcode, FALSE, 0, &firstbyte, &reqbyte, NULL, cd, &length);
|
||||
&code, &ptr, &errorcode, FALSE, FALSE, 0, &firstbyte, &reqbyte, NULL, cd,
|
||||
&length);
|
||||
if (errorcode != 0) goto PCRE_EARLY_ERROR_RETURN;
|
||||
|
||||
DPRINTF(("end pre-compile: length=%d workspace=%d\n", length,
|
||||
@ -5227,7 +5606,7 @@ ptr = (const uschar *)pattern;
|
||||
code = (uschar *)codestart;
|
||||
*code = OP_BRA;
|
||||
(void)compile_regex(re->options, re->options & PCRE_IMS, &code, &ptr,
|
||||
&errorcode, FALSE, 0, &firstbyte, &reqbyte, NULL, cd, NULL);
|
||||
&errorcode, FALSE, FALSE, 0, &firstbyte, &reqbyte, NULL, cd, NULL);
|
||||
re->top_bracket = cd->bracount;
|
||||
re->top_backref = cd->top_backref;
|
||||
|
||||
@ -5272,9 +5651,7 @@ if (errorcode != 0)
|
||||
(pcre_free)(re);
|
||||
PCRE_EARLY_ERROR_RETURN:
|
||||
*erroroffset = ptr - (const uschar *)pattern;
|
||||
#ifdef SUPPORT_UTF8
|
||||
PCRE_UTF8_ERROR_RETURN:
|
||||
#endif
|
||||
PCRE_EARLY_ERROR_RETURN2:
|
||||
*errorptr = error_texts + error_texts_offsets[errorcode];
|
||||
if (errorcodeptr != NULL) *errorcodeptr = errorcode;
|
||||
return NULL;
|
||||
@ -5364,7 +5741,7 @@ if ((re->options & PCRE_REQCHSET) != 0)
|
||||
else printf("Req char = \\x%02x%s\n", ch, caseless);
|
||||
}
|
||||
|
||||
pcre_printint(re, stdout);
|
||||
pcre_printint(re, stdout, TRUE);
|
||||
|
||||
/* This check is done here in the debugging case so that the code that
|
||||
was compiled can be seen. */
|
||||
|
@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -58,7 +58,7 @@ Arguments:
|
||||
Returns: 0 if data returned, negative on error
|
||||
*/
|
||||
|
||||
PCRE_DATA_SCOPE int
|
||||
PCRE_EXP_DEFN int
|
||||
pcre_config(int what, void *where)
|
||||
{
|
||||
switch (what)
|
||||
|
@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -63,24 +63,30 @@ applications. */
|
||||
|
||||
/* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes
|
||||
into others, under special conditions. A gap of 20 between the blocks should be
|
||||
enough. */
|
||||
enough. The resulting opcodes don't have to be less than 256 because they are
|
||||
never stored, so we push them well clear of the normal opcodes. */
|
||||
|
||||
#define OP_PROP_EXTRA 100
|
||||
#define OP_EXTUNI_EXTRA 120
|
||||
#define OP_ANYNL_EXTRA 140
|
||||
#define OP_PROP_EXTRA 300
|
||||
#define OP_EXTUNI_EXTRA 320
|
||||
#define OP_ANYNL_EXTRA 340
|
||||
#define OP_HSPACE_EXTRA 360
|
||||
#define OP_VSPACE_EXTRA 380
|
||||
|
||||
|
||||
/* This table identifies those opcodes that are followed immediately by a
|
||||
character that is to be tested in some way. This makes it possible to
|
||||
centralize the loading of these characters. In the case of Type * etc, the
|
||||
"character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
|
||||
small value. */
|
||||
small value. ***NOTE*** If the start of this table is modified, the two tables
|
||||
that follow must also be modified. */
|
||||
|
||||
static uschar coptable[] = {
|
||||
0, /* End */
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* \A, \G, \B, \b, \D, \d, \S, \s, \W, \w */
|
||||
0, 0, 0, 0, 0, /* \A, \G, \K, \B, \b */
|
||||
0, 0, 0, 0, 0, 0, /* \D, \d, \S, \s, \W, \w */
|
||||
0, 0, /* Any, Anybyte */
|
||||
0, 0, 0, 0, /* NOTPROP, PROP, EXTUNI, ANYNL */
|
||||
0, 0, 0, /* NOTPROP, PROP, EXTUNI */
|
||||
0, 0, 0, 0, 0, /* \R, \H, \h, \V, \v */
|
||||
0, 0, 0, 0, 0, /* \Z, \z, Opt, ^, $ */
|
||||
1, /* Char */
|
||||
1, /* Charnc */
|
||||
@ -127,7 +133,7 @@ static uschar coptable[] = {
|
||||
and \w */
|
||||
|
||||
static uschar toptable1[] = {
|
||||
0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0,
|
||||
ctype_digit, ctype_digit,
|
||||
ctype_space, ctype_space,
|
||||
ctype_word, ctype_word,
|
||||
@ -135,7 +141,7 @@ static uschar toptable1[] = {
|
||||
};
|
||||
|
||||
static uschar toptable2[] = {
|
||||
0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0,
|
||||
ctype_digit, 0,
|
||||
ctype_space, 0,
|
||||
ctype_word, 0,
|
||||
@ -500,7 +506,9 @@ for (;;)
|
||||
const uschar *code;
|
||||
int state_offset = current_state->offset;
|
||||
int count, codevalue;
|
||||
#ifdef SUPPORT_UCP
|
||||
int chartype, script;
|
||||
#endif
|
||||
|
||||
#ifdef DEBUG
|
||||
printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
|
||||
@ -555,10 +563,10 @@ for (;;)
|
||||
permitted.
|
||||
|
||||
We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
|
||||
argument that is not a data character - but is always one byte long.
|
||||
Unfortunately, we have to take special action to deal with \P, \p, and
|
||||
\X in this case. To keep the other cases fast, convert these ones to new
|
||||
opcodes. */
|
||||
argument that is not a data character - but is always one byte long. We
|
||||
have to take special action to deal with \P, \p, \H, \h, \V, \v and \X in
|
||||
this case. To keep the other cases fast, convert these ones to new opcodes.
|
||||
*/
|
||||
|
||||
if (coptable[codevalue] > 0)
|
||||
{
|
||||
@ -576,6 +584,10 @@ for (;;)
|
||||
case OP_PROP: codevalue += OP_PROP_EXTRA; break;
|
||||
case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;
|
||||
case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;
|
||||
case OP_NOT_HSPACE:
|
||||
case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break;
|
||||
case OP_NOT_VSPACE:
|
||||
case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break;
|
||||
default: break;
|
||||
}
|
||||
}
|
||||
@ -783,13 +795,12 @@ for (;;)
|
||||
break;
|
||||
|
||||
|
||||
#ifdef SUPPORT_UCP
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
/* Check the next character by Unicode property. We will get here only
|
||||
if the support is in the binary; otherwise a compile-time error occurs.
|
||||
*/
|
||||
|
||||
#ifdef SUPPORT_UCP
|
||||
case OP_PROP:
|
||||
case OP_NOTPROP:
|
||||
if (clen > 0)
|
||||
@ -970,6 +981,7 @@ for (;;)
|
||||
argument. It keeps the code above fast for the other cases. The argument
|
||||
is in the d variable. */
|
||||
|
||||
#ifdef SUPPORT_UCP
|
||||
case OP_PROP_EXTRA + OP_TYPEPLUS:
|
||||
case OP_PROP_EXTRA + OP_TYPEMINPLUS:
|
||||
case OP_PROP_EXTRA + OP_TYPEPOSPLUS:
|
||||
@ -1049,6 +1061,7 @@ for (;;)
|
||||
ADD_NEW_DATA(-state_offset, count, ncount);
|
||||
}
|
||||
break;
|
||||
#endif
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_ANYNL_EXTRA + OP_TYPEPLUS:
|
||||
@ -1085,6 +1098,97 @@ for (;;)
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_VSPACE_EXTRA + OP_TYPEPLUS:
|
||||
case OP_VSPACE_EXTRA + OP_TYPEMINPLUS:
|
||||
case OP_VSPACE_EXTRA + OP_TYPEPOSPLUS:
|
||||
count = current_state->count; /* Already matched */
|
||||
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
|
||||
if (clen > 0)
|
||||
{
|
||||
BOOL OK;
|
||||
switch (c)
|
||||
{
|
||||
case 0x000a:
|
||||
case 0x000b:
|
||||
case 0x000c:
|
||||
case 0x000d:
|
||||
case 0x0085:
|
||||
case 0x2028:
|
||||
case 0x2029:
|
||||
OK = TRUE;
|
||||
break;
|
||||
|
||||
default:
|
||||
OK = FALSE;
|
||||
break;
|
||||
}
|
||||
|
||||
if (OK == (d == OP_VSPACE))
|
||||
{
|
||||
if (count > 0 && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
count++;
|
||||
ADD_NEW_DATA(-state_offset, count, 0);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_HSPACE_EXTRA + OP_TYPEPLUS:
|
||||
case OP_HSPACE_EXTRA + OP_TYPEMINPLUS:
|
||||
case OP_HSPACE_EXTRA + OP_TYPEPOSPLUS:
|
||||
count = current_state->count; /* Already matched */
|
||||
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
|
||||
if (clen > 0)
|
||||
{
|
||||
BOOL OK;
|
||||
switch (c)
|
||||
{
|
||||
case 0x09: /* HT */
|
||||
case 0x20: /* SPACE */
|
||||
case 0xa0: /* NBSP */
|
||||
case 0x1680: /* OGHAM SPACE MARK */
|
||||
case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
|
||||
case 0x2000: /* EN QUAD */
|
||||
case 0x2001: /* EM QUAD */
|
||||
case 0x2002: /* EN SPACE */
|
||||
case 0x2003: /* EM SPACE */
|
||||
case 0x2004: /* THREE-PER-EM SPACE */
|
||||
case 0x2005: /* FOUR-PER-EM SPACE */
|
||||
case 0x2006: /* SIX-PER-EM SPACE */
|
||||
case 0x2007: /* FIGURE SPACE */
|
||||
case 0x2008: /* PUNCTUATION SPACE */
|
||||
case 0x2009: /* THIN SPACE */
|
||||
case 0x200A: /* HAIR SPACE */
|
||||
case 0x202f: /* NARROW NO-BREAK SPACE */
|
||||
case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
|
||||
case 0x3000: /* IDEOGRAPHIC SPACE */
|
||||
OK = TRUE;
|
||||
break;
|
||||
|
||||
default:
|
||||
OK = FALSE;
|
||||
break;
|
||||
}
|
||||
|
||||
if (OK == (d == OP_HSPACE))
|
||||
{
|
||||
if (count > 0 && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
count++;
|
||||
ADD_NEW_DATA(-state_offset, count, 0);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
#ifdef SUPPORT_UCP
|
||||
case OP_PROP_EXTRA + OP_TYPEQUERY:
|
||||
case OP_PROP_EXTRA + OP_TYPEMINQUERY:
|
||||
case OP_PROP_EXTRA + OP_TYPEPOSQUERY:
|
||||
@ -1182,6 +1286,7 @@ for (;;)
|
||||
ADD_NEW_DATA(-(state_offset + count), 0, ncount);
|
||||
}
|
||||
break;
|
||||
#endif
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_ANYNL_EXTRA + OP_TYPEQUERY:
|
||||
@ -1226,6 +1331,112 @@ for (;;)
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_VSPACE_EXTRA + OP_TYPEQUERY:
|
||||
case OP_VSPACE_EXTRA + OP_TYPEMINQUERY:
|
||||
case OP_VSPACE_EXTRA + OP_TYPEPOSQUERY:
|
||||
count = 2;
|
||||
goto QS4;
|
||||
|
||||
case OP_VSPACE_EXTRA + OP_TYPESTAR:
|
||||
case OP_VSPACE_EXTRA + OP_TYPEMINSTAR:
|
||||
case OP_VSPACE_EXTRA + OP_TYPEPOSSTAR:
|
||||
count = 0;
|
||||
|
||||
QS4:
|
||||
ADD_ACTIVE(state_offset + 2, 0);
|
||||
if (clen > 0)
|
||||
{
|
||||
BOOL OK;
|
||||
switch (c)
|
||||
{
|
||||
case 0x000a:
|
||||
case 0x000b:
|
||||
case 0x000c:
|
||||
case 0x000d:
|
||||
case 0x0085:
|
||||
case 0x2028:
|
||||
case 0x2029:
|
||||
OK = TRUE;
|
||||
break;
|
||||
|
||||
default:
|
||||
OK = FALSE;
|
||||
break;
|
||||
}
|
||||
if (OK == (d == OP_VSPACE))
|
||||
{
|
||||
if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR ||
|
||||
codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
ADD_NEW_DATA(-(state_offset + count), 0, 0);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_HSPACE_EXTRA + OP_TYPEQUERY:
|
||||
case OP_HSPACE_EXTRA + OP_TYPEMINQUERY:
|
||||
case OP_HSPACE_EXTRA + OP_TYPEPOSQUERY:
|
||||
count = 2;
|
||||
goto QS5;
|
||||
|
||||
case OP_HSPACE_EXTRA + OP_TYPESTAR:
|
||||
case OP_HSPACE_EXTRA + OP_TYPEMINSTAR:
|
||||
case OP_HSPACE_EXTRA + OP_TYPEPOSSTAR:
|
||||
count = 0;
|
||||
|
||||
QS5:
|
||||
ADD_ACTIVE(state_offset + 2, 0);
|
||||
if (clen > 0)
|
||||
{
|
||||
BOOL OK;
|
||||
switch (c)
|
||||
{
|
||||
case 0x09: /* HT */
|
||||
case 0x20: /* SPACE */
|
||||
case 0xa0: /* NBSP */
|
||||
case 0x1680: /* OGHAM SPACE MARK */
|
||||
case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
|
||||
case 0x2000: /* EN QUAD */
|
||||
case 0x2001: /* EM QUAD */
|
||||
case 0x2002: /* EN SPACE */
|
||||
case 0x2003: /* EM SPACE */
|
||||
case 0x2004: /* THREE-PER-EM SPACE */
|
||||
case 0x2005: /* FOUR-PER-EM SPACE */
|
||||
case 0x2006: /* SIX-PER-EM SPACE */
|
||||
case 0x2007: /* FIGURE SPACE */
|
||||
case 0x2008: /* PUNCTUATION SPACE */
|
||||
case 0x2009: /* THIN SPACE */
|
||||
case 0x200A: /* HAIR SPACE */
|
||||
case 0x202f: /* NARROW NO-BREAK SPACE */
|
||||
case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
|
||||
case 0x3000: /* IDEOGRAPHIC SPACE */
|
||||
OK = TRUE;
|
||||
break;
|
||||
|
||||
default:
|
||||
OK = FALSE;
|
||||
break;
|
||||
}
|
||||
|
||||
if (OK == (d == OP_HSPACE))
|
||||
{
|
||||
if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR ||
|
||||
codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
ADD_NEW_DATA(-(state_offset + count), 0, 0);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
#ifdef SUPPORT_UCP
|
||||
case OP_PROP_EXTRA + OP_TYPEEXACT:
|
||||
case OP_PROP_EXTRA + OP_TYPEUPTO:
|
||||
case OP_PROP_EXTRA + OP_TYPEMINUPTO:
|
||||
@ -1313,6 +1524,7 @@ for (;;)
|
||||
{ ADD_NEW_DATA(-state_offset, count, ncount); }
|
||||
}
|
||||
break;
|
||||
#endif
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_ANYNL_EXTRA + OP_TYPEEXACT:
|
||||
@ -1352,6 +1564,103 @@ for (;;)
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_VSPACE_EXTRA + OP_TYPEEXACT:
|
||||
case OP_VSPACE_EXTRA + OP_TYPEUPTO:
|
||||
case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
|
||||
case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
|
||||
if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
|
||||
{ ADD_ACTIVE(state_offset + 4, 0); }
|
||||
count = current_state->count; /* Number already matched */
|
||||
if (clen > 0)
|
||||
{
|
||||
BOOL OK;
|
||||
switch (c)
|
||||
{
|
||||
case 0x000a:
|
||||
case 0x000b:
|
||||
case 0x000c:
|
||||
case 0x000d:
|
||||
case 0x0085:
|
||||
case 0x2028:
|
||||
case 0x2029:
|
||||
OK = TRUE;
|
||||
break;
|
||||
|
||||
default:
|
||||
OK = FALSE;
|
||||
}
|
||||
|
||||
if (OK == (d == OP_VSPACE))
|
||||
{
|
||||
if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
if (++count >= GET2(code, 1))
|
||||
{ ADD_NEW_DATA(-(state_offset + 4), 0, 0); }
|
||||
else
|
||||
{ ADD_NEW_DATA(-state_offset, count, 0); }
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_HSPACE_EXTRA + OP_TYPEEXACT:
|
||||
case OP_HSPACE_EXTRA + OP_TYPEUPTO:
|
||||
case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
|
||||
case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
|
||||
if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
|
||||
{ ADD_ACTIVE(state_offset + 4, 0); }
|
||||
count = current_state->count; /* Number already matched */
|
||||
if (clen > 0)
|
||||
{
|
||||
BOOL OK;
|
||||
switch (c)
|
||||
{
|
||||
case 0x09: /* HT */
|
||||
case 0x20: /* SPACE */
|
||||
case 0xa0: /* NBSP */
|
||||
case 0x1680: /* OGHAM SPACE MARK */
|
||||
case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
|
||||
case 0x2000: /* EN QUAD */
|
||||
case 0x2001: /* EM QUAD */
|
||||
case 0x2002: /* EN SPACE */
|
||||
case 0x2003: /* EM SPACE */
|
||||
case 0x2004: /* THREE-PER-EM SPACE */
|
||||
case 0x2005: /* FOUR-PER-EM SPACE */
|
||||
case 0x2006: /* SIX-PER-EM SPACE */
|
||||
case 0x2007: /* FIGURE SPACE */
|
||||
case 0x2008: /* PUNCTUATION SPACE */
|
||||
case 0x2009: /* THIN SPACE */
|
||||
case 0x200A: /* HAIR SPACE */
|
||||
case 0x202f: /* NARROW NO-BREAK SPACE */
|
||||
case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
|
||||
case 0x3000: /* IDEOGRAPHIC SPACE */
|
||||
OK = TRUE;
|
||||
break;
|
||||
|
||||
default:
|
||||
OK = FALSE;
|
||||
break;
|
||||
}
|
||||
|
||||
if (OK == (d == OP_HSPACE))
|
||||
{
|
||||
if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
if (++count >= GET2(code, 1))
|
||||
{ ADD_NEW_DATA(-(state_offset + 4), 0, 0); }
|
||||
else
|
||||
{ ADD_NEW_DATA(-state_offset, count, 0); }
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/* ========================================================================== */
|
||||
/* These opcodes are followed by a character that is usually compared
|
||||
to the current subject character; it is loaded into d. We still get
|
||||
@ -1450,6 +1759,102 @@ for (;;)
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_NOT_VSPACE:
|
||||
if (clen > 0) switch(c)
|
||||
{
|
||||
case 0x000a:
|
||||
case 0x000b:
|
||||
case 0x000c:
|
||||
case 0x000d:
|
||||
case 0x0085:
|
||||
case 0x2028:
|
||||
case 0x2029:
|
||||
break;
|
||||
|
||||
default:
|
||||
ADD_NEW(state_offset + 1, 0);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_VSPACE:
|
||||
if (clen > 0) switch(c)
|
||||
{
|
||||
case 0x000a:
|
||||
case 0x000b:
|
||||
case 0x000c:
|
||||
case 0x000d:
|
||||
case 0x0085:
|
||||
case 0x2028:
|
||||
case 0x2029:
|
||||
ADD_NEW(state_offset + 1, 0);
|
||||
break;
|
||||
|
||||
default: break;
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_NOT_HSPACE:
|
||||
if (clen > 0) switch(c)
|
||||
{
|
||||
case 0x09: /* HT */
|
||||
case 0x20: /* SPACE */
|
||||
case 0xa0: /* NBSP */
|
||||
case 0x1680: /* OGHAM SPACE MARK */
|
||||
case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
|
||||
case 0x2000: /* EN QUAD */
|
||||
case 0x2001: /* EM QUAD */
|
||||
case 0x2002: /* EN SPACE */
|
||||
case 0x2003: /* EM SPACE */
|
||||
case 0x2004: /* THREE-PER-EM SPACE */
|
||||
case 0x2005: /* FOUR-PER-EM SPACE */
|
||||
case 0x2006: /* SIX-PER-EM SPACE */
|
||||
case 0x2007: /* FIGURE SPACE */
|
||||
case 0x2008: /* PUNCTUATION SPACE */
|
||||
case 0x2009: /* THIN SPACE */
|
||||
case 0x200A: /* HAIR SPACE */
|
||||
case 0x202f: /* NARROW NO-BREAK SPACE */
|
||||
case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
|
||||
case 0x3000: /* IDEOGRAPHIC SPACE */
|
||||
break;
|
||||
|
||||
default:
|
||||
ADD_NEW(state_offset + 1, 0);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_HSPACE:
|
||||
if (clen > 0) switch(c)
|
||||
{
|
||||
case 0x09: /* HT */
|
||||
case 0x20: /* SPACE */
|
||||
case 0xa0: /* NBSP */
|
||||
case 0x1680: /* OGHAM SPACE MARK */
|
||||
case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
|
||||
case 0x2000: /* EN QUAD */
|
||||
case 0x2001: /* EM QUAD */
|
||||
case 0x2002: /* EN SPACE */
|
||||
case 0x2003: /* EM SPACE */
|
||||
case 0x2004: /* THREE-PER-EM SPACE */
|
||||
case 0x2005: /* FOUR-PER-EM SPACE */
|
||||
case 0x2006: /* SIX-PER-EM SPACE */
|
||||
case 0x2007: /* FIGURE SPACE */
|
||||
case 0x2008: /* PUNCTUATION SPACE */
|
||||
case 0x2009: /* THIN SPACE */
|
||||
case 0x200A: /* HAIR SPACE */
|
||||
case 0x202f: /* NARROW NO-BREAK SPACE */
|
||||
case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
|
||||
case 0x3000: /* IDEOGRAPHIC SPACE */
|
||||
ADD_NEW(state_offset + 1, 0);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
/* Match a negated single character. This is only used for one-byte
|
||||
characters, that is, we know that d < 256. The character we are
|
||||
@ -2057,7 +2462,7 @@ is not anchored.
|
||||
|
||||
Arguments:
|
||||
argument_re points to the compiled expression
|
||||
extra_data points to extra data or is NULL (not currently used)
|
||||
extra_data points to extra data or is NULL
|
||||
subject points to the subject string
|
||||
length length of subject string (may contain binary zeros)
|
||||
start_offset where to start in the subject string
|
||||
@ -2073,7 +2478,7 @@ Returns: > 0 => number of match offset pairs placed in offsets
|
||||
< -1 => some kind of unexpected problem
|
||||
*/
|
||||
|
||||
PCRE_DATA_SCOPE int
|
||||
PCRE_EXP_DEFN int
|
||||
pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
|
||||
const char *subject, int length, int start_offset, int options, int *offsets,
|
||||
int offsetcount, int *workspace, int wscount)
|
||||
@ -2163,10 +2568,10 @@ md->end_subject = end_subject;
|
||||
md->moptions = options;
|
||||
md->poptions = re->options;
|
||||
|
||||
/* Handle different types of newline. The two bits give four cases. If nothing
|
||||
is set at run time, whatever was used at compile time applies. */
|
||||
/* Handle different types of newline. The three bits give eight cases. If
|
||||
nothing is set at run time, whatever was used at compile time applies. */
|
||||
|
||||
switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : options) &
|
||||
switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
|
||||
PCRE_NEWLINE_BITS)
|
||||
{
|
||||
case 0: newline = NEWLINE; break; /* Compile-time default */
|
||||
@ -2175,10 +2580,15 @@ switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : options) &
|
||||
case PCRE_NEWLINE_CR+
|
||||
PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
|
||||
case PCRE_NEWLINE_ANY: newline = -1; break;
|
||||
case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
|
||||
default: return PCRE_ERROR_BADNEWLINE;
|
||||
}
|
||||
|
||||
if (newline < 0)
|
||||
if (newline == -2)
|
||||
{
|
||||
md->nltype = NLTYPE_ANYCRLF;
|
||||
}
|
||||
else if (newline < 0)
|
||||
{
|
||||
md->nltype = NLTYPE_ANY;
|
||||
}
|
||||
@ -2308,6 +2718,16 @@ for (;;)
|
||||
{
|
||||
while (current_subject <= end_subject && !WAS_NEWLINE(current_subject))
|
||||
current_subject++;
|
||||
|
||||
/* If we have just passed a CR and the newline option is ANY or
|
||||
ANYCRLF, and we are now at a LF, advance the match position by one more
|
||||
character. */
|
||||
|
||||
if (current_subject[-1] == '\r' &&
|
||||
(md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
|
||||
current_subject < end_subject &&
|
||||
*current_subject == '\n')
|
||||
current_subject++;
|
||||
}
|
||||
}
|
||||
|
||||
@ -2416,11 +2836,14 @@ for (;;)
|
||||
}
|
||||
if (current_subject > end_subject) break;
|
||||
|
||||
/* If we have just passed a CR and the newline option is CRLF or ANY, and we
|
||||
are now at a LF, advance the match position by one more character. */
|
||||
/* If we have just passed a CR and the newline option is CRLF or ANY or
|
||||
ANYCRLF, and we are now at a LF, advance the match position by one more
|
||||
character. */
|
||||
|
||||
if (current_subject[-1] == '\r' &&
|
||||
(md->nltype == NLTYPE_ANY || md->nllen == 2) &&
|
||||
(md->nltype == NLTYPE_ANY ||
|
||||
md->nltype == NLTYPE_ANYCRLF ||
|
||||
md->nllen == 2) &&
|
||||
current_subject < end_subject &&
|
||||
*current_subject == '\n')
|
||||
current_subject++;
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -61,7 +61,7 @@ Arguments:
|
||||
Returns: 0 if data returned, negative on error
|
||||
*/
|
||||
|
||||
PCRE_DATA_SCOPE int
|
||||
PCRE_EXP_DEFN int
|
||||
pcre_fullinfo(const pcre *argument_re, const pcre_extra *extra_data, int what,
|
||||
void *where)
|
||||
{
|
||||
@ -140,6 +140,14 @@ switch (what)
|
||||
*((const uschar **)where) = (const uschar *)(_pcre_default_tables);
|
||||
break;
|
||||
|
||||
case PCRE_INFO_OKPARTIAL:
|
||||
*((int *)where) = (re->options & PCRE_NOPARTIAL) == 0;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_JCHANGED:
|
||||
*((int *)where) = (re->options & PCRE_JCHANGED) != 0;
|
||||
break;
|
||||
|
||||
default: return PCRE_ERROR_BADOPTION;
|
||||
}
|
||||
|
||||
|
@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -46,14 +46,8 @@ indirection. These values can be changed by the caller, but are shared between
|
||||
all threads. However, when compiling for Virtual Pascal, things are done
|
||||
differently, and global variables are not used (see pcre.in). */
|
||||
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" int (*pcre_callout)(pcre_callout_block *) = NULL;
|
||||
#else
|
||||
int (*pcre_callout)(pcre_callout_block *) = NULL;
|
||||
#endif
|
||||
PCRE_EXP_DATA_DEFN int (*pcre_callout)(pcre_callout_block *) = NULL;
|
||||
|
||||
/* End of pcre_globals.c */
|
||||
|
@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -68,7 +68,7 @@ Returns: number of capturing subpatterns
|
||||
or negative values on error
|
||||
*/
|
||||
|
||||
PCRE_DATA_SCOPE int
|
||||
PCRE_EXP_DEFN int
|
||||
pcre_info(const pcre *argument_re, int *optptr, int *first_byte)
|
||||
{
|
||||
real_pcre internal_re;
|
||||
|
@ -7,7 +7,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -83,8 +83,58 @@ setjmp and stdarg are used is when NO_RECURSE is set. */
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifndef PCRE_SPY
|
||||
#define PCRE_DEFINITION /* Win32 __declspec(export) trigger for .dll */
|
||||
/* When compiling a DLL for Windows, the exported symbols have to be declared
|
||||
using some MS magic. I found some useful information on this web page:
|
||||
http://msdn2.microsoft.com/en-us/library/y4h7bcy6(VS.80).aspx. According to the
|
||||
information there, using __declspec(dllexport) without "extern" we have a
|
||||
definition; with "extern" we have a declaration. The settings here override the
|
||||
setting in pcre.h (which is included below); it defines only PCRE_EXP_DECL,
|
||||
which is all that is needed for applications (they just import the symbols). We
|
||||
use:
|
||||
|
||||
PCRE_EXP_DECL for declarations
|
||||
PCRE_EXP_DEFN for definitions of exported functions
|
||||
PCRE_EXP_DATA_DEFN for definitions of exported variables
|
||||
|
||||
The reason for the two DEFN macros is that in non-Windows environments, one
|
||||
does not want to have "extern" before variable definitions because it leads to
|
||||
compiler warnings. So we distinguish between functions and variables. In
|
||||
Windows, the two should always be the same.
|
||||
|
||||
The reason for wrapping this in #ifndef PCRE_EXP_DECL is so that pcretest,
|
||||
which is an application, but needs to import this file in order to "peek" at
|
||||
internals, can #include pcre.h first to get an application's-eye view.
|
||||
|
||||
In principle, people compiling for non-Windows, non-Unix-like (i.e. uncommon,
|
||||
special-purpose environments) might want to stick other stuff in front of
|
||||
exported symbols. That's why, in the non-Windows case, we set PCRE_EXP_DEFN and
|
||||
PCRE_EXP_DATA_DEFN only if they are not already set. */
|
||||
|
||||
#ifndef PCRE_EXP_DECL
|
||||
# ifdef _WIN32
|
||||
# ifdef DLL_EXPORT
|
||||
# define PCRE_EXP_DECL extern __declspec(dllexport)
|
||||
# define PCRE_EXP_DEFN __declspec(dllexport)
|
||||
# define PCRE_EXP_DATA_DEFN __declspec(dllexport)
|
||||
# else
|
||||
# define PCRE_EXP_DECL extern
|
||||
# define PCRE_EXP_DEFN
|
||||
# define PCRE_EXP_DATA_DEFN
|
||||
# endif
|
||||
#
|
||||
# else
|
||||
# ifdef __cplusplus
|
||||
# define PCRE_EXP_DECL extern "C"
|
||||
# else
|
||||
# define PCRE_EXP_DECL extern
|
||||
# endif
|
||||
# ifndef PCRE_EXP_DEFN
|
||||
# define PCRE_EXP_DEFN PCRE_EXP_DECL
|
||||
# endif
|
||||
# ifndef PCRE_EXP_DATA_DEFN
|
||||
# define PCRE_EXP_DATA_DEFN
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* We need to have types that specify unsigned 16-bit and 32-bit integers. We
|
||||
@ -125,21 +175,22 @@ characters only go up to 0x7fffffff (though Unicode doesn't go beyond
|
||||
#define NOTACHAR 0xffffffff
|
||||
|
||||
/* PCRE is able to support several different kinds of newline (CR, LF, CRLF,
|
||||
and "all" at present). The following macros are used to package up testing for
|
||||
newlines. NLBLOCK, PSSTART, and PSEND are defined in the various modules to
|
||||
indicate in which datablock the parameters exist, and what the start/end of
|
||||
string field names are. */
|
||||
"any" and "anycrlf" at present). The following macros are used to package up
|
||||
testing for newlines. NLBLOCK, PSSTART, and PSEND are defined in the various
|
||||
modules to indicate in which datablock the parameters exist, and what the
|
||||
start/end of string field names are. */
|
||||
|
||||
#define NLTYPE_FIXED 0 /* Newline is a fixed length string */
|
||||
#define NLTYPE_ANY 1 /* Newline is any Unicode line ending */
|
||||
#define NLTYPE_FIXED 0 /* Newline is a fixed length string */
|
||||
#define NLTYPE_ANY 1 /* Newline is any Unicode line ending */
|
||||
#define NLTYPE_ANYCRLF 2 /* Newline is CR, LF, or CRLF */
|
||||
|
||||
/* This macro checks for a newline at the given position */
|
||||
|
||||
#define IS_NEWLINE(p) \
|
||||
((NLBLOCK->nltype != NLTYPE_FIXED)? \
|
||||
((p) < NLBLOCK->PSEND && \
|
||||
_pcre_is_newline((p), NLBLOCK->PSEND, &(NLBLOCK->nllen), utf8) \
|
||||
) \
|
||||
_pcre_is_newline((p), NLBLOCK->nltype, NLBLOCK->PSEND, &(NLBLOCK->nllen),\
|
||||
utf8)) \
|
||||
: \
|
||||
((p) <= NLBLOCK->PSEND - NLBLOCK->nllen && \
|
||||
(p)[0] == NLBLOCK->nl[0] && \
|
||||
@ -152,8 +203,8 @@ string field names are. */
|
||||
#define WAS_NEWLINE(p) \
|
||||
((NLBLOCK->nltype != NLTYPE_FIXED)? \
|
||||
((p) > NLBLOCK->PSSTART && \
|
||||
_pcre_was_newline((p), NLBLOCK->PSSTART, &(NLBLOCK->nllen), utf8) \
|
||||
) \
|
||||
_pcre_was_newline((p), NLBLOCK->nltype, NLBLOCK->PSSTART, \
|
||||
&(NLBLOCK->nllen), utf8)) \
|
||||
: \
|
||||
((p) >= NLBLOCK->PSSTART + NLBLOCK->nllen && \
|
||||
(p)[-NLBLOCK->nllen] == NLBLOCK->nl[0] && \
|
||||
@ -178,10 +229,12 @@ must begin with PCRE_. */
|
||||
#define USPTR const unsigned char *
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/* Include the public PCRE header and the definitions of UCP character property
|
||||
values. */
|
||||
|
||||
#include "pcre.h"
|
||||
#include <pcre.h>
|
||||
#include "ucp.h"
|
||||
|
||||
/* When compiling for use with the Virtual Pascal compiler, these functions
|
||||
@ -189,7 +242,9 @@ need to have their names changed. PCRE must be compiled with the -DVPCOMPAT
|
||||
option on the command line. */
|
||||
|
||||
#ifdef VPCOMPAT
|
||||
#define strlen(s) _strlen(s)
|
||||
#define strncmp(s1,s2,m) _strncmp(s1,s2,m)
|
||||
#define memcmp(s,c,n) _memcmp(s,c,n)
|
||||
#define memcpy(d,s,n) _memcpy(d,s,n)
|
||||
#define memmove(d,s,n) _memmove(d,s,n)
|
||||
#define memset(s,c,n) _memset(s,c,n)
|
||||
@ -198,23 +253,31 @@ option on the command line. */
|
||||
/* To cope with SunOS4 and other systems that lack memmove() but have bcopy(),
|
||||
define a macro for memmove() if HAVE_MEMMOVE is false, provided that HAVE_BCOPY
|
||||
is set. Otherwise, include an emulating function for those systems that have
|
||||
neither (there some non-Unix environments where this is the case). This assumes
|
||||
that all calls to memmove are moving strings upwards in store, which is the
|
||||
case in PCRE. */
|
||||
neither (there are some non-Unix environments where this is the case). */
|
||||
|
||||
#if ! HAVE_MEMMOVE
|
||||
#ifndef HAVE_MEMMOVE
|
||||
#undef memmove /* some systems may have a macro */
|
||||
#if HAVE_BCOPY
|
||||
#ifdef HAVE_BCOPY
|
||||
#define memmove(a, b, c) bcopy(b, a, c)
|
||||
#else /* HAVE_BCOPY */
|
||||
static void *
|
||||
pcre_memmove(unsigned char *dest, const unsigned char *src, size_t n)
|
||||
pcre_memmove(void *d, const void *s, size_t n)
|
||||
{
|
||||
size_t i;
|
||||
dest += n;
|
||||
src += n;
|
||||
for (i = 0; i < n; ++i) *(--dest) = *(--src);
|
||||
return dest;
|
||||
unsigned char *dest = (unsigned char *)d;
|
||||
const unsigned char *src = (const unsigned char *)s;
|
||||
if (dest > src)
|
||||
{
|
||||
dest += n;
|
||||
src += n;
|
||||
for (i = 0; i < n; ++i) *(--dest) = *(--src);
|
||||
return (void *)dest;
|
||||
}
|
||||
else
|
||||
{
|
||||
for (i = 0; i < n; ++i) *dest++ = *src++;
|
||||
return (void *)(dest - n);
|
||||
}
|
||||
}
|
||||
#define memmove(a, b, c) pcre_memmove(a, b, c)
|
||||
#endif /* not HAVE_BCOPY */
|
||||
@ -439,7 +502,8 @@ bits. */
|
||||
/* Masks for identifying the public options that are permitted at compile
|
||||
time, run time, or study time, respectively. */
|
||||
|
||||
#define PCRE_NEWLINE_BITS (PCRE_NEWLINE_CR|PCRE_NEWLINE_LF|PCRE_NEWLINE_ANY)
|
||||
#define PCRE_NEWLINE_BITS (PCRE_NEWLINE_CR|PCRE_NEWLINE_LF|PCRE_NEWLINE_ANY| \
|
||||
PCRE_NEWLINE_ANYCRLF)
|
||||
|
||||
#define PUBLIC_OPTIONS \
|
||||
(PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \
|
||||
@ -538,9 +602,9 @@ ESC_Z to detect the types that may be repeated. These are the types that
|
||||
consume characters. If any new escapes are put in between that don't consume a
|
||||
character, that code will have to change. */
|
||||
|
||||
enum { ESC_A = 1, ESC_G, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s, ESC_W,
|
||||
ESC_w, ESC_dum1, ESC_C, ESC_P, ESC_p, ESC_R, ESC_X, ESC_Z, ESC_z,
|
||||
ESC_E, ESC_Q, ESC_k, ESC_REF };
|
||||
enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s,
|
||||
ESC_W, ESC_w, ESC_dum1, ESC_C, ESC_P, ESC_p, ESC_R, ESC_H, ESC_h,
|
||||
ESC_V, ESC_v, ESC_X, ESC_Z, ESC_z, ESC_E, ESC_Q, ESC_k, ESC_REF };
|
||||
|
||||
|
||||
/* Opcode table: OP_BRA must be last, as all values >= it are used for brackets
|
||||
@ -562,133 +626,138 @@ enum {
|
||||
|
||||
OP_SOD, /* 1 Start of data: \A */
|
||||
OP_SOM, /* 2 Start of match (subject + offset): \G */
|
||||
OP_NOT_WORD_BOUNDARY, /* 3 \B */
|
||||
OP_WORD_BOUNDARY, /* 4 \b */
|
||||
OP_NOT_DIGIT, /* 5 \D */
|
||||
OP_DIGIT, /* 6 \d */
|
||||
OP_NOT_WHITESPACE, /* 7 \S */
|
||||
OP_WHITESPACE, /* 8 \s */
|
||||
OP_NOT_WORDCHAR, /* 9 \W */
|
||||
OP_WORDCHAR, /* 10 \w */
|
||||
OP_ANY, /* 11 Match any character */
|
||||
OP_ANYBYTE, /* 12 Match any byte (\C); different to OP_ANY for UTF-8 */
|
||||
OP_NOTPROP, /* 13 \P (not Unicode property) */
|
||||
OP_PROP, /* 14 \p (Unicode property) */
|
||||
OP_ANYNL, /* 15 \R (any newline sequence) */
|
||||
OP_EXTUNI, /* 16 \X (extended Unicode sequence */
|
||||
OP_EODN, /* 17 End of data or \n at end of data: \Z. */
|
||||
OP_EOD, /* 18 End of data: \z */
|
||||
OP_SET_SOM, /* 3 Set start of match (\K) */
|
||||
OP_NOT_WORD_BOUNDARY, /* 4 \B */
|
||||
OP_WORD_BOUNDARY, /* 5 \b */
|
||||
OP_NOT_DIGIT, /* 6 \D */
|
||||
OP_DIGIT, /* 7 \d */
|
||||
OP_NOT_WHITESPACE, /* 8 \S */
|
||||
OP_WHITESPACE, /* 9 \s */
|
||||
OP_NOT_WORDCHAR, /* 10 \W */
|
||||
OP_WORDCHAR, /* 11 \w */
|
||||
OP_ANY, /* 12 Match any character */
|
||||
OP_ANYBYTE, /* 13 Match any byte (\C); different to OP_ANY for UTF-8 */
|
||||
OP_NOTPROP, /* 14 \P (not Unicode property) */
|
||||
OP_PROP, /* 15 \p (Unicode property) */
|
||||
OP_ANYNL, /* 16 \R (any newline sequence) */
|
||||
OP_NOT_HSPACE, /* 17 \H (not horizontal whitespace) */
|
||||
OP_HSPACE, /* 18 \h (horizontal whitespace) */
|
||||
OP_NOT_VSPACE, /* 19 \V (not vertical whitespace) */
|
||||
OP_VSPACE, /* 20 \v (vertical whitespace) */
|
||||
OP_EXTUNI, /* 21 \X (extended Unicode sequence) */
|
||||
OP_EODN, /* 22 End of data or \n at end of data: \Z. */
|
||||
OP_EOD, /* 23 End of data: \z */
|
||||
|
||||
OP_OPT, /* 19 Set runtime options */
|
||||
OP_CIRC, /* 20 Start of line - varies with multiline switch */
|
||||
OP_DOLL, /* 21 End of line - varies with multiline switch */
|
||||
OP_CHAR, /* 22 Match one character, casefully */
|
||||
OP_CHARNC, /* 23 Match one character, caselessly */
|
||||
OP_NOT, /* 24 Match one character, not the following one */
|
||||
OP_OPT, /* 24 Set runtime options */
|
||||
OP_CIRC, /* 25 Start of line - varies with multiline switch */
|
||||
OP_DOLL, /* 26 End of line - varies with multiline switch */
|
||||
OP_CHAR, /* 27 Match one character, casefully */
|
||||
OP_CHARNC, /* 28 Match one character, caselessly */
|
||||
OP_NOT, /* 29 Match one character, not the following one */
|
||||
|
||||
OP_STAR, /* 25 The maximizing and minimizing versions of */
|
||||
OP_MINSTAR, /* 26 these six opcodes must come in pairs, with */
|
||||
OP_PLUS, /* 27 the minimizing one second. */
|
||||
OP_MINPLUS, /* 28 This first set applies to single characters.*/
|
||||
OP_QUERY, /* 29 */
|
||||
OP_MINQUERY, /* 30 */
|
||||
OP_STAR, /* 30 The maximizing and minimizing versions of */
|
||||
OP_MINSTAR, /* 31 these six opcodes must come in pairs, with */
|
||||
OP_PLUS, /* 32 the minimizing one second. */
|
||||
OP_MINPLUS, /* 33 This first set applies to single characters.*/
|
||||
OP_QUERY, /* 34 */
|
||||
OP_MINQUERY, /* 35 */
|
||||
|
||||
OP_UPTO, /* 31 From 0 to n matches */
|
||||
OP_MINUPTO, /* 32 */
|
||||
OP_EXACT, /* 33 Exactly n matches */
|
||||
OP_UPTO, /* 36 From 0 to n matches */
|
||||
OP_MINUPTO, /* 37 */
|
||||
OP_EXACT, /* 38 Exactly n matches */
|
||||
|
||||
OP_POSSTAR, /* 34 Possessified star */
|
||||
OP_POSPLUS, /* 35 Possessified plus */
|
||||
OP_POSQUERY, /* 36 Posesssified query */
|
||||
OP_POSUPTO, /* 37 Possessified upto */
|
||||
OP_POSSTAR, /* 39 Possessified star */
|
||||
OP_POSPLUS, /* 40 Possessified plus */
|
||||
OP_POSQUERY, /* 41 Possessified query */
|
||||
OP_POSUPTO, /* 42 Possessified upto */
|
||||
|
||||
OP_NOTSTAR, /* 38 The maximizing and minimizing versions of */
|
||||
OP_NOTMINSTAR, /* 39 these six opcodes must come in pairs, with */
|
||||
OP_NOTPLUS, /* 40 the minimizing one second. They must be in */
|
||||
OP_NOTMINPLUS, /* 41 exactly the same order as those above. */
|
||||
OP_NOTQUERY, /* 42 This set applies to "not" single characters. */
|
||||
OP_NOTMINQUERY, /* 43 */
|
||||
OP_NOTSTAR, /* 43 The maximizing and minimizing versions of */
|
||||
OP_NOTMINSTAR, /* 44 these six opcodes must come in pairs, with */
|
||||
OP_NOTPLUS, /* 45 the minimizing one second. They must be in */
|
||||
OP_NOTMINPLUS, /* 46 exactly the same order as those above. */
|
||||
OP_NOTQUERY, /* 47 This set applies to "not" single characters. */
|
||||
OP_NOTMINQUERY, /* 48 */
|
||||
|
||||
OP_NOTUPTO, /* 44 From 0 to n matches */
|
||||
OP_NOTMINUPTO, /* 45 */
|
||||
OP_NOTEXACT, /* 46 Exactly n matches */
|
||||
OP_NOTUPTO, /* 49 From 0 to n matches */
|
||||
OP_NOTMINUPTO, /* 50 */
|
||||
OP_NOTEXACT, /* 51 Exactly n matches */
|
||||
|
||||
OP_NOTPOSSTAR, /* 47 Possessified versions */
|
||||
OP_NOTPOSPLUS, /* 48 */
|
||||
OP_NOTPOSQUERY, /* 49 */
|
||||
OP_NOTPOSUPTO, /* 50 */
|
||||
OP_NOTPOSSTAR, /* 52 Possessified versions */
|
||||
OP_NOTPOSPLUS, /* 53 */
|
||||
OP_NOTPOSQUERY, /* 54 */
|
||||
OP_NOTPOSUPTO, /* 55 */
|
||||
|
||||
OP_TYPESTAR, /* 51 The maximizing and minimizing versions of */
|
||||
OP_TYPEMINSTAR, /* 52 these six opcodes must come in pairs, with */
|
||||
OP_TYPEPLUS, /* 53 the minimizing one second. These codes must */
|
||||
OP_TYPEMINPLUS, /* 54 be in exactly the same order as those above. */
|
||||
OP_TYPEQUERY, /* 55 This set applies to character types such as \d */
|
||||
OP_TYPEMINQUERY, /* 56 */
|
||||
OP_TYPESTAR, /* 56 The maximizing and minimizing versions of */
|
||||
OP_TYPEMINSTAR, /* 57 these six opcodes must come in pairs, with */
|
||||
OP_TYPEPLUS, /* 58 the minimizing one second. These codes must */
|
||||
OP_TYPEMINPLUS, /* 59 be in exactly the same order as those above. */
|
||||
OP_TYPEQUERY, /* 60 This set applies to character types such as \d */
|
||||
OP_TYPEMINQUERY, /* 61 */
|
||||
|
||||
OP_TYPEUPTO, /* 57 From 0 to n matches */
|
||||
OP_TYPEMINUPTO, /* 58 */
|
||||
OP_TYPEEXACT, /* 59 Exactly n matches */
|
||||
OP_TYPEUPTO, /* 62 From 0 to n matches */
|
||||
OP_TYPEMINUPTO, /* 63 */
|
||||
OP_TYPEEXACT, /* 64 Exactly n matches */
|
||||
|
||||
OP_TYPEPOSSTAR, /* 60 Possessified versions */
|
||||
OP_TYPEPOSPLUS, /* 61 */
|
||||
OP_TYPEPOSQUERY, /* 62 */
|
||||
OP_TYPEPOSUPTO, /* 63 */
|
||||
OP_TYPEPOSSTAR, /* 65 Possessified versions */
|
||||
OP_TYPEPOSPLUS, /* 66 */
|
||||
OP_TYPEPOSQUERY, /* 67 */
|
||||
OP_TYPEPOSUPTO, /* 68 */
|
||||
|
||||
OP_CRSTAR, /* 64 The maximizing and minimizing versions of */
|
||||
OP_CRMINSTAR, /* 65 all these opcodes must come in pairs, with */
|
||||
OP_CRPLUS, /* 66 the minimizing one second. These codes must */
|
||||
OP_CRMINPLUS, /* 67 be in exactly the same order as those above. */
|
||||
OP_CRQUERY, /* 68 These are for character classes and back refs */
|
||||
OP_CRMINQUERY, /* 69 */
|
||||
OP_CRRANGE, /* 70 These are different to the three sets above. */
|
||||
OP_CRMINRANGE, /* 71 */
|
||||
OP_CRSTAR, /* 69 The maximizing and minimizing versions of */
|
||||
OP_CRMINSTAR, /* 70 all these opcodes must come in pairs, with */
|
||||
OP_CRPLUS, /* 71 the minimizing one second. These codes must */
|
||||
OP_CRMINPLUS, /* 72 be in exactly the same order as those above. */
|
||||
OP_CRQUERY, /* 73 These are for character classes and back refs */
|
||||
OP_CRMINQUERY, /* 74 */
|
||||
OP_CRRANGE, /* 75 These are different to the three sets above. */
|
||||
OP_CRMINRANGE, /* 76 */
|
||||
|
||||
OP_CLASS, /* 72 Match a character class, chars < 256 only */
|
||||
OP_NCLASS, /* 73 Same, but the bitmap was created from a negative
|
||||
OP_CLASS, /* 77 Match a character class, chars < 256 only */
|
||||
OP_NCLASS, /* 78 Same, but the bitmap was created from a negative
|
||||
class - the difference is relevant only when a UTF-8
|
||||
character > 255 is encountered. */
|
||||
|
||||
OP_XCLASS, /* 74 Extended class for handling UTF-8 chars within the
|
||||
OP_XCLASS, /* 79 Extended class for handling UTF-8 chars within the
|
||||
class. This does both positive and negative. */
|
||||
|
||||
OP_REF, /* 75 Match a back reference */
|
||||
OP_RECURSE, /* 76 Match a numbered subpattern (possibly recursive) */
|
||||
OP_CALLOUT, /* 77 Call out to external function if provided */
|
||||
OP_REF, /* 80 Match a back reference */
|
||||
OP_RECURSE, /* 81 Match a numbered subpattern (possibly recursive) */
|
||||
OP_CALLOUT, /* 82 Call out to external function if provided */
|
||||
|
||||
OP_ALT, /* 78 Start of alternation */
|
||||
OP_KET, /* 79 End of group that doesn't have an unbounded repeat */
|
||||
OP_KETRMAX, /* 80 These two must remain together and in this */
|
||||
OP_KETRMIN, /* 81 order. They are for groups the repeat for ever. */
|
||||
OP_ALT, /* 83 Start of alternation */
|
||||
OP_KET, /* 84 End of group that doesn't have an unbounded repeat */
|
||||
OP_KETRMAX, /* 85 These two must remain together and in this */
|
||||
OP_KETRMIN, /* 86 order. They are for groups that repeat for ever. */
|
||||
|
||||
/* The assertions must come before BRA, CBRA, ONCE, and COND.*/
|
||||
|
||||
OP_ASSERT, /* 82 Positive lookahead */
|
||||
OP_ASSERT_NOT, /* 83 Negative lookahead */
|
||||
OP_ASSERTBACK, /* 84 Positive lookbehind */
|
||||
OP_ASSERTBACK_NOT, /* 85 Negative lookbehind */
|
||||
OP_REVERSE, /* 86 Move pointer back - used in lookbehind assertions */
|
||||
OP_ASSERT, /* 87 Positive lookahead */
|
||||
OP_ASSERT_NOT, /* 88 Negative lookahead */
|
||||
OP_ASSERTBACK, /* 89 Positive lookbehind */
|
||||
OP_ASSERTBACK_NOT, /* 90 Negative lookbehind */
|
||||
OP_REVERSE, /* 91 Move pointer back - used in lookbehind assertions */
|
||||
|
||||
/* ONCE, BRA, CBRA, and COND must come after the assertions, with ONCE first,
|
||||
as there's a test for >= ONCE for a subpattern that isn't an assertion. */
|
||||
|
||||
OP_ONCE, /* 87 Atomic group */
|
||||
OP_BRA, /* 88 Start of non-capturing bracket */
|
||||
OP_CBRA, /* 89 Start of capturing bracket */
|
||||
OP_COND, /* 90 Conditional group */
|
||||
OP_ONCE, /* 92 Atomic group */
|
||||
OP_BRA, /* 93 Start of non-capturing bracket */
|
||||
OP_CBRA, /* 94 Start of capturing bracket */
|
||||
OP_COND, /* 95 Conditional group */
|
||||
|
||||
/* These three must follow the previous three, in the same order. There's a
|
||||
check for >= SBRA to distinguish the two sets. */
|
||||
|
||||
OP_SBRA, /* 91 Start of non-capturing bracket, check empty */
|
||||
OP_SCBRA, /* 92 Start of capturing bracket, check empty */
|
||||
OP_SCOND, /* 93 Conditional group, check empty */
|
||||
OP_SBRA, /* 96 Start of non-capturing bracket, check empty */
|
||||
OP_SCBRA, /* 97 Start of capturing bracket, check empty */
|
||||
OP_SCOND, /* 98 Conditional group, check empty */
|
||||
|
||||
OP_CREF, /* 94 Used to hold a capture number as condition */
|
||||
OP_RREF, /* 95 Used to hold a recursion number as condition */
|
||||
OP_DEF, /* 96 The DEFINE condition */
|
||||
OP_CREF, /* 99 Used to hold a capture number as condition */
|
||||
OP_RREF, /* 100 Used to hold a recursion number as condition */
|
||||
OP_DEF, /* 101 The DEFINE condition */
|
||||
|
||||
OP_BRAZERO, /* 97 These two must remain together and in this */
|
||||
OP_BRAMINZERO /* 98 order. */
|
||||
OP_BRAZERO, /* 102 These two must remain together and in this */
|
||||
OP_BRAMINZERO /* 103 order. */
|
||||
};
|
||||
|
||||
|
||||
@ -696,10 +765,10 @@ enum {
|
||||
for debugging. The macro is referenced only in pcre_printint.c. */
|
||||
|
||||
#define OP_NAME_LIST \
|
||||
"End", "\\A", "\\G", "\\B", "\\b", "\\D", "\\d", \
|
||||
"End", "\\A", "\\G", "\\K", "\\B", "\\b", "\\D", "\\d", \
|
||||
"\\S", "\\s", "\\W", "\\w", "Any", "Anybyte", \
|
||||
"notprop", "prop", "anynl", "extuni", \
|
||||
"\\Z", "\\z", \
|
||||
"notprop", "prop", "\\R", "\\H", "\\h", "\\V", "\\v", \
|
||||
"extuni", "\\Z", "\\z", \
|
||||
"Opt", "^", "$", "char", "charnc", "not", \
|
||||
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", \
|
||||
"*+","++", "?+", "{", \
|
||||
@ -726,9 +795,11 @@ in UTF-8 mode. The code that uses this table must know about such things. */
|
||||
|
||||
#define OP_LENGTHS \
|
||||
1, /* End */ \
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* \A, \G, \B, \B, \D, \d, \S, \s, \W, \w */ \
|
||||
1, 1, 1, 1, 1, /* \A, \G, \K, \B, \b */ \
|
||||
1, 1, 1, 1, 1, 1, /* \D, \d, \S, \s, \W, \w */ \
|
||||
1, 1, /* Any, Anybyte */ \
|
||||
3, 3, 1, 1, /* NOTPROP, PROP, EXTUNI, ANYNL */ \
|
||||
3, 3, 1, /* NOTPROP, PROP, \R */ \
|
||||
1, 1, 1, 1, 1, /* \H, \h, \V, \v, EXTUNI */ \
|
||||
1, 1, 2, 1, 1, /* \Z, \z, Opt, ^, $ */ \
|
||||
2, /* Char - the minimum length */ \
|
||||
2, /* Charnc - the minimum length */ \
|
||||
@ -788,7 +859,7 @@ enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9,
|
||||
ERR20, ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR28, ERR29,
|
||||
ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39,
|
||||
ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
|
||||
ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57 };
|
||||
ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58 };
|
||||
|
||||
/* The real format of the start of the pcre block; the index of names and the
|
||||
code vector run on as long as necessary after the end. We store an explicit
|
||||
@ -877,21 +948,11 @@ typedef struct recursion_info {
|
||||
struct recursion_info *prevrec; /* Previous recursion record (or NULL) */
|
||||
int group_num; /* Number of group that was called */
|
||||
const uschar *after_call; /* "Return value": points after the call in the expr */
|
||||
USPTR save_start; /* Old value of md->start_match */
|
||||
USPTR save_start; /* Old value of mstart */
|
||||
int *offset_save; /* Pointer to start of saved offsets */
|
||||
int saved_max; /* Number of saved offsets */
|
||||
} recursion_info;
|
||||
|
||||
/* When compiling in a mode that doesn't use recursive calls to match(),
|
||||
a structure is used to remember local variables on the heap. It is defined in
|
||||
pcre_exec.c, close to the match() function, so that it is easy to keep it in
|
||||
step with any changes of local variable. However, the pointer to the current
|
||||
frame must be saved in some "static" place over a longjmp(). We declare the
|
||||
structure here so that we can put a pointer in the match_data structure. NOTE:
|
||||
This isn't used for a "normal" compilation of pcre. */
|
||||
|
||||
struct heapframe;
|
||||
|
||||
/* Structure for building a chain of data for holding the values of the subject
|
||||
pointer at the start of each subpattern, so as to detect when an empty string
|
||||
has been matched by a subpattern - to break infinite loops. */
|
||||
@ -928,7 +989,7 @@ typedef struct match_data {
|
||||
const uschar *start_code; /* For use when recursing */
|
||||
USPTR start_subject; /* Start of the subject string */
|
||||
USPTR end_subject; /* End of the subject string */
|
||||
USPTR start_match; /* Start of this match attempt */
|
||||
USPTR start_match_ptr; /* Start of matched string */
|
||||
USPTR end_match_ptr; /* Subject position at end match */
|
||||
int end_offset_top; /* Highwater mark at end of match */
|
||||
int capture_last; /* Most recent capture number */
|
||||
@ -937,7 +998,6 @@ typedef struct match_data {
|
||||
int eptrn; /* Next free eptrblock */
|
||||
recursion_info *recursive; /* Linked list of recursion data */
|
||||
void *callout_data; /* To pass back to callouts */
|
||||
struct heapframe *thisframe; /* Used only when compiling for no recursion */
|
||||
} match_data;
|
||||
|
||||
/* A similar structure is used for the same purpose by the DFA matching
|
||||
@ -1024,16 +1084,16 @@ extern const uschar _pcre_OP_lengths[];
|
||||
one of the exported public functions. They have to be "external" in the C
|
||||
sense, but are not part of the PCRE public API. */
|
||||
|
||||
extern BOOL _pcre_is_newline(const uschar *, const uschar *, int *,
|
||||
BOOL);
|
||||
extern BOOL _pcre_is_newline(const uschar *, int, const uschar *,
|
||||
int *, BOOL);
|
||||
extern int _pcre_ord2utf8(int, uschar *);
|
||||
extern real_pcre *_pcre_try_flipped(const real_pcre *, real_pcre *,
|
||||
const pcre_study_data *, pcre_study_data *);
|
||||
extern int _pcre_ucp_findprop(const unsigned int, int *, int *);
|
||||
extern unsigned int _pcre_ucp_othercase(const unsigned int);
|
||||
extern int _pcre_valid_utf8(const uschar *, int);
|
||||
extern BOOL _pcre_was_newline(const uschar *, const uschar *, int *,
|
||||
BOOL);
|
||||
extern BOOL _pcre_was_newline(const uschar *, int, const uschar *,
|
||||
int *, BOOL);
|
||||
extern BOOL _pcre_xclass(int, const uschar *);
|
||||
|
||||
#endif
|
||||
|
@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -42,9 +42,8 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
one kind of newline is to be recognized. When a newline is found, its length is
|
||||
returned. In principle, we could implement several newline "types", each
|
||||
referring to a different set of newline characters. At present, PCRE supports
|
||||
only NLTYPE_FIXED, which gets handled without these functions, and NLTYPE_ALL,
|
||||
so for now the type isn't passed into the functions. It can easily be added
|
||||
later if required. The full list of Unicode newline characters is taken from
|
||||
only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF,
|
||||
and NLTYPE_ANY. The full list of Unicode newline characters is taken from
|
||||
http://unicode.org/unicode/reports/tr18/. */
|
||||
|
||||
|
||||
@ -61,6 +60,7 @@ string that is being processed.
|
||||
|
||||
Arguments:
|
||||
ptr pointer to possible newline
|
||||
type the newline type
|
||||
endptr pointer to the end of the string
|
||||
lenptr where to return the length
|
||||
utf8 TRUE if in utf8 mode
|
||||
@ -69,12 +69,23 @@ Returns: TRUE or FALSE
|
||||
*/
|
||||
|
||||
BOOL
|
||||
_pcre_is_newline(const uschar *ptr, const uschar *endptr, int *lenptr,
|
||||
BOOL utf8)
|
||||
_pcre_is_newline(const uschar *ptr, int type, const uschar *endptr,
|
||||
int *lenptr, BOOL utf8)
|
||||
{
|
||||
int c;
|
||||
if (utf8) { GETCHAR(c, ptr); } else c = *ptr;
|
||||
switch(c)
|
||||
|
||||
if (type == NLTYPE_ANYCRLF) switch(c)
|
||||
{
|
||||
case 0x000a: *lenptr = 1; return TRUE; /* LF */
|
||||
case 0x000d: *lenptr = (ptr < endptr - 1 && ptr[1] == 0x0a)? 2 : 1;
|
||||
return TRUE; /* CR */
|
||||
default: return FALSE;
|
||||
}
|
||||
|
||||
/* NLTYPE_ANY */
|
||||
|
||||
else switch(c)
|
||||
{
|
||||
case 0x000a: /* LF */
|
||||
case 0x000b: /* VT */
|
||||
@ -99,6 +110,7 @@ the string that is being processed.
|
||||
|
||||
Arguments:
|
||||
ptr pointer to possible newline
|
||||
type the newline type
|
||||
startptr pointer to the start of the string
|
||||
lenptr where to return the length
|
||||
utf8 TRUE if in utf8 mode
|
||||
@ -107,8 +119,8 @@ Returns: TRUE or FALSE
|
||||
*/
|
||||
|
||||
BOOL
|
||||
_pcre_was_newline(const uschar *ptr, const uschar *startptr, int *lenptr,
|
||||
BOOL utf8)
|
||||
_pcre_was_newline(const uschar *ptr, int type, const uschar *startptr,
|
||||
int *lenptr, BOOL utf8)
|
||||
{
|
||||
int c;
|
||||
ptr--;
|
||||
@ -118,7 +130,16 @@ if (utf8)
|
||||
GETCHAR(c, ptr);
|
||||
}
|
||||
else c = *ptr;
|
||||
switch(c)
|
||||
|
||||
if (type == NLTYPE_ANYCRLF) switch(c)
|
||||
{
|
||||
case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1;
|
||||
return TRUE; /* LF */
|
||||
case 0x000d: *lenptr = 1; return TRUE; /* CR */
|
||||
default: return FALSE;
|
||||
}
|
||||
|
||||
else switch(c)
|
||||
{
|
||||
case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1;
|
||||
return TRUE; /* LF */
|
||||
|
@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -62,6 +62,7 @@ Returns: number of characters placed in the buffer
|
||||
int
|
||||
_pcre_ord2utf8(int cvalue, uschar *buffer)
|
||||
{
|
||||
#ifdef SUPPORT_UTF8
|
||||
register int i, j;
|
||||
for (i = 0; i < _pcre_utf8_table1_size; i++)
|
||||
if (cvalue <= _pcre_utf8_table1[i]) break;
|
||||
@ -73,6 +74,9 @@ for (j = i; j > 0; j--)
|
||||
}
|
||||
*buffer = _pcre_utf8_table2[i] | cvalue;
|
||||
return i + 1;
|
||||
#else
|
||||
return 0; /* Keep compiler happy; this function won't ever be */
|
||||
#endif /* called when SUPPORT_UTF8 is not defined. */
|
||||
}
|
||||
|
||||
/* End of pcre_ord2utf8.c */
|
||||
|
@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -63,7 +63,7 @@ Returns: the (possibly updated) count value (a non-negative number), or
|
||||
a negative error number
|
||||
*/
|
||||
|
||||
PCRE_DATA_SCOPE int
|
||||
PCRE_EXP_DEFN int
|
||||
pcre_refcount(pcre *argument_re, int adjust)
|
||||
{
|
||||
real_pcre *re = (real_pcre *)argument_re;
|
||||
|
@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -394,11 +394,13 @@ do
|
||||
character with a value > 255. */
|
||||
|
||||
case OP_NCLASS:
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8)
|
||||
{
|
||||
start_bits[24] |= 0xf0; /* Bits for 0xc4 - 0xc7 */
|
||||
memset(start_bits+25, 0xff, 7); /* Bits for 0xc8 - 0xff */
|
||||
}
|
||||
#endif
|
||||
/* Fall through */
|
||||
|
||||
case OP_CLASS:
|
||||
@ -411,6 +413,7 @@ do
|
||||
value is > 127. In fact, there are only two possible starting bytes for
|
||||
characters in the range 128 - 255. */
|
||||
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8)
|
||||
{
|
||||
for (c = 0; c < 16; c++) start_bits[c] |= tcode[c];
|
||||
@ -428,6 +431,7 @@ do
|
||||
/* In non-UTF-8 mode, the two bit maps are completely compatible. */
|
||||
|
||||
else
|
||||
#endif
|
||||
{
|
||||
for (c = 0; c < 32; c++) start_bits[c] |= tcode[c];
|
||||
}
|
||||
@ -487,7 +491,7 @@ Returns: pointer to a pcre_extra block, with study_data filled in and the
|
||||
NULL on error or if no optimization possible
|
||||
*/
|
||||
|
||||
PCRE_DATA_SCOPE pcre_extra *
|
||||
PCRE_EXP_DEFN pcre_extra *
|
||||
pcre_study(const pcre *external_re, int options, const char **errorptr)
|
||||
{
|
||||
uschar start_bits[32];
|
||||
|
@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -61,6 +61,8 @@ const uschar _pcre_OP_lengths[] = { OP_LENGTHS };
|
||||
/* These are the breakpoints for different numbers of bytes in a UTF-8
|
||||
character. */
|
||||
|
||||
#ifdef SUPPORT_UTF8
|
||||
|
||||
const int _pcre_utf8_table1[] =
|
||||
{ 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};
|
||||
|
||||
@ -301,4 +303,6 @@ const ucp_type_table _pcre_utt[] = {
|
||||
|
||||
const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table);
|
||||
|
||||
#endif /* SUPPORT_UTF8 */
|
||||
|
||||
/* End of pcre_tables.c */
|
||||
|
@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -75,7 +75,7 @@ I could find no way of detecting that a macro is defined as an empty string at
|
||||
pre-processor time. This hack uses a standard trick for avoiding calling
|
||||
the STRING macro with an empty argument when doing the test. */
|
||||
|
||||
PCRE_DATA_SCOPE const char *
|
||||
PCRE_EXP_DEFN const char *
|
||||
pcre_version(void)
|
||||
{
|
||||
return (XSTRING(Z PCRE_PRERELEASE)[1] == 0)?
|
||||
|
@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
Loading…
Reference in New Issue
Block a user