mirror of
https://gitlab.gnome.org/GNOME/glib.git
synced 2025-01-12 15:36:17 +01:00
Update to PCRE 7.2
svn path=/trunk/; revision=5659
This commit is contained in:
parent
4067475919
commit
d966e93faf
@ -1,3 +1,7 @@
|
|||||||
|
2007-07-31 Matthias Clasen <mclasen@redhat.com>
|
||||||
|
|
||||||
|
* glib/pcre/*: Update the internal PCRE to 7.2
|
||||||
|
|
||||||
2007-07-31 Matthias Clasen <mclasen@redhat.com>
|
2007-07-31 Matthias Clasen <mclasen@redhat.com>
|
||||||
|
|
||||||
* glib/pltcheck.sh: Fix some glitches
|
* glib/pltcheck.sh: Fix some glitches
|
||||||
|
@ -1,68 +1,5 @@
|
|||||||
PCRE LICENCE
|
PCRE LICENCE
|
||||||
------------
|
|
||||||
|
|
||||||
PCRE is a library of functions to support regular expressions whose syntax
|
Please see the file LICENCE in the PCRE distribution for licensing details.
|
||||||
and semantics are as close as possible to those of the Perl 5 language.
|
|
||||||
|
|
||||||
Release 7 of PCRE is distributed under the terms of the "BSD" licence, as
|
|
||||||
specified below. The documentation for PCRE, supplied in the "doc"
|
|
||||||
directory, is distributed under the same terms as the software itself.
|
|
||||||
|
|
||||||
The basic library functions are written in C and are freestanding. Also
|
|
||||||
included in the distribution is a set of C++ wrapper functions.
|
|
||||||
|
|
||||||
|
|
||||||
THE BASIC LIBRARY FUNCTIONS
|
|
||||||
---------------------------
|
|
||||||
|
|
||||||
Written by: Philip Hazel
|
|
||||||
Email local part: ph10
|
|
||||||
Email domain: cam.ac.uk
|
|
||||||
|
|
||||||
University of Cambridge Computing Service,
|
|
||||||
Cambridge, England. Phone: +44 1223 334714.
|
|
||||||
|
|
||||||
Copyright (c) 1997-2006 University of Cambridge
|
|
||||||
All rights reserved.
|
|
||||||
|
|
||||||
|
|
||||||
THE C++ WRAPPER FUNCTIONS
|
|
||||||
-------------------------
|
|
||||||
|
|
||||||
Contributed by: Google Inc.
|
|
||||||
|
|
||||||
Copyright (c) 2006, Google Inc.
|
|
||||||
All rights reserved.
|
|
||||||
|
|
||||||
|
|
||||||
THE "BSD" LICENCE
|
|
||||||
-----------------
|
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain the above copyright notice,
|
|
||||||
this list of conditions and the following disclaimer.
|
|
||||||
|
|
||||||
* Redistributions in binary form must reproduce the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer in the
|
|
||||||
documentation and/or other materials provided with the distribution.
|
|
||||||
|
|
||||||
* Neither the name of the University of Cambridge nor the name of Google
|
|
||||||
Inc. nor the names of their contributors may be used to endorse or
|
|
||||||
promote products derived from this software without specific prior
|
|
||||||
written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
End
|
End
|
||||||
|
@ -9,7 +9,7 @@ INCLUDES = \
|
|||||||
-DMAX_NAME_COUNT=10000 \
|
-DMAX_NAME_COUNT=10000 \
|
||||||
-DMAX_DUPLENGTH=30000 \
|
-DMAX_DUPLENGTH=30000 \
|
||||||
-DLINK_SIZE=2 \
|
-DLINK_SIZE=2 \
|
||||||
-DEBCDIC=0 \
|
-UEBCDIC \
|
||||||
-DPOSIX_MALLOC_THRESHOLD=10 \
|
-DPOSIX_MALLOC_THRESHOLD=10 \
|
||||||
-I$(top_srcdir) \
|
-I$(top_srcdir) \
|
||||||
-I$(srcdir) \
|
-I$(srcdir) \
|
||||||
|
@ -5,7 +5,7 @@
|
|||||||
/* This is the public header file for the PCRE library, to be #included by
|
/* This is the public header file for the PCRE library, to be #included by
|
||||||
applications that call the PCRE functions.
|
applications that call the PCRE functions.
|
||||||
|
|
||||||
Copyright (c) 1997-2006 University of Cambridge
|
Copyright (c) 1997-2007 University of Cambridge
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
@ -41,47 +41,31 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||||||
|
|
||||||
/* The current PCRE version information. */
|
/* The current PCRE version information. */
|
||||||
|
|
||||||
/* NOTES FOR FUTURE MAINTAINERS: Do not use numbers with leading zeros, because
|
|
||||||
they may be treated as octal constants. The PCRE_PRERELEASE feature is for
|
|
||||||
identifying release candidates. It might be defined as -RC2, for example. In
|
|
||||||
real releases, it should be defined empty. Do not change the alignment of these
|
|
||||||
statements. The code in ./configure greps out the version numbers by using "cut"
|
|
||||||
to get values from column 29 onwards. These are substituted into pcre-config
|
|
||||||
and libpcre.pc. The values are not put into configure.ac and substituted here
|
|
||||||
(which would simplify this issue) because that makes life harder for those who
|
|
||||||
cannot run ./configure. As it now stands, this file need not be edited in that
|
|
||||||
circumstance. */
|
|
||||||
|
|
||||||
#define PCRE_MAJOR 7
|
#define PCRE_MAJOR 7
|
||||||
#define PCRE_MINOR 0
|
#define PCRE_MINOR 2
|
||||||
#define PCRE_PRERELEASE
|
#define PCRE_PRERELEASE
|
||||||
#define PCRE_DATE 18-Dec-2006
|
#define PCRE_DATE 2007-06-19
|
||||||
|
|
||||||
/* Win32 uses DLL by default; it needs special stuff for exported functions
|
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||||
when building PCRE. */
|
imported have to be identified as such. When building PCRE, the appropriate
|
||||||
|
export setting is defined in pcre_internal.h, which includes this file. So we
|
||||||
|
don't change an existing definition of PCRE_EXP_DECL. */
|
||||||
|
|
||||||
/* But don't do that when building as part of GLib */
|
#ifndef PCRE_EXP_DECL
|
||||||
#if 0
|
# ifdef _WIN32
|
||||||
#ifdef _WIN32
|
|
||||||
# ifdef PCRE_DEFINITION
|
|
||||||
# ifdef DLL_EXPORT
|
|
||||||
# define PCRE_DATA_SCOPE __declspec(dllexport)
|
|
||||||
# endif
|
|
||||||
# else
|
|
||||||
# ifndef PCRE_STATIC
|
# ifndef PCRE_STATIC
|
||||||
# define PCRE_DATA_SCOPE extern __declspec(dllimport)
|
# define PCRE_EXP_DECL extern __declspec(dllimport)
|
||||||
# endif
|
# endif
|
||||||
# endif
|
# endif
|
||||||
#endif
|
#endif
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Otherwise, we use the standard "extern". */
|
/* By default, we use the standard "extern" declarations. */
|
||||||
|
|
||||||
#ifndef PCRE_DATA_SCOPE
|
#ifndef PCRE_EXP_DECL
|
||||||
# ifdef __cplusplus
|
# ifdef __cplusplus
|
||||||
# define PCRE_DATA_SCOPE extern "C"
|
# define PCRE_EXP_DECL extern "C"
|
||||||
# else
|
# else
|
||||||
# define PCRE_DATA_SCOPE extern
|
# define PCRE_EXP_DECL extern
|
||||||
# endif
|
# endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -122,6 +106,7 @@ extern "C" {
|
|||||||
#define PCRE_NEWLINE_LF 0x00200000
|
#define PCRE_NEWLINE_LF 0x00200000
|
||||||
#define PCRE_NEWLINE_CRLF 0x00300000
|
#define PCRE_NEWLINE_CRLF 0x00300000
|
||||||
#define PCRE_NEWLINE_ANY 0x00400000
|
#define PCRE_NEWLINE_ANY 0x00400000
|
||||||
|
#define PCRE_NEWLINE_ANYCRLF 0x00500000
|
||||||
|
|
||||||
/* Exec-time and get/set-time error codes */
|
/* Exec-time and get/set-time error codes */
|
||||||
|
|
||||||
@ -165,6 +150,8 @@ extern "C" {
|
|||||||
#define PCRE_INFO_NAMETABLE 9
|
#define PCRE_INFO_NAMETABLE 9
|
||||||
#define PCRE_INFO_STUDYSIZE 10
|
#define PCRE_INFO_STUDYSIZE 10
|
||||||
#define PCRE_INFO_DEFAULT_TABLES 11
|
#define PCRE_INFO_DEFAULT_TABLES 11
|
||||||
|
#define PCRE_INFO_OKPARTIAL 12
|
||||||
|
#define PCRE_INFO_JCHANGED 13
|
||||||
|
|
||||||
/* Request types for pcre_config(). Do not re-arrange, in order to remain
|
/* Request types for pcre_config(). Do not re-arrange, in order to remain
|
||||||
compatible. */
|
compatible. */
|
||||||
@ -243,41 +230,41 @@ typedef struct pcre_callout_block {
|
|||||||
#define pcre_free g_free
|
#define pcre_free g_free
|
||||||
#define pcre_stack_malloc g_try_malloc
|
#define pcre_stack_malloc g_try_malloc
|
||||||
|
|
||||||
PCRE_DATA_SCOPE int (*pcre_callout)(pcre_callout_block *);
|
PCRE_EXP_DECL int (*pcre_callout)(pcre_callout_block *);
|
||||||
|
|
||||||
/* Exported PCRE functions */
|
/* Exported PCRE functions */
|
||||||
|
|
||||||
PCRE_DATA_SCOPE pcre *pcre_compile(const char *, int, const char **, int *,
|
PCRE_EXP_DECL pcre *pcre_compile(const char *, int, const char **, int *,
|
||||||
const unsigned char *);
|
const unsigned char *);
|
||||||
PCRE_DATA_SCOPE pcre *pcre_compile2(const char *, int, int *, const char **,
|
PCRE_EXP_DECL pcre *pcre_compile2(const char *, int, int *, const char **,
|
||||||
int *, const unsigned char *);
|
int *, const unsigned char *);
|
||||||
PCRE_DATA_SCOPE int pcre_config(int, void *);
|
PCRE_EXP_DECL int pcre_config(int, void *);
|
||||||
PCRE_DATA_SCOPE int pcre_copy_named_substring(const pcre *, const char *,
|
PCRE_EXP_DECL int pcre_copy_named_substring(const pcre *, const char *,
|
||||||
int *, int, const char *, char *, int);
|
int *, int, const char *, char *, int);
|
||||||
PCRE_DATA_SCOPE int pcre_copy_substring(const char *, int *, int, int, char *,
|
PCRE_EXP_DECL int pcre_copy_substring(const char *, int *, int, int, char *,
|
||||||
int);
|
int);
|
||||||
PCRE_DATA_SCOPE int pcre_dfa_exec(const pcre *, const pcre_extra *,
|
PCRE_EXP_DECL int pcre_dfa_exec(const pcre *, const pcre_extra *,
|
||||||
const char *, int, int, int, int *, int , int *, int);
|
const char *, int, int, int, int *, int , int *, int);
|
||||||
PCRE_DATA_SCOPE int pcre_exec(const pcre *, const pcre_extra *, PCRE_SPTR,
|
PCRE_EXP_DECL int pcre_exec(const pcre *, const pcre_extra *, PCRE_SPTR,
|
||||||
int, int, int, int *, int);
|
int, int, int, int *, int);
|
||||||
PCRE_DATA_SCOPE void pcre_free_substring(const char *);
|
PCRE_EXP_DECL void pcre_free_substring(const char *);
|
||||||
PCRE_DATA_SCOPE void pcre_free_substring_list(const char **);
|
PCRE_EXP_DECL void pcre_free_substring_list(const char **);
|
||||||
PCRE_DATA_SCOPE int pcre_fullinfo(const pcre *, const pcre_extra *, int,
|
PCRE_EXP_DECL int pcre_fullinfo(const pcre *, const pcre_extra *, int,
|
||||||
void *);
|
void *);
|
||||||
PCRE_DATA_SCOPE int pcre_get_named_substring(const pcre *, const char *,
|
PCRE_EXP_DECL int pcre_get_named_substring(const pcre *, const char *,
|
||||||
int *, int, const char *, const char **);
|
int *, int, const char *, const char **);
|
||||||
PCRE_DATA_SCOPE int pcre_get_stringnumber(const pcre *, const char *);
|
PCRE_EXP_DECL int pcre_get_stringnumber(const pcre *, const char *);
|
||||||
PCRE_DATA_SCOPE int pcre_get_stringtable_entries(const pcre *, const char *,
|
PCRE_EXP_DECL int pcre_get_stringtable_entries(const pcre *, const char *,
|
||||||
char **, char **);
|
char **, char **);
|
||||||
PCRE_DATA_SCOPE int pcre_get_substring(const char *, int *, int, int,
|
PCRE_EXP_DECL int pcre_get_substring(const char *, int *, int, int,
|
||||||
const char **);
|
const char **);
|
||||||
PCRE_DATA_SCOPE int pcre_get_substring_list(const char *, int *, int,
|
PCRE_EXP_DECL int pcre_get_substring_list(const char *, int *, int,
|
||||||
const char ***);
|
const char ***);
|
||||||
PCRE_DATA_SCOPE int pcre_info(const pcre *, int *, int *);
|
PCRE_EXP_DECL int pcre_info(const pcre *, int *, int *);
|
||||||
PCRE_DATA_SCOPE const unsigned char *pcre_maketables(void);
|
PCRE_EXP_DECL const unsigned char *pcre_maketables(void);
|
||||||
PCRE_DATA_SCOPE int pcre_refcount(pcre *, int);
|
PCRE_EXP_DECL int pcre_refcount(pcre *, int);
|
||||||
PCRE_DATA_SCOPE pcre_extra *pcre_study(const pcre *, int, const char **);
|
PCRE_EXP_DECL pcre_extra *pcre_study(const pcre *, int, const char **);
|
||||||
PCRE_DATA_SCOPE const char *pcre_version(void);
|
PCRE_EXP_DECL const char *pcre_version(void);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
} /* extern "C" */
|
} /* extern "C" */
|
||||||
|
@ -1,24 +1,24 @@
|
|||||||
/* This file is autogenerated by ../update-pcre/update.sh during
|
|
||||||
* the update of the local copy of PCRE.
|
|
||||||
*/
|
|
||||||
/*************************************************
|
/*************************************************
|
||||||
* Perl-Compatible Regular Expressions *
|
* Perl-Compatible Regular Expressions *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
|
||||||
/* This file is automatically written by the dftables auxiliary
|
/* This file contains character tables that are used when no external tables
|
||||||
program. If you edit it by hand, you might like to edit the Makefile to
|
are passed to PCRE by the application that calls it. The tables are used only
|
||||||
prevent its ever being regenerated.
|
for characters whose code values are less than 256.
|
||||||
|
|
||||||
This file contains the default tables for characters with codes less than
|
This is a default version of the tables that assumes ASCII encoding. A program
|
||||||
128 (ASCII characters). These tables are used when no external tables are
|
called dftables (which is distributed with PCRE) can be used to build
|
||||||
passed to PCRE.
|
alternative versions of this file. This is necessary if you are running in an
|
||||||
|
EBCDIC environment, or if you want to default to a different encoding, for
|
||||||
|
example ISO-8859-1. When dftables is run, it creates these tables in the
|
||||||
|
current locale. If PCRE is configured with --enable-rebuild-chartables, this
|
||||||
|
happens automatically.
|
||||||
|
|
||||||
The following #include is present because without it gcc 4.x may remove
|
The following #include is present because without it gcc 4.x may remove the
|
||||||
the array definition from the final binary if PCRE is built into a static
|
array definition from the final binary if PCRE is built into a static library
|
||||||
library and dead code stripping is activated. This leads to link errors.
|
and dead code stripping is activated. This leads to link errors. Pulling in the
|
||||||
Pulling in the header ensures that the array gets flagged as "someone
|
header ensures that the array gets flagged as "someone outside this compilation
|
||||||
outside this compilation unit might reference this" and so it will always
|
unit might reference this" and so it will always be supplied to the linker. */
|
||||||
be supplied to the linker. */
|
|
||||||
|
|
||||||
#include "pcre_internal.h"
|
#include "pcre_internal.h"
|
||||||
|
|
||||||
@ -94,11 +94,10 @@ const unsigned char _pcre_default_tables[] = {
|
|||||||
240,241,242,243,244,245,246,247,
|
240,241,242,243,244,245,246,247,
|
||||||
248,249,250,251,252,253,254,255,
|
248,249,250,251,252,253,254,255,
|
||||||
|
|
||||||
/* This table contains bit maps for various character classes.
|
/* This table contains bit maps for various character classes. Each map is 32
|
||||||
Each map is 32 bytes long and the bits run from the least
|
bytes long and the bits run from the least significant end of each byte. The
|
||||||
significant end of each byte. The classes that have their own
|
classes that have their own maps are: space, xdigit, digit, upper, lower, word,
|
||||||
maps are: space, xdigit, digit, upper, lower, word, graph
|
graph, print, punct, and cntrl. Other classes are built from combinations. */
|
||||||
print, punct, and cntrl. Other classes are built from combinations. */
|
|
||||||
|
|
||||||
0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
|
0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
|
||||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
@ -192,4 +191,4 @@ print, punct, and cntrl. Other classes are built from combinations. */
|
|||||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
|
||||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
|
||||||
|
|
||||||
/* End of chartables.c */
|
/* End of pcre_chartables.c */
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
and semantics are as close as possible to those of the Perl 5 language.
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
Written by Philip Hazel
|
Written by Philip Hazel
|
||||||
Copyright (c) 1997-2006 University of Cambridge
|
Copyright (c) 1997-2007 University of Cambridge
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
@ -58,6 +58,11 @@ used by pcretest. DEBUG is not defined when building a production library. */
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
/* Macro for setting individual bits in class bitmaps. */
|
||||||
|
|
||||||
|
/* SETBIT(a,b) sets bit number b in the byte array a (bit 0 is the least
significant bit of a[0]). Both arguments and the whole expansion are
parenthesized so that expression arguments such as "c + 1" select the intended
byte and bit. Note that b is evaluated twice, so it must not have side
effects. */

#define SETBIT(a,b) ((a)[(b)/8] |= (1 << ((b)%8)))
|
||||||
|
|
||||||
|
|
||||||
/*************************************************
|
/*************************************************
|
||||||
* Code parameters and static tables *
|
* Code parameters and static tables *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
@ -82,21 +87,21 @@ are simple data values; negative values are for special things like \d and so
|
|||||||
on. Zero means further processing is needed (for things like \x), or the escape
|
on. Zero means further processing is needed (for things like \x), or the escape
|
||||||
is invalid. */
|
is invalid. */
|
||||||
|
|
||||||
#if !EBCDIC /* This is the "normal" table for ASCII systems */
|
#ifndef EBCDIC /* This is the "normal" table for ASCII systems */
|
||||||
static const short int escapes[] = {
|
static const short int escapes[] = {
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, /* 0 - 7 */
|
0, 0, 0, 0, 0, 0, 0, 0, /* 0 - 7 */
|
||||||
0, 0, ':', ';', '<', '=', '>', '?', /* 8 - ? */
|
0, 0, ':', ';', '<', '=', '>', '?', /* 8 - ? */
|
||||||
'@', -ESC_A, -ESC_B, -ESC_C, -ESC_D, -ESC_E, 0, -ESC_G, /* @ - G */
|
'@', -ESC_A, -ESC_B, -ESC_C, -ESC_D, -ESC_E, 0, -ESC_G, /* @ - G */
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, /* H - O */
|
-ESC_H, 0, 0, -ESC_K, 0, 0, 0, 0, /* H - O */
|
||||||
-ESC_P, -ESC_Q, -ESC_R, -ESC_S, 0, 0, 0, -ESC_W, /* P - W */
|
-ESC_P, -ESC_Q, -ESC_R, -ESC_S, 0, 0, -ESC_V, -ESC_W, /* P - W */
|
||||||
-ESC_X, 0, -ESC_Z, '[', '\\', ']', '^', '_', /* X - _ */
|
-ESC_X, 0, -ESC_Z, '[', '\\', ']', '^', '_', /* X - _ */
|
||||||
'`', 7, -ESC_b, 0, -ESC_d, ESC_e, ESC_f, 0, /* ` - g */
|
'`', 7, -ESC_b, 0, -ESC_d, ESC_e, ESC_f, 0, /* ` - g */
|
||||||
0, 0, 0, -ESC_k, 0, 0, ESC_n, 0, /* h - o */
|
-ESC_h, 0, 0, -ESC_k, 0, 0, ESC_n, 0, /* h - o */
|
||||||
-ESC_p, 0, ESC_r, -ESC_s, ESC_tee, 0, 0, -ESC_w, /* p - w */
|
-ESC_p, 0, ESC_r, -ESC_s, ESC_tee, 0, -ESC_v, -ESC_w, /* p - w */
|
||||||
0, 0, -ESC_z /* x - z */
|
0, 0, -ESC_z /* x - z */
|
||||||
};
|
};
|
||||||
|
|
||||||
#else /* This is the "abnormal" table for EBCDIC systems */
|
#else /* This is the "abnormal" table for EBCDIC systems */
|
||||||
static const short int escapes[] = {
|
static const short int escapes[] = {
|
||||||
/* 48 */ 0, 0, 0, '.', '<', '(', '+', '|',
|
/* 48 */ 0, 0, 0, '.', '<', '(', '+', '|',
|
||||||
/* 50 */ '&', 0, 0, 0, 0, 0, 0, 0,
|
/* 50 */ '&', 0, 0, 0, 0, 0, 0, 0,
|
||||||
@ -106,18 +111,18 @@ static const short int escapes[] = {
|
|||||||
/* 70 */ 0, 0, 0, 0, 0, 0, 0, 0,
|
/* 70 */ 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
/* 78 */ 0, '`', ':', '#', '@', '\'', '=', '"',
|
/* 78 */ 0, '`', ':', '#', '@', '\'', '=', '"',
|
||||||
/* 80 */ 0, 7, -ESC_b, 0, -ESC_d, ESC_e, ESC_f, 0,
|
/* 80 */ 0, 7, -ESC_b, 0, -ESC_d, ESC_e, ESC_f, 0,
|
||||||
/* 88 */ 0, 0, 0, '{', 0, 0, 0, 0,
|
/* 88 */-ESC_h, 0, 0, '{', 0, 0, 0, 0,
|
||||||
/* 90 */ 0, 0, -ESC_k, 'l', 0, ESC_n, 0, -ESC_p,
|
/* 90 */ 0, 0, -ESC_k, 'l', 0, ESC_n, 0, -ESC_p,
|
||||||
/* 98 */ 0, ESC_r, 0, '}', 0, 0, 0, 0,
|
/* 98 */ 0, ESC_r, 0, '}', 0, 0, 0, 0,
|
||||||
/* A0 */ 0, '~', -ESC_s, ESC_tee, 0, 0, -ESC_w, 0,
|
/* A0 */ 0, '~', -ESC_s, ESC_tee, 0,-ESC_v, -ESC_w, 0,
|
||||||
/* A8 */ 0,-ESC_z, 0, 0, 0, '[', 0, 0,
|
/* A8 */ 0,-ESC_z, 0, 0, 0, '[', 0, 0,
|
||||||
/* B0 */ 0, 0, 0, 0, 0, 0, 0, 0,
|
/* B0 */ 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
/* B8 */ 0, 0, 0, 0, 0, ']', '=', '-',
|
/* B8 */ 0, 0, 0, 0, 0, ']', '=', '-',
|
||||||
/* C0 */ '{',-ESC_A, -ESC_B, -ESC_C, -ESC_D,-ESC_E, 0, -ESC_G,
|
/* C0 */ '{',-ESC_A, -ESC_B, -ESC_C, -ESC_D,-ESC_E, 0, -ESC_G,
|
||||||
/* C8 */ 0, 0, 0, 0, 0, 0, 0, 0,
|
/* C8 */-ESC_H, 0, 0, 0, 0, 0, 0, 0,
|
||||||
/* D0 */ '}', 0, 0, 0, 0, 0, 0, -ESC_P,
|
/* D0 */ '}', 0, 0, 0, 0, 0, 0, -ESC_P,
|
||||||
/* D8 */-ESC_Q,-ESC_R, 0, 0, 0, 0, 0, 0,
|
/* D8 */-ESC_Q,-ESC_R, 0, 0, 0, 0, 0, 0,
|
||||||
/* E0 */ '\\', 0, -ESC_S, 0, 0, 0, -ESC_W, -ESC_X,
|
/* E0 */ '\\', 0, -ESC_S, 0, 0,-ESC_V, -ESC_W, -ESC_X,
|
||||||
/* E8 */ 0,-ESC_Z, 0, 0, 0, 0, 0, 0,
|
/* E8 */ 0,-ESC_Z, 0, 0, 0, 0, 0, 0,
|
||||||
/* F0 */ 0, 0, 0, 0, 0, 0, 0, 0,
|
/* F0 */ 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
/* F8 */ 0, 0, 0, 0, 0, 0, 0, 0
|
/* F8 */ 0, 0, 0, 0, 0, 0, 0, 0
|
||||||
@ -221,7 +226,7 @@ static const char error_texts[] =
|
|||||||
"malformed number or name after (?(\0"
|
"malformed number or name after (?(\0"
|
||||||
"conditional group contains more than two branches\0"
|
"conditional group contains more than two branches\0"
|
||||||
"assertion expected after (?(\0"
|
"assertion expected after (?(\0"
|
||||||
"(?R or (?digits must be followed by )\0"
|
"(?R or (?[+-]digits must be followed by )\0"
|
||||||
/* 30 */
|
/* 30 */
|
||||||
"unknown POSIX class name\0"
|
"unknown POSIX class name\0"
|
||||||
"POSIX collating elements are not supported\0"
|
"POSIX collating elements are not supported\0"
|
||||||
@ -255,7 +260,8 @@ static const char error_texts[] =
|
|||||||
/* 55 */
|
/* 55 */
|
||||||
"repeating a DEFINE group is not allowed\0"
|
"repeating a DEFINE group is not allowed\0"
|
||||||
"inconsistent NEWLINE options\0"
|
"inconsistent NEWLINE options\0"
|
||||||
"\\g is not followed by an (optionally braced) non-zero number";
|
"\\g is not followed by a braced name or an optionally braced non-zero number\0"
|
||||||
|
"(?+ or (?- or (?(+ or (?(- must be followed by a non-zero number";
|
||||||
|
|
||||||
static const int error_texts_offsets[] = {
|
static const int error_texts_offsets[] = {
|
||||||
0,
|
0,
|
||||||
@ -315,15 +321,14 @@ static const int error_texts_offsets[] = {
|
|||||||
1796,
|
1796,
|
||||||
1839,
|
1839,
|
||||||
1879,
|
1879,
|
||||||
1908
|
1908,
|
||||||
|
1984
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
/* Definition to allow mutual recursion */
|
/* Definition to allow mutual recursion */
|
||||||
|
|
||||||
static BOOL
|
static BOOL
|
||||||
compile_regex(int, int, uschar **, const uschar **, int *, BOOL, int, int *,
|
compile_regex(int, int, uschar **, const uschar **, int *, BOOL, BOOL, int,
|
||||||
int *, branch_chain *, compile_data *, int *);
|
int *, int *, branch_chain *, compile_data *, int *);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -370,11 +375,11 @@ if (c == 0) *errorcodeptr = ERR1;
|
|||||||
a table. A non-zero result is something that can be returned immediately.
|
a table. A non-zero result is something that can be returned immediately.
|
||||||
Otherwise further processing may be required. */
|
Otherwise further processing may be required. */
|
||||||
|
|
||||||
#if !EBCDIC /* ASCII coding */
|
#ifndef EBCDIC /* ASCII coding */
|
||||||
else if (c < '0' || c > 'z') {} /* Not alphameric */
|
else if (c < '0' || c > 'z') {} /* Not alphameric */
|
||||||
else if ((i = escapes[c - '0']) != 0) c = i;
|
else if ((i = escapes[c - '0']) != 0) c = i;
|
||||||
|
|
||||||
#else /* EBCDIC coding */
|
#else /* EBCDIC coding */
|
||||||
else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {} /* Not alphameric */
|
else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {} /* Not alphameric */
|
||||||
else if ((i = escapes[c - 0x48]) != 0) c = i;
|
else if ((i = escapes[c - 0x48]) != 0) c = i;
|
||||||
#endif
|
#endif
|
||||||
@ -401,11 +406,22 @@ else
|
|||||||
|
|
||||||
/* \g must be followed by a number, either plain or braced. If positive, it
|
/* \g must be followed by a number, either plain or braced. If positive, it
|
||||||
is an absolute backreference. If negative, it is a relative backreference.
|
is an absolute backreference. If negative, it is a relative backreference.
|
||||||
This is a Perl 5.10 feature. */
|
This is a Perl 5.10 feature. Perl 5.10 also supports \g{name} as a
|
||||||
|
reference to a named group. This is part of Perl's movement towards a
|
||||||
|
unified syntax for back references. As this is synonymous with \k{name}, we
|
||||||
|
fudge it up by pretending it really was \k. */
|
||||||
|
|
||||||
case 'g':
|
case 'g':
|
||||||
if (ptr[1] == '{')
|
if (ptr[1] == '{')
|
||||||
{
|
{
|
||||||
|
const uschar *p;
|
||||||
|
for (p = ptr+2; *p != 0 && *p != '}'; p++)
|
||||||
|
if (*p != '-' && g_ascii_isdigit(*p) == 0) break;
|
||||||
|
if (*p != 0 && *p != '}')
|
||||||
|
{
|
||||||
|
c = -ESC_k;
|
||||||
|
break;
|
||||||
|
}
|
||||||
braced = TRUE;
|
braced = TRUE;
|
||||||
ptr++;
|
ptr++;
|
||||||
}
|
}
|
||||||
@ -511,10 +527,10 @@ else
|
|||||||
if (c == 0 && cc == '0') continue; /* Leading zeroes */
|
if (c == 0 && cc == '0') continue; /* Leading zeroes */
|
||||||
count++;
|
count++;
|
||||||
|
|
||||||
#if !EBCDIC /* ASCII coding */
|
#ifndef EBCDIC /* ASCII coding */
|
||||||
if (cc >= 'a') cc -= 32; /* Convert to upper case */
|
if (cc >= 'a') cc -= 32; /* Convert to upper case */
|
||||||
c = (c << 4) + cc - ((cc < 'A')? '0' : ('A' - 10));
|
c = (c << 4) + cc - ((cc < 'A')? '0' : ('A' - 10));
|
||||||
#else /* EBCDIC coding */
|
#else /* EBCDIC coding */
|
||||||
if (cc >= 'a' && cc <= 'z') cc += 64; /* Convert to upper case */
|
if (cc >= 'a' && cc <= 'z') cc += 64; /* Convert to upper case */
|
||||||
c = (c << 4) + cc - ((cc >= '0')? '0' : ('A' - 10));
|
c = (c << 4) + cc - ((cc >= '0')? '0' : ('A' - 10));
|
||||||
#endif
|
#endif
|
||||||
@ -538,10 +554,10 @@ else
|
|||||||
{
|
{
|
||||||
int cc; /* Some compilers don't like ++ */
|
int cc; /* Some compilers don't like ++ */
|
||||||
cc = *(++ptr); /* in initializers */
|
cc = *(++ptr); /* in initializers */
|
||||||
#if !EBCDIC /* ASCII coding */
|
#ifndef EBCDIC /* ASCII coding */
|
||||||
if (cc >= 'a') cc -= 32; /* Convert to upper case */
|
if (cc >= 'a') cc -= 32; /* Convert to upper case */
|
||||||
c = c * 16 + cc - ((cc < 'A')? '0' : ('A' - 10));
|
c = c * 16 + cc - ((cc < 'A')? '0' : ('A' - 10));
|
||||||
#else /* EBCDIC coding */
|
#else /* EBCDIC coding */
|
||||||
if (cc <= 'z') cc += 64; /* Convert to upper case */
|
if (cc <= 'z') cc += 64; /* Convert to upper case */
|
||||||
c = c * 16 + cc - ((cc >= '0')? '0' : ('A' - 10));
|
c = c * 16 + cc - ((cc >= '0')? '0' : ('A' - 10));
|
||||||
#endif
|
#endif
|
||||||
@ -560,10 +576,10 @@ else
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if !EBCDIC /* ASCII coding */
|
#ifndef EBCDIC /* ASCII coding */
|
||||||
if (c >= 'a' && c <= 'z') c -= 32;
|
if (c >= 'a' && c <= 'z') c -= 32;
|
||||||
c ^= 0x40;
|
c ^= 0x40;
|
||||||
#else /* EBCDIC coding */
|
#else /* EBCDIC coding */
|
||||||
if (c >= 'a' && c <= 'z') c += 64;
|
if (c >= 'a' && c <= 'z') c += 64;
|
||||||
c ^= 0xC0;
|
c ^= 0xC0;
|
||||||
#endif
|
#endif
|
||||||
@ -1195,6 +1211,7 @@ for (;;)
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
code += _pcre_OP_lengths[c];
|
code += _pcre_OP_lengths[c];
|
||||||
|
#ifdef SUPPORT_UTF8
|
||||||
if (utf8) switch(c)
|
if (utf8) switch(c)
|
||||||
{
|
{
|
||||||
case OP_CHAR:
|
case OP_CHAR:
|
||||||
@ -1215,6 +1232,7 @@ for (;;)
|
|||||||
if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];
|
if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1258,6 +1276,7 @@ for (;;)
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
code += _pcre_OP_lengths[c];
|
code += _pcre_OP_lengths[c];
|
||||||
|
#ifdef SUPPORT_UTF8
|
||||||
if (utf8) switch(c)
|
if (utf8) switch(c)
|
||||||
{
|
{
|
||||||
case OP_CHAR:
|
case OP_CHAR:
|
||||||
@ -1278,6 +1297,7 @@ for (;;)
|
|||||||
if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];
|
if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1315,6 +1335,18 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE
|
|||||||
|
|
||||||
c = *code;
|
c = *code;
|
||||||
|
|
||||||
|
/* Groups with zero repeats can of course be empty; skip them. */
|
||||||
|
|
||||||
|
if (c == OP_BRAZERO || c == OP_BRAMINZERO)
|
||||||
|
{
|
||||||
|
code += _pcre_OP_lengths[c];
|
||||||
|
do code += GET(code, 1); while (*code == OP_ALT);
|
||||||
|
c = *code;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* For other groups, scan the branches. */
|
||||||
|
|
||||||
if (c == OP_BRA || c == OP_CBRA || c == OP_ONCE)
|
if (c == OP_BRA || c == OP_CBRA || c == OP_ONCE)
|
||||||
{
|
{
|
||||||
BOOL empty_branch;
|
BOOL empty_branch;
|
||||||
@ -1331,12 +1363,7 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE
|
|||||||
}
|
}
|
||||||
while (*code == OP_ALT);
|
while (*code == OP_ALT);
|
||||||
if (!empty_branch) return FALSE; /* All branches are non-empty */
|
if (!empty_branch) return FALSE; /* All branches are non-empty */
|
||||||
|
c = *code;
|
||||||
/* Move past the KET and fudge things so that the increment in the "for"
|
|
||||||
above has no effect. */
|
|
||||||
|
|
||||||
c = OP_END;
|
|
||||||
code += 1 + LINK_SIZE - _pcre_OP_lengths[c];
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1530,8 +1557,8 @@ check_posix_name(const uschar *ptr, int len)
|
|||||||
int yield = 0;
|
int yield = 0;
|
||||||
while (posix_name_lengths[yield] != 0)
|
while (posix_name_lengths[yield] != 0)
|
||||||
{
|
{
|
||||||
if (len == posix_name_lengths[yield] &&
|
if (len == posix_name_lengths[yield] &&
|
||||||
strcmp((const char *)ptr, posix_names + offset) == 0) return yield;
|
strcmp((const char *)ptr, posix_names + offset) == 0) return yield;
|
||||||
offset += posix_name_lengths[yield] + 1;
|
offset += posix_name_lengths[yield] + 1;
|
||||||
yield++;
|
yield++;
|
||||||
}
|
}
|
||||||
@ -1872,6 +1899,50 @@ if (next >= 0) switch(op_code)
|
|||||||
case OP_NOT_WORDCHAR:
|
case OP_NOT_WORDCHAR:
|
||||||
return next <= 127 && (cd->ctypes[next] & ctype_word) != 0;
|
return next <= 127 && (cd->ctypes[next] & ctype_word) != 0;
|
||||||
|
|
||||||
|
case OP_HSPACE:
|
||||||
|
case OP_NOT_HSPACE:
|
||||||
|
switch(next)
|
||||||
|
{
|
||||||
|
case 0x09:
|
||||||
|
case 0x20:
|
||||||
|
case 0xa0:
|
||||||
|
case 0x1680:
|
||||||
|
case 0x180e:
|
||||||
|
case 0x2000:
|
||||||
|
case 0x2001:
|
||||||
|
case 0x2002:
|
||||||
|
case 0x2003:
|
||||||
|
case 0x2004:
|
||||||
|
case 0x2005:
|
||||||
|
case 0x2006:
|
||||||
|
case 0x2007:
|
||||||
|
case 0x2008:
|
||||||
|
case 0x2009:
|
||||||
|
case 0x200A:
|
||||||
|
case 0x202f:
|
||||||
|
case 0x205f:
|
||||||
|
case 0x3000:
|
||||||
|
return op_code != OP_HSPACE;
|
||||||
|
default:
|
||||||
|
return op_code == OP_HSPACE;
|
||||||
|
}
|
||||||
|
|
||||||
|
case OP_VSPACE:
|
||||||
|
case OP_NOT_VSPACE:
|
||||||
|
switch(next)
|
||||||
|
{
|
||||||
|
case 0x0a:
|
||||||
|
case 0x0b:
|
||||||
|
case 0x0c:
|
||||||
|
case 0x0d:
|
||||||
|
case 0x85:
|
||||||
|
case 0x2028:
|
||||||
|
case 0x2029:
|
||||||
|
return op_code != OP_VSPACE;
|
||||||
|
default:
|
||||||
|
return op_code == OP_VSPACE;
|
||||||
|
}
|
||||||
|
|
||||||
default:
|
default:
|
||||||
return FALSE;
|
return FALSE;
|
||||||
}
|
}
|
||||||
@ -1906,12 +1977,57 @@ switch(op_code)
|
|||||||
case ESC_W:
|
case ESC_W:
|
||||||
return item <= 127 && (cd->ctypes[item] & ctype_word) != 0;
|
return item <= 127 && (cd->ctypes[item] & ctype_word) != 0;
|
||||||
|
|
||||||
|
case ESC_h:
|
||||||
|
case ESC_H:
|
||||||
|
switch(item)
|
||||||
|
{
|
||||||
|
case 0x09:
|
||||||
|
case 0x20:
|
||||||
|
case 0xa0:
|
||||||
|
case 0x1680:
|
||||||
|
case 0x180e:
|
||||||
|
case 0x2000:
|
||||||
|
case 0x2001:
|
||||||
|
case 0x2002:
|
||||||
|
case 0x2003:
|
||||||
|
case 0x2004:
|
||||||
|
case 0x2005:
|
||||||
|
case 0x2006:
|
||||||
|
case 0x2007:
|
||||||
|
case 0x2008:
|
||||||
|
case 0x2009:
|
||||||
|
case 0x200A:
|
||||||
|
case 0x202f:
|
||||||
|
case 0x205f:
|
||||||
|
case 0x3000:
|
||||||
|
return -next != ESC_h;
|
||||||
|
default:
|
||||||
|
return -next == ESC_h;
|
||||||
|
}
|
||||||
|
|
||||||
|
case ESC_v:
|
||||||
|
case ESC_V:
|
||||||
|
switch(item)
|
||||||
|
{
|
||||||
|
case 0x0a:
|
||||||
|
case 0x0b:
|
||||||
|
case 0x0c:
|
||||||
|
case 0x0d:
|
||||||
|
case 0x85:
|
||||||
|
case 0x2028:
|
||||||
|
case 0x2029:
|
||||||
|
return -next != ESC_v;
|
||||||
|
default:
|
||||||
|
return -next == ESC_v;
|
||||||
|
}
|
||||||
|
|
||||||
default:
|
default:
|
||||||
return FALSE;
|
return FALSE;
|
||||||
}
|
}
|
||||||
|
|
||||||
case OP_DIGIT:
|
case OP_DIGIT:
|
||||||
return next == -ESC_D || next == -ESC_s || next == -ESC_W;
|
return next == -ESC_D || next == -ESC_s || next == -ESC_W ||
|
||||||
|
next == -ESC_h || next == -ESC_v;
|
||||||
|
|
||||||
case OP_NOT_DIGIT:
|
case OP_NOT_DIGIT:
|
||||||
return next == -ESC_d;
|
return next == -ESC_d;
|
||||||
@ -1920,10 +2036,23 @@ switch(op_code)
|
|||||||
return next == -ESC_S || next == -ESC_d || next == -ESC_w;
|
return next == -ESC_S || next == -ESC_d || next == -ESC_w;
|
||||||
|
|
||||||
case OP_NOT_WHITESPACE:
|
case OP_NOT_WHITESPACE:
|
||||||
return next == -ESC_s;
|
return next == -ESC_s || next == -ESC_h || next == -ESC_v;
|
||||||
|
|
||||||
|
case OP_HSPACE:
|
||||||
|
return next == -ESC_S || next == -ESC_H || next == -ESC_d || next == -ESC_w;
|
||||||
|
|
||||||
|
case OP_NOT_HSPACE:
|
||||||
|
return next == -ESC_h;
|
||||||
|
|
||||||
|
/* Can't have \S in here because VT matches \S (Perl anomaly) */
|
||||||
|
case OP_VSPACE:
|
||||||
|
return next == -ESC_V || next == -ESC_d || next == -ESC_w;
|
||||||
|
|
||||||
|
case OP_NOT_VSPACE:
|
||||||
|
return next == -ESC_v;
|
||||||
|
|
||||||
case OP_WORDCHAR:
|
case OP_WORDCHAR:
|
||||||
return next == -ESC_W || next == -ESC_s;
|
return next == -ESC_W || next == -ESC_s || next == -ESC_h || next == -ESC_v;
|
||||||
|
|
||||||
case OP_NOT_WORDCHAR:
|
case OP_NOT_WORDCHAR:
|
||||||
return next == -ESC_w || next == -ESC_d;
|
return next == -ESC_w || next == -ESC_d;
|
||||||
@ -2038,10 +2167,12 @@ for (;; ptr++)
|
|||||||
BOOL possessive_quantifier;
|
BOOL possessive_quantifier;
|
||||||
BOOL is_quantifier;
|
BOOL is_quantifier;
|
||||||
BOOL is_recurse;
|
BOOL is_recurse;
|
||||||
|
BOOL reset_bracount;
|
||||||
int class_charcount;
|
int class_charcount;
|
||||||
int class_lastchar;
|
int class_lastchar;
|
||||||
int newoptions;
|
int newoptions;
|
||||||
int recno;
|
int recno;
|
||||||
|
int refsign;
|
||||||
int skipbytes;
|
int skipbytes;
|
||||||
int subreqbyte;
|
int subreqbyte;
|
||||||
int subfirstbyte;
|
int subfirstbyte;
|
||||||
@ -2466,6 +2597,133 @@ for (;; ptr++)
|
|||||||
else if (c == -ESC_d || c == -ESC_D || c == -ESC_w ||
|
else if (c == -ESC_d || c == -ESC_D || c == -ESC_w ||
|
||||||
c == -ESC_W || c == -ESC_s || c == -ESC_S) continue;
|
c == -ESC_W || c == -ESC_s || c == -ESC_S) continue;
|
||||||
|
|
||||||
|
/* We need to deal with \H, \h, \V, and \v in both phases because
|
||||||
|
they use extra memory. */
|
||||||
|
|
||||||
|
if (-c == ESC_h)
|
||||||
|
{
|
||||||
|
SETBIT(classbits, 0x09); /* HT */
|
||||||
|
SETBIT(classbits, 0x20); /* SPACE */
|
||||||
|
SETBIT(classbits, 0xa0); /* NBSP */
|
||||||
|
#ifdef SUPPORT_UTF8
|
||||||
|
if (utf8)
|
||||||
|
{
|
||||||
|
class_utf8 = TRUE;
|
||||||
|
*class_utf8data++ = XCL_SINGLE;
|
||||||
|
class_utf8data += _pcre_ord2utf8(0x1680, class_utf8data);
|
||||||
|
*class_utf8data++ = XCL_SINGLE;
|
||||||
|
class_utf8data += _pcre_ord2utf8(0x180e, class_utf8data);
|
||||||
|
*class_utf8data++ = XCL_RANGE;
|
||||||
|
class_utf8data += _pcre_ord2utf8(0x2000, class_utf8data);
|
||||||
|
class_utf8data += _pcre_ord2utf8(0x200A, class_utf8data);
|
||||||
|
*class_utf8data++ = XCL_SINGLE;
|
||||||
|
class_utf8data += _pcre_ord2utf8(0x202f, class_utf8data);
|
||||||
|
*class_utf8data++ = XCL_SINGLE;
|
||||||
|
class_utf8data += _pcre_ord2utf8(0x205f, class_utf8data);
|
||||||
|
*class_utf8data++ = XCL_SINGLE;
|
||||||
|
class_utf8data += _pcre_ord2utf8(0x3000, class_utf8data);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (-c == ESC_H)
|
||||||
|
{
|
||||||
|
for (c = 0; c < 32; c++)
|
||||||
|
{
|
||||||
|
int x = 0xff;
|
||||||
|
switch (c)
|
||||||
|
{
|
||||||
|
case 0x09/8: x ^= 1 << (0x09%8); break;
|
||||||
|
case 0x20/8: x ^= 1 << (0x20%8); break;
|
||||||
|
case 0xa0/8: x ^= 1 << (0xa0%8); break;
|
||||||
|
default: break;
|
||||||
|
}
|
||||||
|
classbits[c] |= x;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef SUPPORT_UTF8
|
||||||
|
if (utf8)
|
||||||
|
{
|
||||||
|
class_utf8 = TRUE;
|
||||||
|
*class_utf8data++ = XCL_RANGE;
|
||||||
|
class_utf8data += _pcre_ord2utf8(0x0100, class_utf8data);
|
||||||
|
class_utf8data += _pcre_ord2utf8(0x167f, class_utf8data);
|
||||||
|
*class_utf8data++ = XCL_RANGE;
|
||||||
|
class_utf8data += _pcre_ord2utf8(0x1681, class_utf8data);
|
||||||
|
class_utf8data += _pcre_ord2utf8(0x180d, class_utf8data);
|
||||||
|
*class_utf8data++ = XCL_RANGE;
|
||||||
|
class_utf8data += _pcre_ord2utf8(0x180f, class_utf8data);
|
||||||
|
class_utf8data += _pcre_ord2utf8(0x1fff, class_utf8data);
|
||||||
|
*class_utf8data++ = XCL_RANGE;
|
||||||
|
class_utf8data += _pcre_ord2utf8(0x200B, class_utf8data);
|
||||||
|
class_utf8data += _pcre_ord2utf8(0x202e, class_utf8data);
|
||||||
|
*class_utf8data++ = XCL_RANGE;
|
||||||
|
class_utf8data += _pcre_ord2utf8(0x2030, class_utf8data);
|
||||||
|
class_utf8data += _pcre_ord2utf8(0x205e, class_utf8data);
|
||||||
|
*class_utf8data++ = XCL_RANGE;
|
||||||
|
class_utf8data += _pcre_ord2utf8(0x2060, class_utf8data);
|
||||||
|
class_utf8data += _pcre_ord2utf8(0x2fff, class_utf8data);
|
||||||
|
*class_utf8data++ = XCL_RANGE;
|
||||||
|
class_utf8data += _pcre_ord2utf8(0x3001, class_utf8data);
|
||||||
|
class_utf8data += _pcre_ord2utf8(0x7fffffff, class_utf8data);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (-c == ESC_v)
|
||||||
|
{
|
||||||
|
SETBIT(classbits, 0x0a); /* LF */
|
||||||
|
SETBIT(classbits, 0x0b); /* VT */
|
||||||
|
SETBIT(classbits, 0x0c); /* FF */
|
||||||
|
SETBIT(classbits, 0x0d); /* CR */
|
||||||
|
SETBIT(classbits, 0x85); /* NEL */
|
||||||
|
#ifdef SUPPORT_UTF8
|
||||||
|
if (utf8)
|
||||||
|
{
|
||||||
|
class_utf8 = TRUE;
|
||||||
|
*class_utf8data++ = XCL_RANGE;
|
||||||
|
class_utf8data += _pcre_ord2utf8(0x2028, class_utf8data);
|
||||||
|
class_utf8data += _pcre_ord2utf8(0x2029, class_utf8data);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (-c == ESC_V)
|
||||||
|
{
|
||||||
|
for (c = 0; c < 32; c++)
|
||||||
|
{
|
||||||
|
int x = 0xff;
|
||||||
|
switch (c)
|
||||||
|
{
|
||||||
|
case 0x0a/8: x ^= 1 << (0x0a%8);
|
||||||
|
x ^= 1 << (0x0b%8);
|
||||||
|
x ^= 1 << (0x0c%8);
|
||||||
|
x ^= 1 << (0x0d%8);
|
||||||
|
break;
|
||||||
|
case 0x85/8: x ^= 1 << (0x85%8); break;
|
||||||
|
default: break;
|
||||||
|
}
|
||||||
|
classbits[c] |= x;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef SUPPORT_UTF8
|
||||||
|
if (utf8)
|
||||||
|
{
|
||||||
|
class_utf8 = TRUE;
|
||||||
|
*class_utf8data++ = XCL_RANGE;
|
||||||
|
class_utf8data += _pcre_ord2utf8(0x0100, class_utf8data);
|
||||||
|
class_utf8data += _pcre_ord2utf8(0x2027, class_utf8data);
|
||||||
|
*class_utf8data++ = XCL_RANGE;
|
||||||
|
class_utf8data += _pcre_ord2utf8(0x2029, class_utf8data);
|
||||||
|
class_utf8data += _pcre_ord2utf8(0x7fffffff, class_utf8data);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
/* We need to deal with \P and \p in both phases. */
|
/* We need to deal with \P and \p in both phases. */
|
||||||
|
|
||||||
#ifdef SUPPORT_UCP
|
#ifdef SUPPORT_UCP
|
||||||
@ -2606,14 +2864,18 @@ for (;; ptr++)
|
|||||||
unsigned int origd = d;
|
unsigned int origd = d;
|
||||||
while (get_othercase_range(&cc, origd, &occ, &ocd))
|
while (get_othercase_range(&cc, origd, &occ, &ocd))
|
||||||
{
|
{
|
||||||
if (occ >= c && ocd <= d) continue; /* Skip embedded ranges */
|
if (occ >= (unsigned int)c &&
|
||||||
|
ocd <= (unsigned int)d)
|
||||||
|
continue; /* Skip embedded ranges */
|
||||||
|
|
||||||
if (occ < c && ocd >= c - 1) /* Extend the basic range */
|
if (occ < (unsigned int)c &&
|
||||||
|
ocd >= (unsigned int)c - 1) /* Extend the basic range */
|
||||||
{ /* if there is overlap, */
|
{ /* if there is overlap, */
|
||||||
c = occ; /* noting that if occ < c */
|
c = occ; /* noting that if occ < c */
|
||||||
continue; /* we can't have ocd > d */
|
continue; /* we can't have ocd > d */
|
||||||
} /* because a subrange is */
|
} /* because a subrange is */
|
||||||
if (ocd > d && occ <= d + 1) /* always shorter than */
|
if (ocd > (unsigned int)d &&
|
||||||
|
occ <= (unsigned int)d + 1) /* always shorter than */
|
||||||
{ /* the basic range. */
|
{ /* the basic range. */
|
||||||
d = ocd;
|
d = ocd;
|
||||||
continue;
|
continue;
|
||||||
@ -3511,6 +3773,7 @@ for (;; ptr++)
|
|||||||
skipbytes = 0;
|
skipbytes = 0;
|
||||||
bravalue = OP_CBRA;
|
bravalue = OP_CBRA;
|
||||||
save_hwm = cd->hwm;
|
save_hwm = cd->hwm;
|
||||||
|
reset_bracount = FALSE;
|
||||||
|
|
||||||
if (*(++ptr) == '?')
|
if (*(++ptr) == '?')
|
||||||
{
|
{
|
||||||
@ -3532,6 +3795,11 @@ for (;; ptr++)
|
|||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
|
||||||
|
/* ------------------------------------------------------------ */
|
||||||
|
case '|': /* Reset capture count for each branch */
|
||||||
|
reset_bracount = TRUE;
|
||||||
|
/* Fall through */
|
||||||
|
|
||||||
/* ------------------------------------------------------------ */
|
/* ------------------------------------------------------------ */
|
||||||
case ':': /* Non-capturing bracket */
|
case ':': /* Non-capturing bracket */
|
||||||
bravalue = OP_BRA;
|
bravalue = OP_BRA;
|
||||||
@ -3568,6 +3836,7 @@ for (;; ptr++)
|
|||||||
|
|
||||||
code[1+LINK_SIZE] = OP_CREF;
|
code[1+LINK_SIZE] = OP_CREF;
|
||||||
skipbytes = 3;
|
skipbytes = 3;
|
||||||
|
refsign = -1;
|
||||||
|
|
||||||
/* Check for a test for recursion in a named group. */
|
/* Check for a test for recursion in a named group. */
|
||||||
|
|
||||||
@ -3591,7 +3860,11 @@ for (;; ptr++)
|
|||||||
terminator = '\'';
|
terminator = '\'';
|
||||||
ptr++;
|
ptr++;
|
||||||
}
|
}
|
||||||
else terminator = 0;
|
else
|
||||||
|
{
|
||||||
|
terminator = 0;
|
||||||
|
if (ptr[1] == '-' || ptr[1] == '+') refsign = *(++ptr);
|
||||||
|
}
|
||||||
|
|
||||||
/* We now expect to read a name; anything else is an error */
|
/* We now expect to read a name; anything else is an error */
|
||||||
|
|
||||||
@ -3627,7 +3900,32 @@ for (;; ptr++)
|
|||||||
if (lengthptr != NULL) break;
|
if (lengthptr != NULL) break;
|
||||||
|
|
||||||
/* In the real compile we do the work of looking for the actual
|
/* In the real compile we do the work of looking for the actual
|
||||||
reference. */
|
reference. If the string started with "+" or "-" we require the rest to
|
||||||
|
be digits, in which case recno will be set. */
|
||||||
|
|
||||||
|
if (refsign > 0)
|
||||||
|
{
|
||||||
|
if (recno <= 0)
|
||||||
|
{
|
||||||
|
*errorcodeptr = ERR58;
|
||||||
|
goto FAILED;
|
||||||
|
}
|
||||||
|
if (refsign == '-')
|
||||||
|
{
|
||||||
|
recno = cd->bracount - recno + 1;
|
||||||
|
if (recno <= 0)
|
||||||
|
{
|
||||||
|
*errorcodeptr = ERR15;
|
||||||
|
goto FAILED;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else recno += cd->bracount;
|
||||||
|
PUT2(code, 2+LINK_SIZE, recno);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Otherwise (did not start with "+" or "-"), start by looking for the
|
||||||
|
name. */
|
||||||
|
|
||||||
slot = cd->name_table;
|
slot = cd->name_table;
|
||||||
for (i = 0; i < cd->names_found; i++)
|
for (i = 0; i < cd->names_found; i++)
|
||||||
@ -3946,19 +4244,54 @@ for (;; ptr++)
|
|||||||
|
|
||||||
|
|
||||||
/* ------------------------------------------------------------ */
|
/* ------------------------------------------------------------ */
|
||||||
|
case '-': case '+':
|
||||||
case '0': case '1': case '2': case '3': case '4': /* Recursion or */
|
case '0': case '1': case '2': case '3': case '4': /* Recursion or */
|
||||||
case '5': case '6': case '7': case '8': case '9': /* subroutine */
|
case '5': case '6': case '7': case '8': case '9': /* subroutine */
|
||||||
{
|
{
|
||||||
const uschar *called;
|
const uschar *called;
|
||||||
|
|
||||||
|
if ((refsign = *ptr) == '+') ptr++;
|
||||||
|
else if (refsign == '-')
|
||||||
|
{
|
||||||
|
if (g_ascii_isdigit(ptr[1]) == 0)
|
||||||
|
goto OTHER_CHAR_AFTER_QUERY;
|
||||||
|
ptr++;
|
||||||
|
}
|
||||||
|
|
||||||
recno = 0;
|
recno = 0;
|
||||||
while(g_ascii_isdigit(*ptr) != 0)
|
while(g_ascii_isdigit(*ptr) != 0)
|
||||||
recno = recno * 10 + *ptr++ - '0';
|
recno = recno * 10 + *ptr++ - '0';
|
||||||
|
|
||||||
if (*ptr != ')')
|
if (*ptr != ')')
|
||||||
{
|
{
|
||||||
*errorcodeptr = ERR29;
|
*errorcodeptr = ERR29;
|
||||||
goto FAILED;
|
goto FAILED;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (refsign == '-')
|
||||||
|
{
|
||||||
|
if (recno == 0)
|
||||||
|
{
|
||||||
|
*errorcodeptr = ERR58;
|
||||||
|
goto FAILED;
|
||||||
|
}
|
||||||
|
recno = cd->bracount - recno + 1;
|
||||||
|
if (recno <= 0)
|
||||||
|
{
|
||||||
|
*errorcodeptr = ERR15;
|
||||||
|
goto FAILED;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (refsign == '+')
|
||||||
|
{
|
||||||
|
if (recno == 0)
|
||||||
|
{
|
||||||
|
*errorcodeptr = ERR58;
|
||||||
|
goto FAILED;
|
||||||
|
}
|
||||||
|
recno += cd->bracount;
|
||||||
|
}
|
||||||
|
|
||||||
/* Come here from code above that handles a named recursion */
|
/* Come here from code above that handles a named recursion */
|
||||||
|
|
||||||
HANDLE_RECURSION:
|
HANDLE_RECURSION:
|
||||||
@ -4031,6 +4364,7 @@ for (;; ptr++)
|
|||||||
|
|
||||||
/* ------------------------------------------------------------ */
|
/* ------------------------------------------------------------ */
|
||||||
default: /* Other characters: check option setting */
|
default: /* Other characters: check option setting */
|
||||||
|
OTHER_CHAR_AFTER_QUERY:
|
||||||
set = unset = 0;
|
set = unset = 0;
|
||||||
optset = &set;
|
optset = &set;
|
||||||
|
|
||||||
@ -4165,6 +4499,7 @@ for (;; ptr++)
|
|||||||
errorcodeptr, /* Where to put an error message */
|
errorcodeptr, /* Where to put an error message */
|
||||||
(bravalue == OP_ASSERTBACK ||
|
(bravalue == OP_ASSERTBACK ||
|
||||||
bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */
|
bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */
|
||||||
|
reset_bracount, /* True if (?| group */
|
||||||
skipbytes, /* Skip over bracket number */
|
skipbytes, /* Skip over bracket number */
|
||||||
&subfirstbyte, /* For possible first char */
|
&subfirstbyte, /* For possible first char */
|
||||||
&subreqbyte, /* For possible last char */
|
&subreqbyte, /* For possible last char */
|
||||||
@ -4181,9 +4516,11 @@ for (;; ptr++)
|
|||||||
is on the bracket. */
|
is on the bracket. */
|
||||||
|
|
||||||
/* If this is a conditional bracket, check that there are no more than
|
/* If this is a conditional bracket, check that there are no more than
|
||||||
two branches in the group, or just one if it's a DEFINE group. */
|
two branches in the group, or just one if it's a DEFINE group. We do this
|
||||||
|
in the real compile phase, not in the pre-pass, where the whole group may
|
||||||
|
not be available. */
|
||||||
|
|
||||||
if (bravalue == OP_COND)
|
if (bravalue == OP_COND && lengthptr == NULL)
|
||||||
{
|
{
|
||||||
uschar *tc = code;
|
uschar *tc = code;
|
||||||
int condcount = 0;
|
int condcount = 0;
|
||||||
@ -4343,12 +4680,13 @@ for (;; ptr++)
|
|||||||
zerofirstbyte = firstbyte;
|
zerofirstbyte = firstbyte;
|
||||||
zeroreqbyte = reqbyte;
|
zeroreqbyte = reqbyte;
|
||||||
|
|
||||||
/* \k<name> or \k'name' is a back reference by name (Perl syntax) */
|
/* \k<name> or \k'name' is a back reference by name (Perl syntax).
|
||||||
|
We also support \k{name} (.NET syntax) */
|
||||||
|
|
||||||
if (-c == ESC_k && (ptr[1] == '<' || ptr[1] == '\''))
|
if (-c == ESC_k && (ptr[1] == '<' || ptr[1] == '\'' || ptr[1] == '{'))
|
||||||
{
|
{
|
||||||
is_recurse = FALSE;
|
is_recurse = FALSE;
|
||||||
terminator = (*(++ptr) == '<')? '>' : '\'';
|
terminator = (*(++ptr) == '<')? '>' : (*ptr == '\'')? '\'' : '}';
|
||||||
goto NAMED_REF_OR_RECURSE;
|
goto NAMED_REF_OR_RECURSE;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -4514,13 +4852,14 @@ This function is used during the pre-compile phase when we are trying to find
|
|||||||
out the amount of memory needed, as well as during the real compile phase. The
|
out the amount of memory needed, as well as during the real compile phase. The
|
||||||
value of lengthptr distinguishes the two phases.
|
value of lengthptr distinguishes the two phases.
|
||||||
|
|
||||||
Argument:
|
Arguments:
|
||||||
options option bits, including any changes for this subpattern
|
options option bits, including any changes for this subpattern
|
||||||
oldims previous settings of ims option bits
|
oldims previous settings of ims option bits
|
||||||
codeptr -> the address of the current code pointer
|
codeptr -> the address of the current code pointer
|
||||||
ptrptr -> the address of the current pattern pointer
|
ptrptr -> the address of the current pattern pointer
|
||||||
errorcodeptr -> pointer to error code variable
|
errorcodeptr -> pointer to error code variable
|
||||||
lookbehind TRUE if this is a lookbehind assertion
|
lookbehind TRUE if this is a lookbehind assertion
|
||||||
|
reset_bracount TRUE to reset the count for each branch
|
||||||
skipbytes skip this many bytes at start (for brackets and OP_COND)
|
skipbytes skip this many bytes at start (for brackets and OP_COND)
|
||||||
firstbyteptr place to put the first required character, or a negative number
|
firstbyteptr place to put the first required character, or a negative number
|
||||||
reqbyteptr place to put the last required character, or a negative number
|
reqbyteptr place to put the last required character, or a negative number
|
||||||
@ -4534,8 +4873,9 @@ Returns: TRUE on success
|
|||||||
|
|
||||||
static BOOL
|
static BOOL
|
||||||
compile_regex(int options, int oldims, uschar **codeptr, const uschar **ptrptr,
|
compile_regex(int options, int oldims, uschar **codeptr, const uschar **ptrptr,
|
||||||
int *errorcodeptr, BOOL lookbehind, int skipbytes, int *firstbyteptr,
|
int *errorcodeptr, BOOL lookbehind, BOOL reset_bracount, int skipbytes,
|
||||||
int *reqbyteptr, branch_chain *bcptr, compile_data *cd, int *lengthptr)
|
int *firstbyteptr, int *reqbyteptr, branch_chain *bcptr, compile_data *cd,
|
||||||
|
int *lengthptr)
|
||||||
{
|
{
|
||||||
const uschar *ptr = *ptrptr;
|
const uschar *ptr = *ptrptr;
|
||||||
uschar *code = *codeptr;
|
uschar *code = *codeptr;
|
||||||
@ -4545,6 +4885,8 @@ uschar *reverse_count = NULL;
|
|||||||
int firstbyte, reqbyte;
|
int firstbyte, reqbyte;
|
||||||
int branchfirstbyte, branchreqbyte;
|
int branchfirstbyte, branchreqbyte;
|
||||||
int length;
|
int length;
|
||||||
|
int orig_bracount;
|
||||||
|
int max_bracount;
|
||||||
branch_chain bc;
|
branch_chain bc;
|
||||||
|
|
||||||
bc.outer = bcptr;
|
bc.outer = bcptr;
|
||||||
@ -4573,8 +4915,14 @@ code += 1 + LINK_SIZE + skipbytes;
|
|||||||
|
|
||||||
/* Loop for each alternative branch */
|
/* Loop for each alternative branch */
|
||||||
|
|
||||||
|
orig_bracount = max_bracount = cd->bracount;
|
||||||
for (;;)
|
for (;;)
|
||||||
{
|
{
|
||||||
|
/* For a (?| group, reset the capturing bracket count so that each branch
|
||||||
|
uses the same numbers. */
|
||||||
|
|
||||||
|
if (reset_bracount) cd->bracount = orig_bracount;
|
||||||
|
|
||||||
/* Handle a change of ims options at the start of the branch */
|
/* Handle a change of ims options at the start of the branch */
|
||||||
|
|
||||||
if ((options & PCRE_IMS) != oldims)
|
if ((options & PCRE_IMS) != oldims)
|
||||||
@ -4604,6 +4952,11 @@ for (;;)
|
|||||||
return FALSE;
|
return FALSE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Keep the highest bracket count in case (?| was used and some branch
|
||||||
|
has fewer than the rest. */
|
||||||
|
|
||||||
|
if (cd->bracount > max_bracount) max_bracount = cd->bracount;
|
||||||
|
|
||||||
/* In the real compile phase, there is some post-processing to be done. */
|
/* In the real compile phase, there is some post-processing to be done. */
|
||||||
|
|
||||||
if (lengthptr == NULL)
|
if (lengthptr == NULL)
|
||||||
@ -4667,26 +5020,29 @@ for (;;)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Reached end of expression, either ')' or end of pattern. Go back through
|
/* Reached end of expression, either ')' or end of pattern. In the real
|
||||||
the alternative branches and reverse the chain of offsets, with the field in
|
compile phase, go back through the alternative branches and reverse the chain
|
||||||
the BRA item now becoming an offset to the first alternative. If there are
|
of offsets, with the field in the BRA item now becoming an offset to the
|
||||||
no alternatives, it points to the end of the group. The length in the
|
first alternative. If there are no alternatives, it points to the end of the
|
||||||
terminating ket is always the length of the whole bracketed item. If any of
|
group. The length in the terminating ket is always the length of the whole
|
||||||
the ims options were changed inside the group, compile a resetting op-code
|
bracketed item. If any of the ims options were changed inside the group,
|
||||||
following, except at the very end of the pattern. Return leaving the pointer
|
compile a resetting op-code following, except at the very end of the pattern.
|
||||||
at the terminating char. */
|
Return leaving the pointer at the terminating char. */
|
||||||
|
|
||||||
if (*ptr != '|')
|
if (*ptr != '|')
|
||||||
{
|
{
|
||||||
int branch_length = code - last_branch;
|
if (lengthptr == NULL)
|
||||||
do
|
|
||||||
{
|
{
|
||||||
int prev_length = GET(last_branch, 1);
|
int branch_length = code - last_branch;
|
||||||
PUT(last_branch, 1, branch_length);
|
do
|
||||||
branch_length = prev_length;
|
{
|
||||||
last_branch -= branch_length;
|
int prev_length = GET(last_branch, 1);
|
||||||
|
PUT(last_branch, 1, branch_length);
|
||||||
|
branch_length = prev_length;
|
||||||
|
last_branch -= branch_length;
|
||||||
|
}
|
||||||
|
while (branch_length > 0);
|
||||||
}
|
}
|
||||||
while (branch_length > 0);
|
|
||||||
|
|
||||||
/* Fill in the ket */
|
/* Fill in the ket */
|
||||||
|
|
||||||
@ -4703,6 +5059,10 @@ for (;;)
|
|||||||
length += 2;
|
length += 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Retain the highest bracket number, in case resetting was used. */
|
||||||
|
|
||||||
|
cd->bracount = max_bracount;
|
||||||
|
|
||||||
/* Set values to pass back */
|
/* Set values to pass back */
|
||||||
|
|
||||||
*codeptr = code;
|
*codeptr = code;
|
||||||
@ -4713,17 +5073,29 @@ for (;;)
|
|||||||
return TRUE;
|
return TRUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Another branch follows; insert an "or" node. Its length field points back
|
/* Another branch follows. In the pre-compile phase, we can move the code
|
||||||
|
pointer back to where it was for the start of the first branch. (That is,
|
||||||
|
pretend that each branch is the only one.)
|
||||||
|
|
||||||
|
In the real compile phase, insert an ALT node. Its length field points back
|
||||||
to the previous branch while the bracket remains open. At the end the chain
|
to the previous branch while the bracket remains open. At the end the chain
|
||||||
is reversed. It's done like this so that the start of the bracket has a
|
is reversed. It's done like this so that the start of the bracket has a
|
||||||
zero offset until it is closed, making it possible to detect recursion. */
|
zero offset until it is closed, making it possible to detect recursion. */
|
||||||
|
|
||||||
*code = OP_ALT;
|
if (lengthptr != NULL)
|
||||||
PUT(code, 1, code - last_branch);
|
{
|
||||||
bc.current = last_branch = code;
|
code = *codeptr + 1 + LINK_SIZE + skipbytes;
|
||||||
code += 1 + LINK_SIZE;
|
length += 1 + LINK_SIZE;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
*code = OP_ALT;
|
||||||
|
PUT(code, 1, code - last_branch);
|
||||||
|
bc.current = last_branch = code;
|
||||||
|
code += 1 + LINK_SIZE;
|
||||||
|
}
|
||||||
|
|
||||||
ptr++;
|
ptr++;
|
||||||
length += 1 + LINK_SIZE;
|
|
||||||
}
|
}
|
||||||
/* Control never reaches here */
|
/* Control never reaches here */
|
||||||
}
|
}
|
||||||
@ -4990,7 +5362,7 @@ Returns: pointer to compiled data block, or NULL on error,
|
|||||||
with errorptr and erroroffset set
|
with errorptr and erroroffset set
|
||||||
*/
|
*/
|
||||||
|
|
||||||
PCRE_DATA_SCOPE pcre *
|
PCRE_EXP_DEFN pcre *
|
||||||
pcre_compile(const char *pattern, int options, const char **errorptr,
|
pcre_compile(const char *pattern, int options, const char **errorptr,
|
||||||
int *erroroffset, const unsigned char *tables)
|
int *erroroffset, const unsigned char *tables)
|
||||||
{
|
{
|
||||||
@ -4998,7 +5370,7 @@ return pcre_compile2(pattern, options, NULL, errorptr, erroroffset, tables);
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
PCRE_DATA_SCOPE pcre *
|
PCRE_EXP_DEFN pcre *
|
||||||
pcre_compile2(const char *pattern, int options, int *errorcodeptr,
|
pcre_compile2(const char *pattern, int options, int *errorcodeptr,
|
||||||
const char **errorptr, int *erroroffset, const unsigned char *tables)
|
const char **errorptr, int *erroroffset, const unsigned char *tables)
|
||||||
{
|
{
|
||||||
@ -5047,7 +5419,7 @@ if (errorcodeptr != NULL) *errorcodeptr = ERR0;
|
|||||||
if (erroroffset == NULL)
|
if (erroroffset == NULL)
|
||||||
{
|
{
|
||||||
errorcode = ERR16;
|
errorcode = ERR16;
|
||||||
goto PCRE_EARLY_ERROR_RETURN;
|
goto PCRE_EARLY_ERROR_RETURN2;
|
||||||
}
|
}
|
||||||
|
|
||||||
*erroroffset = 0;
|
*erroroffset = 0;
|
||||||
@ -5060,7 +5432,7 @@ if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 &&
|
|||||||
(*erroroffset = _pcre_valid_utf8((uschar *)pattern, -1)) >= 0)
|
(*erroroffset = _pcre_valid_utf8((uschar *)pattern, -1)) >= 0)
|
||||||
{
|
{
|
||||||
errorcode = ERR44;
|
errorcode = ERR44;
|
||||||
goto PCRE_UTF8_ERROR_RETURN;
|
goto PCRE_EARLY_ERROR_RETURN2;
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
if ((options & PCRE_UTF8) != 0)
|
if ((options & PCRE_UTF8) != 0)
|
||||||
@ -5085,7 +5457,8 @@ cd->cbits = tables + cbits_offset;
|
|||||||
cd->ctypes = tables + ctypes_offset;
|
cd->ctypes = tables + ctypes_offset;
|
||||||
|
|
||||||
/* Handle different types of newline. The three bits give seven cases. The
|
/* Handle different types of newline. The three bits give seven cases. The
|
||||||
current code allows for fixed one- or two-byte sequences, plus "any". */
|
current code allows for fixed one- or two-byte sequences, plus "any" and
|
||||||
|
"anycrlf". */
|
||||||
|
|
||||||
switch (options & (PCRE_NEWLINE_CRLF | PCRE_NEWLINE_ANY))
|
switch (options & (PCRE_NEWLINE_CRLF | PCRE_NEWLINE_ANY))
|
||||||
{
|
{
|
||||||
@ -5095,10 +5468,15 @@ switch (options & (PCRE_NEWLINE_CRLF | PCRE_NEWLINE_ANY))
|
|||||||
case PCRE_NEWLINE_CR+
|
case PCRE_NEWLINE_CR+
|
||||||
PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
|
PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
|
||||||
case PCRE_NEWLINE_ANY: newline = -1; break;
|
case PCRE_NEWLINE_ANY: newline = -1; break;
|
||||||
|
case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
|
||||||
default: errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN;
|
default: errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (newline < 0)
|
if (newline == -2)
|
||||||
|
{
|
||||||
|
cd->nltype = NLTYPE_ANYCRLF;
|
||||||
|
}
|
||||||
|
else if (newline < 0)
|
||||||
{
|
{
|
||||||
cd->nltype = NLTYPE_ANY;
|
cd->nltype = NLTYPE_ANY;
|
||||||
}
|
}
|
||||||
@ -5159,7 +5537,8 @@ outside can help speed up starting point checks. */
|
|||||||
code = cworkspace;
|
code = cworkspace;
|
||||||
*code = OP_BRA;
|
*code = OP_BRA;
|
||||||
(void)compile_regex(cd->external_options, cd->external_options & PCRE_IMS,
|
(void)compile_regex(cd->external_options, cd->external_options & PCRE_IMS,
|
||||||
&code, &ptr, &errorcode, FALSE, 0, &firstbyte, &reqbyte, NULL, cd, &length);
|
&code, &ptr, &errorcode, FALSE, FALSE, 0, &firstbyte, &reqbyte, NULL, cd,
|
||||||
|
&length);
|
||||||
if (errorcode != 0) goto PCRE_EARLY_ERROR_RETURN;
|
if (errorcode != 0) goto PCRE_EARLY_ERROR_RETURN;
|
||||||
|
|
||||||
DPRINTF(("end pre-compile: length=%d workspace=%d\n", length,
|
DPRINTF(("end pre-compile: length=%d workspace=%d\n", length,
|
||||||
@ -5227,7 +5606,7 @@ ptr = (const uschar *)pattern;
|
|||||||
code = (uschar *)codestart;
|
code = (uschar *)codestart;
|
||||||
*code = OP_BRA;
|
*code = OP_BRA;
|
||||||
(void)compile_regex(re->options, re->options & PCRE_IMS, &code, &ptr,
|
(void)compile_regex(re->options, re->options & PCRE_IMS, &code, &ptr,
|
||||||
&errorcode, FALSE, 0, &firstbyte, &reqbyte, NULL, cd, NULL);
|
&errorcode, FALSE, FALSE, 0, &firstbyte, &reqbyte, NULL, cd, NULL);
|
||||||
re->top_bracket = cd->bracount;
|
re->top_bracket = cd->bracount;
|
||||||
re->top_backref = cd->top_backref;
|
re->top_backref = cd->top_backref;
|
||||||
|
|
||||||
@ -5272,9 +5651,7 @@ if (errorcode != 0)
|
|||||||
(pcre_free)(re);
|
(pcre_free)(re);
|
||||||
PCRE_EARLY_ERROR_RETURN:
|
PCRE_EARLY_ERROR_RETURN:
|
||||||
*erroroffset = ptr - (const uschar *)pattern;
|
*erroroffset = ptr - (const uschar *)pattern;
|
||||||
#ifdef SUPPORT_UTF8
|
PCRE_EARLY_ERROR_RETURN2:
|
||||||
PCRE_UTF8_ERROR_RETURN:
|
|
||||||
#endif
|
|
||||||
*errorptr = error_texts + error_texts_offsets[errorcode];
|
*errorptr = error_texts + error_texts_offsets[errorcode];
|
||||||
if (errorcodeptr != NULL) *errorcodeptr = errorcode;
|
if (errorcodeptr != NULL) *errorcodeptr = errorcode;
|
||||||
return NULL;
|
return NULL;
|
||||||
@ -5364,7 +5741,7 @@ if ((re->options & PCRE_REQCHSET) != 0)
|
|||||||
else printf("Req char = \\x%02x%s\n", ch, caseless);
|
else printf("Req char = \\x%02x%s\n", ch, caseless);
|
||||||
}
|
}
|
||||||
|
|
||||||
pcre_printint(re, stdout);
|
pcre_printint(re, stdout, TRUE);
|
||||||
|
|
||||||
/* This check is done here in the debugging case so that the code that
|
/* This check is done here in the debugging case so that the code that
|
||||||
was compiled can be seen. */
|
was compiled can be seen. */
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
and semantics are as close as possible to those of the Perl 5 language.
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
Written by Philip Hazel
|
Written by Philip Hazel
|
||||||
Copyright (c) 1997-2006 University of Cambridge
|
Copyright (c) 1997-2007 University of Cambridge
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
@ -58,7 +58,7 @@ Arguments:
|
|||||||
Returns: 0 if data returned, negative on error
|
Returns: 0 if data returned, negative on error
|
||||||
*/
|
*/
|
||||||
|
|
||||||
PCRE_DATA_SCOPE int
|
PCRE_EXP_DEFN int
|
||||||
pcre_config(int what, void *where)
|
pcre_config(int what, void *where)
|
||||||
{
|
{
|
||||||
switch (what)
|
switch (what)
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
and semantics are as close as possible to those of the Perl 5 language.
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
Written by Philip Hazel
|
Written by Philip Hazel
|
||||||
Copyright (c) 1997-2006 University of Cambridge
|
Copyright (c) 1997-2007 University of Cambridge
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
@ -63,24 +63,30 @@ applications. */
|
|||||||
|
|
||||||
/* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes
|
/* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes
|
||||||
into others, under special conditions. A gap of 20 between the blocks should be
|
into others, under special conditions. A gap of 20 between the blocks should be
|
||||||
enough. */
|
enough. The resulting opcodes don't have to be less than 256 because they are
|
||||||
|
never stored, so we push them well clear of the normal opcodes. */
|
||||||
|
|
||||||
#define OP_PROP_EXTRA 100
|
#define OP_PROP_EXTRA 300
|
||||||
#define OP_EXTUNI_EXTRA 120
|
#define OP_EXTUNI_EXTRA 320
|
||||||
#define OP_ANYNL_EXTRA 140
|
#define OP_ANYNL_EXTRA 340
|
||||||
|
#define OP_HSPACE_EXTRA 360
|
||||||
|
#define OP_VSPACE_EXTRA 380
|
||||||
|
|
||||||
|
|
||||||
/* This table identifies those opcodes that are followed immediately by a
|
/* This table identifies those opcodes that are followed immediately by a
|
||||||
character that is to be tested in some way. This makes it possible to
|
character that is to be tested in some way. This makes it possible to
|
||||||
centralize the loading of these characters. In the case of Type * etc, the
|
centralize the loading of these characters. In the case of Type * etc, the
|
||||||
"character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
|
"character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
|
||||||
small value. */
|
small value. ***NOTE*** If the start of this table is modified, the two tables
|
||||||
|
that follow must also be modified. */
|
||||||
|
|
||||||
static uschar coptable[] = {
|
static uschar coptable[] = {
|
||||||
0, /* End */
|
0, /* End */
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* \A, \G, \B, \b, \D, \d, \S, \s, \W, \w */
|
0, 0, 0, 0, 0, /* \A, \G, \K, \B, \b */
|
||||||
|
0, 0, 0, 0, 0, 0, /* \D, \d, \S, \s, \W, \w */
|
||||||
0, 0, /* Any, Anybyte */
|
0, 0, /* Any, Anybyte */
|
||||||
0, 0, 0, 0, /* NOTPROP, PROP, EXTUNI, ANYNL */
|
0, 0, 0, /* NOTPROP, PROP, EXTUNI */
|
||||||
|
0, 0, 0, 0, 0, /* \R, \H, \h, \V, \v */
|
||||||
0, 0, 0, 0, 0, /* \Z, \z, Opt, ^, $ */
|
0, 0, 0, 0, 0, /* \Z, \z, Opt, ^, $ */
|
||||||
1, /* Char */
|
1, /* Char */
|
||||||
1, /* Charnc */
|
1, /* Charnc */
|
||||||
@ -127,7 +133,7 @@ static uschar coptable[] = {
|
|||||||
and \w */
|
and \w */
|
||||||
|
|
||||||
static uschar toptable1[] = {
|
static uschar toptable1[] = {
|
||||||
0, 0, 0, 0, 0,
|
0, 0, 0, 0, 0, 0,
|
||||||
ctype_digit, ctype_digit,
|
ctype_digit, ctype_digit,
|
||||||
ctype_space, ctype_space,
|
ctype_space, ctype_space,
|
||||||
ctype_word, ctype_word,
|
ctype_word, ctype_word,
|
||||||
@ -135,7 +141,7 @@ static uschar toptable1[] = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
static uschar toptable2[] = {
|
static uschar toptable2[] = {
|
||||||
0, 0, 0, 0, 0,
|
0, 0, 0, 0, 0, 0,
|
||||||
ctype_digit, 0,
|
ctype_digit, 0,
|
||||||
ctype_space, 0,
|
ctype_space, 0,
|
||||||
ctype_word, 0,
|
ctype_word, 0,
|
||||||
@ -500,7 +506,9 @@ for (;;)
|
|||||||
const uschar *code;
|
const uschar *code;
|
||||||
int state_offset = current_state->offset;
|
int state_offset = current_state->offset;
|
||||||
int count, codevalue;
|
int count, codevalue;
|
||||||
|
#ifdef SUPPORT_UCP
|
||||||
int chartype, script;
|
int chartype, script;
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
|
printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
|
||||||
@ -555,10 +563,10 @@ for (;;)
|
|||||||
permitted.
|
permitted.
|
||||||
|
|
||||||
We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
|
We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
|
||||||
argument that is not a data character - but is always one byte long.
|
argument that is not a data character - but is always one byte long. We
|
||||||
Unfortunately, we have to take special action to deal with \P, \p, and
|
have to take special action to deal with \P, \p, \H, \h, \V, \v and \X in
|
||||||
\X in this case. To keep the other cases fast, convert these ones to new
|
this case. To keep the other cases fast, convert these ones to new opcodes.
|
||||||
opcodes. */
|
*/
|
||||||
|
|
||||||
if (coptable[codevalue] > 0)
|
if (coptable[codevalue] > 0)
|
||||||
{
|
{
|
||||||
@ -576,6 +584,10 @@ for (;;)
|
|||||||
case OP_PROP: codevalue += OP_PROP_EXTRA; break;
|
case OP_PROP: codevalue += OP_PROP_EXTRA; break;
|
||||||
case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;
|
case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;
|
||||||
case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;
|
case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;
|
||||||
|
case OP_NOT_HSPACE:
|
||||||
|
case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break;
|
||||||
|
case OP_NOT_VSPACE:
|
||||||
|
case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break;
|
||||||
default: break;
|
default: break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -783,13 +795,12 @@ for (;;)
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
|
|
||||||
#ifdef SUPPORT_UCP
|
|
||||||
|
|
||||||
/*-----------------------------------------------------------------*/
|
/*-----------------------------------------------------------------*/
|
||||||
/* Check the next character by Unicode property. We will get here only
|
/* Check the next character by Unicode property. We will get here only
|
||||||
if the support is in the binary; otherwise a compile-time error occurs.
|
if the support is in the binary; otherwise a compile-time error occurs.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#ifdef SUPPORT_UCP
|
||||||
case OP_PROP:
|
case OP_PROP:
|
||||||
case OP_NOTPROP:
|
case OP_NOTPROP:
|
||||||
if (clen > 0)
|
if (clen > 0)
|
||||||
@ -970,6 +981,7 @@ for (;;)
|
|||||||
argument. It keeps the code above fast for the other cases. The argument
|
argument. It keeps the code above fast for the other cases. The argument
|
||||||
is in the d variable. */
|
is in the d variable. */
|
||||||
|
|
||||||
|
#ifdef SUPPORT_UCP
|
||||||
case OP_PROP_EXTRA + OP_TYPEPLUS:
|
case OP_PROP_EXTRA + OP_TYPEPLUS:
|
||||||
case OP_PROP_EXTRA + OP_TYPEMINPLUS:
|
case OP_PROP_EXTRA + OP_TYPEMINPLUS:
|
||||||
case OP_PROP_EXTRA + OP_TYPEPOSPLUS:
|
case OP_PROP_EXTRA + OP_TYPEPOSPLUS:
|
||||||
@ -1049,6 +1061,7 @@ for (;;)
|
|||||||
ADD_NEW_DATA(-state_offset, count, ncount);
|
ADD_NEW_DATA(-state_offset, count, ncount);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
#endif
|
||||||
|
|
||||||
/*-----------------------------------------------------------------*/
|
/*-----------------------------------------------------------------*/
|
||||||
case OP_ANYNL_EXTRA + OP_TYPEPLUS:
|
case OP_ANYNL_EXTRA + OP_TYPEPLUS:
|
||||||
@ -1085,6 +1098,97 @@ for (;;)
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
/*-----------------------------------------------------------------*/
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_VSPACE_EXTRA + OP_TYPEPLUS:
|
||||||
|
case OP_VSPACE_EXTRA + OP_TYPEMINPLUS:
|
||||||
|
case OP_VSPACE_EXTRA + OP_TYPEPOSPLUS:
|
||||||
|
count = current_state->count; /* Already matched */
|
||||||
|
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
|
||||||
|
if (clen > 0)
|
||||||
|
{
|
||||||
|
BOOL OK;
|
||||||
|
switch (c)
|
||||||
|
{
|
||||||
|
case 0x000a:
|
||||||
|
case 0x000b:
|
||||||
|
case 0x000c:
|
||||||
|
case 0x000d:
|
||||||
|
case 0x0085:
|
||||||
|
case 0x2028:
|
||||||
|
case 0x2029:
|
||||||
|
OK = TRUE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
OK = FALSE;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (OK == (d == OP_VSPACE))
|
||||||
|
{
|
||||||
|
if (count > 0 && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS)
|
||||||
|
{
|
||||||
|
active_count--; /* Remove non-match possibility */
|
||||||
|
next_active_state--;
|
||||||
|
}
|
||||||
|
count++;
|
||||||
|
ADD_NEW_DATA(-state_offset, count, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_HSPACE_EXTRA + OP_TYPEPLUS:
|
||||||
|
case OP_HSPACE_EXTRA + OP_TYPEMINPLUS:
|
||||||
|
case OP_HSPACE_EXTRA + OP_TYPEPOSPLUS:
|
||||||
|
count = current_state->count; /* Already matched */
|
||||||
|
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
|
||||||
|
if (clen > 0)
|
||||||
|
{
|
||||||
|
BOOL OK;
|
||||||
|
switch (c)
|
||||||
|
{
|
||||||
|
case 0x09: /* HT */
|
||||||
|
case 0x20: /* SPACE */
|
||||||
|
case 0xa0: /* NBSP */
|
||||||
|
case 0x1680: /* OGHAM SPACE MARK */
|
||||||
|
case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
|
||||||
|
case 0x2000: /* EN QUAD */
|
||||||
|
case 0x2001: /* EM QUAD */
|
||||||
|
case 0x2002: /* EN SPACE */
|
||||||
|
case 0x2003: /* EM SPACE */
|
||||||
|
case 0x2004: /* THREE-PER-EM SPACE */
|
||||||
|
case 0x2005: /* FOUR-PER-EM SPACE */
|
||||||
|
case 0x2006: /* SIX-PER-EM SPACE */
|
||||||
|
case 0x2007: /* FIGURE SPACE */
|
||||||
|
case 0x2008: /* PUNCTUATION SPACE */
|
||||||
|
case 0x2009: /* THIN SPACE */
|
||||||
|
case 0x200A: /* HAIR SPACE */
|
||||||
|
case 0x202f: /* NARROW NO-BREAK SPACE */
|
||||||
|
case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
|
||||||
|
case 0x3000: /* IDEOGRAPHIC SPACE */
|
||||||
|
OK = TRUE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
OK = FALSE;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (OK == (d == OP_HSPACE))
|
||||||
|
{
|
||||||
|
if (count > 0 && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS)
|
||||||
|
{
|
||||||
|
active_count--; /* Remove non-match possibility */
|
||||||
|
next_active_state--;
|
||||||
|
}
|
||||||
|
count++;
|
||||||
|
ADD_NEW_DATA(-state_offset, count, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
#ifdef SUPPORT_UCP
|
||||||
case OP_PROP_EXTRA + OP_TYPEQUERY:
|
case OP_PROP_EXTRA + OP_TYPEQUERY:
|
||||||
case OP_PROP_EXTRA + OP_TYPEMINQUERY:
|
case OP_PROP_EXTRA + OP_TYPEMINQUERY:
|
||||||
case OP_PROP_EXTRA + OP_TYPEPOSQUERY:
|
case OP_PROP_EXTRA + OP_TYPEPOSQUERY:
|
||||||
@ -1182,6 +1286,7 @@ for (;;)
|
|||||||
ADD_NEW_DATA(-(state_offset + count), 0, ncount);
|
ADD_NEW_DATA(-(state_offset + count), 0, ncount);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
#endif
|
||||||
|
|
||||||
/*-----------------------------------------------------------------*/
|
/*-----------------------------------------------------------------*/
|
||||||
case OP_ANYNL_EXTRA + OP_TYPEQUERY:
|
case OP_ANYNL_EXTRA + OP_TYPEQUERY:
|
||||||
@ -1226,6 +1331,112 @@ for (;;)
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
/*-----------------------------------------------------------------*/
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_VSPACE_EXTRA + OP_TYPEQUERY:
|
||||||
|
case OP_VSPACE_EXTRA + OP_TYPEMINQUERY:
|
||||||
|
case OP_VSPACE_EXTRA + OP_TYPEPOSQUERY:
|
||||||
|
count = 2;
|
||||||
|
goto QS4;
|
||||||
|
|
||||||
|
case OP_VSPACE_EXTRA + OP_TYPESTAR:
|
||||||
|
case OP_VSPACE_EXTRA + OP_TYPEMINSTAR:
|
||||||
|
case OP_VSPACE_EXTRA + OP_TYPEPOSSTAR:
|
||||||
|
count = 0;
|
||||||
|
|
||||||
|
QS4:
|
||||||
|
ADD_ACTIVE(state_offset + 2, 0);
|
||||||
|
if (clen > 0)
|
||||||
|
{
|
||||||
|
BOOL OK;
|
||||||
|
switch (c)
|
||||||
|
{
|
||||||
|
case 0x000a:
|
||||||
|
case 0x000b:
|
||||||
|
case 0x000c:
|
||||||
|
case 0x000d:
|
||||||
|
case 0x0085:
|
||||||
|
case 0x2028:
|
||||||
|
case 0x2029:
|
||||||
|
OK = TRUE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
OK = FALSE;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (OK == (d == OP_VSPACE))
|
||||||
|
{
|
||||||
|
if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR ||
|
||||||
|
codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY)
|
||||||
|
{
|
||||||
|
active_count--; /* Remove non-match possibility */
|
||||||
|
next_active_state--;
|
||||||
|
}
|
||||||
|
ADD_NEW_DATA(-(state_offset + count), 0, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_HSPACE_EXTRA + OP_TYPEQUERY:
|
||||||
|
case OP_HSPACE_EXTRA + OP_TYPEMINQUERY:
|
||||||
|
case OP_HSPACE_EXTRA + OP_TYPEPOSQUERY:
|
||||||
|
count = 2;
|
||||||
|
goto QS5;
|
||||||
|
|
||||||
|
case OP_HSPACE_EXTRA + OP_TYPESTAR:
|
||||||
|
case OP_HSPACE_EXTRA + OP_TYPEMINSTAR:
|
||||||
|
case OP_HSPACE_EXTRA + OP_TYPEPOSSTAR:
|
||||||
|
count = 0;
|
||||||
|
|
||||||
|
QS5:
|
||||||
|
ADD_ACTIVE(state_offset + 2, 0);
|
||||||
|
if (clen > 0)
|
||||||
|
{
|
||||||
|
BOOL OK;
|
||||||
|
switch (c)
|
||||||
|
{
|
||||||
|
case 0x09: /* HT */
|
||||||
|
case 0x20: /* SPACE */
|
||||||
|
case 0xa0: /* NBSP */
|
||||||
|
case 0x1680: /* OGHAM SPACE MARK */
|
||||||
|
case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
|
||||||
|
case 0x2000: /* EN QUAD */
|
||||||
|
case 0x2001: /* EM QUAD */
|
||||||
|
case 0x2002: /* EN SPACE */
|
||||||
|
case 0x2003: /* EM SPACE */
|
||||||
|
case 0x2004: /* THREE-PER-EM SPACE */
|
||||||
|
case 0x2005: /* FOUR-PER-EM SPACE */
|
||||||
|
case 0x2006: /* SIX-PER-EM SPACE */
|
||||||
|
case 0x2007: /* FIGURE SPACE */
|
||||||
|
case 0x2008: /* PUNCTUATION SPACE */
|
||||||
|
case 0x2009: /* THIN SPACE */
|
||||||
|
case 0x200A: /* HAIR SPACE */
|
||||||
|
case 0x202f: /* NARROW NO-BREAK SPACE */
|
||||||
|
case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
|
||||||
|
case 0x3000: /* IDEOGRAPHIC SPACE */
|
||||||
|
OK = TRUE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
OK = FALSE;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (OK == (d == OP_HSPACE))
|
||||||
|
{
|
||||||
|
if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR ||
|
||||||
|
codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY)
|
||||||
|
{
|
||||||
|
active_count--; /* Remove non-match possibility */
|
||||||
|
next_active_state--;
|
||||||
|
}
|
||||||
|
ADD_NEW_DATA(-(state_offset + count), 0, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
#ifdef SUPPORT_UCP
|
||||||
case OP_PROP_EXTRA + OP_TYPEEXACT:
|
case OP_PROP_EXTRA + OP_TYPEEXACT:
|
||||||
case OP_PROP_EXTRA + OP_TYPEUPTO:
|
case OP_PROP_EXTRA + OP_TYPEUPTO:
|
||||||
case OP_PROP_EXTRA + OP_TYPEMINUPTO:
|
case OP_PROP_EXTRA + OP_TYPEMINUPTO:
|
||||||
@ -1313,6 +1524,7 @@ for (;;)
|
|||||||
{ ADD_NEW_DATA(-state_offset, count, ncount); }
|
{ ADD_NEW_DATA(-state_offset, count, ncount); }
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
#endif
|
||||||
|
|
||||||
/*-----------------------------------------------------------------*/
|
/*-----------------------------------------------------------------*/
|
||||||
case OP_ANYNL_EXTRA + OP_TYPEEXACT:
|
case OP_ANYNL_EXTRA + OP_TYPEEXACT:
|
||||||
@ -1352,6 +1564,103 @@ for (;;)
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_VSPACE_EXTRA + OP_TYPEEXACT:
|
||||||
|
case OP_VSPACE_EXTRA + OP_TYPEUPTO:
|
||||||
|
case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
|
||||||
|
case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
|
||||||
|
if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
|
||||||
|
{ ADD_ACTIVE(state_offset + 4, 0); }
|
||||||
|
count = current_state->count; /* Number already matched */
|
||||||
|
if (clen > 0)
|
||||||
|
{
|
||||||
|
BOOL OK;
|
||||||
|
switch (c)
|
||||||
|
{
|
||||||
|
case 0x000a:
|
||||||
|
case 0x000b:
|
||||||
|
case 0x000c:
|
||||||
|
case 0x000d:
|
||||||
|
case 0x0085:
|
||||||
|
case 0x2028:
|
||||||
|
case 0x2029:
|
||||||
|
OK = TRUE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
OK = FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (OK == (d == OP_VSPACE))
|
||||||
|
{
|
||||||
|
if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO)
|
||||||
|
{
|
||||||
|
active_count--; /* Remove non-match possibility */
|
||||||
|
next_active_state--;
|
||||||
|
}
|
||||||
|
if (++count >= GET2(code, 1))
|
||||||
|
{ ADD_NEW_DATA(-(state_offset + 4), 0, 0); }
|
||||||
|
else
|
||||||
|
{ ADD_NEW_DATA(-state_offset, count, 0); }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_HSPACE_EXTRA + OP_TYPEEXACT:
|
||||||
|
case OP_HSPACE_EXTRA + OP_TYPEUPTO:
|
||||||
|
case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
|
||||||
|
case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
|
||||||
|
if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
|
||||||
|
{ ADD_ACTIVE(state_offset + 4, 0); }
|
||||||
|
count = current_state->count; /* Number already matched */
|
||||||
|
if (clen > 0)
|
||||||
|
{
|
||||||
|
BOOL OK;
|
||||||
|
switch (c)
|
||||||
|
{
|
||||||
|
case 0x09: /* HT */
|
||||||
|
case 0x20: /* SPACE */
|
||||||
|
case 0xa0: /* NBSP */
|
||||||
|
case 0x1680: /* OGHAM SPACE MARK */
|
||||||
|
case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
|
||||||
|
case 0x2000: /* EN QUAD */
|
||||||
|
case 0x2001: /* EM QUAD */
|
||||||
|
case 0x2002: /* EN SPACE */
|
||||||
|
case 0x2003: /* EM SPACE */
|
||||||
|
case 0x2004: /* THREE-PER-EM SPACE */
|
||||||
|
case 0x2005: /* FOUR-PER-EM SPACE */
|
||||||
|
case 0x2006: /* SIX-PER-EM SPACE */
|
||||||
|
case 0x2007: /* FIGURE SPACE */
|
||||||
|
case 0x2008: /* PUNCTUATION SPACE */
|
||||||
|
case 0x2009: /* THIN SPACE */
|
||||||
|
case 0x200A: /* HAIR SPACE */
|
||||||
|
case 0x202f: /* NARROW NO-BREAK SPACE */
|
||||||
|
case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
|
||||||
|
case 0x3000: /* IDEOGRAPHIC SPACE */
|
||||||
|
OK = TRUE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
OK = FALSE;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (OK == (d == OP_HSPACE))
|
||||||
|
{
|
||||||
|
if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO)
|
||||||
|
{
|
||||||
|
active_count--; /* Remove non-match possibility */
|
||||||
|
next_active_state--;
|
||||||
|
}
|
||||||
|
if (++count >= GET2(code, 1))
|
||||||
|
{ ADD_NEW_DATA(-(state_offset + 4), 0, 0); }
|
||||||
|
else
|
||||||
|
{ ADD_NEW_DATA(-state_offset, count, 0); }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
/* ========================================================================== */
|
/* ========================================================================== */
|
||||||
/* These opcodes are followed by a character that is usually compared
|
/* These opcodes are followed by a character that is usually compared
|
||||||
to the current subject character; it is loaded into d. We still get
|
to the current subject character; it is loaded into d. We still get
|
||||||
@ -1450,6 +1759,102 @@ for (;;)
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_NOT_VSPACE:
|
||||||
|
if (clen > 0) switch(c)
|
||||||
|
{
|
||||||
|
case 0x000a:
|
||||||
|
case 0x000b:
|
||||||
|
case 0x000c:
|
||||||
|
case 0x000d:
|
||||||
|
case 0x0085:
|
||||||
|
case 0x2028:
|
||||||
|
case 0x2029:
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
ADD_NEW(state_offset + 1, 0);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_VSPACE:
|
||||||
|
if (clen > 0) switch(c)
|
||||||
|
{
|
||||||
|
case 0x000a:
|
||||||
|
case 0x000b:
|
||||||
|
case 0x000c:
|
||||||
|
case 0x000d:
|
||||||
|
case 0x0085:
|
||||||
|
case 0x2028:
|
||||||
|
case 0x2029:
|
||||||
|
ADD_NEW(state_offset + 1, 0);
|
||||||
|
break;
|
||||||
|
|
||||||
|
default: break;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_NOT_HSPACE:
|
||||||
|
if (clen > 0) switch(c)
|
||||||
|
{
|
||||||
|
case 0x09: /* HT */
|
||||||
|
case 0x20: /* SPACE */
|
||||||
|
case 0xa0: /* NBSP */
|
||||||
|
case 0x1680: /* OGHAM SPACE MARK */
|
||||||
|
case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
|
||||||
|
case 0x2000: /* EN QUAD */
|
||||||
|
case 0x2001: /* EM QUAD */
|
||||||
|
case 0x2002: /* EN SPACE */
|
||||||
|
case 0x2003: /* EM SPACE */
|
||||||
|
case 0x2004: /* THREE-PER-EM SPACE */
|
||||||
|
case 0x2005: /* FOUR-PER-EM SPACE */
|
||||||
|
case 0x2006: /* SIX-PER-EM SPACE */
|
||||||
|
case 0x2007: /* FIGURE SPACE */
|
||||||
|
case 0x2008: /* PUNCTUATION SPACE */
|
||||||
|
case 0x2009: /* THIN SPACE */
|
||||||
|
case 0x200A: /* HAIR SPACE */
|
||||||
|
case 0x202f: /* NARROW NO-BREAK SPACE */
|
||||||
|
case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
|
||||||
|
case 0x3000: /* IDEOGRAPHIC SPACE */
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
ADD_NEW(state_offset + 1, 0);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_HSPACE:
|
||||||
|
if (clen > 0) switch(c)
|
||||||
|
{
|
||||||
|
case 0x09: /* HT */
|
||||||
|
case 0x20: /* SPACE */
|
||||||
|
case 0xa0: /* NBSP */
|
||||||
|
case 0x1680: /* OGHAM SPACE MARK */
|
||||||
|
case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
|
||||||
|
case 0x2000: /* EN QUAD */
|
||||||
|
case 0x2001: /* EM QUAD */
|
||||||
|
case 0x2002: /* EN SPACE */
|
||||||
|
case 0x2003: /* EM SPACE */
|
||||||
|
case 0x2004: /* THREE-PER-EM SPACE */
|
||||||
|
case 0x2005: /* FOUR-PER-EM SPACE */
|
||||||
|
case 0x2006: /* SIX-PER-EM SPACE */
|
||||||
|
case 0x2007: /* FIGURE SPACE */
|
||||||
|
case 0x2008: /* PUNCTUATION SPACE */
|
||||||
|
case 0x2009: /* THIN SPACE */
|
||||||
|
case 0x200A: /* HAIR SPACE */
|
||||||
|
case 0x202f: /* NARROW NO-BREAK SPACE */
|
||||||
|
case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
|
||||||
|
case 0x3000: /* IDEOGRAPHIC SPACE */
|
||||||
|
ADD_NEW(state_offset + 1, 0);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
/*-----------------------------------------------------------------*/
|
/*-----------------------------------------------------------------*/
|
||||||
/* Match a negated single character. This is only used for one-byte
|
/* Match a negated single character. This is only used for one-byte
|
||||||
characters, that is, we know that d < 256. The character we are
|
characters, that is, we know that d < 256. The character we are
|
||||||
@ -2057,7 +2462,7 @@ is not anchored.
|
|||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
argument_re points to the compiled expression
|
argument_re points to the compiled expression
|
||||||
extra_data points to extra data or is NULL (not currently used)
|
extra_data points to extra data or is NULL
|
||||||
subject points to the subject string
|
subject points to the subject string
|
||||||
length length of subject string (may contain binary zeros)
|
length length of subject string (may contain binary zeros)
|
||||||
start_offset where to start in the subject string
|
start_offset where to start in the subject string
|
||||||
@ -2073,7 +2478,7 @@ Returns: > 0 => number of match offset pairs placed in offsets
|
|||||||
< -1 => some kind of unexpected problem
|
< -1 => some kind of unexpected problem
|
||||||
*/
|
*/
|
||||||
|
|
||||||
PCRE_DATA_SCOPE int
|
PCRE_EXP_DEFN int
|
||||||
pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
|
pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
|
||||||
const char *subject, int length, int start_offset, int options, int *offsets,
|
const char *subject, int length, int start_offset, int options, int *offsets,
|
||||||
int offsetcount, int *workspace, int wscount)
|
int offsetcount, int *workspace, int wscount)
|
||||||
@ -2163,10 +2568,10 @@ md->end_subject = end_subject;
|
|||||||
md->moptions = options;
|
md->moptions = options;
|
||||||
md->poptions = re->options;
|
md->poptions = re->options;
|
||||||
|
|
||||||
/* Handle different types of newline. The two bits give four cases. If nothing
|
/* Handle different types of newline. The three bits give eight cases. If
|
||||||
is set at run time, whatever was used at compile time applies. */
|
nothing is set at run time, whatever was used at compile time applies. */
|
||||||
|
|
||||||
switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : options) &
|
switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
|
||||||
PCRE_NEWLINE_BITS)
|
PCRE_NEWLINE_BITS)
|
||||||
{
|
{
|
||||||
case 0: newline = NEWLINE; break; /* Compile-time default */
|
case 0: newline = NEWLINE; break; /* Compile-time default */
|
||||||
@ -2175,10 +2580,15 @@ switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : options) &
|
|||||||
case PCRE_NEWLINE_CR+
|
case PCRE_NEWLINE_CR+
|
||||||
PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
|
PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
|
||||||
case PCRE_NEWLINE_ANY: newline = -1; break;
|
case PCRE_NEWLINE_ANY: newline = -1; break;
|
||||||
|
case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
|
||||||
default: return PCRE_ERROR_BADNEWLINE;
|
default: return PCRE_ERROR_BADNEWLINE;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (newline < 0)
|
if (newline == -2)
|
||||||
|
{
|
||||||
|
md->nltype = NLTYPE_ANYCRLF;
|
||||||
|
}
|
||||||
|
else if (newline < 0)
|
||||||
{
|
{
|
||||||
md->nltype = NLTYPE_ANY;
|
md->nltype = NLTYPE_ANY;
|
||||||
}
|
}
|
||||||
@ -2308,6 +2718,16 @@ for (;;)
|
|||||||
{
|
{
|
||||||
while (current_subject <= end_subject && !WAS_NEWLINE(current_subject))
|
while (current_subject <= end_subject && !WAS_NEWLINE(current_subject))
|
||||||
current_subject++;
|
current_subject++;
|
||||||
|
|
||||||
|
/* If we have just passed a CR and the newline option is ANY or
|
||||||
|
ANYCRLF, and we are now at a LF, advance the match position by one more
|
||||||
|
character. */
|
||||||
|
|
||||||
|
if (current_subject[-1] == '\r' &&
|
||||||
|
(md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
|
||||||
|
current_subject < end_subject &&
|
||||||
|
*current_subject == '\n')
|
||||||
|
current_subject++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2416,11 +2836,14 @@ for (;;)
|
|||||||
}
|
}
|
||||||
if (current_subject > end_subject) break;
|
if (current_subject > end_subject) break;
|
||||||
|
|
||||||
/* If we have just passed a CR and the newline option is CRLF or ANY, and we
|
/* If we have just passed a CR and the newline option is CRLF or ANY or
|
||||||
are now at a LF, advance the match position by one more character. */
|
ANYCRLF, and we are now at a LF, advance the match position by one more
|
||||||
|
character. */
|
||||||
|
|
||||||
if (current_subject[-1] == '\r' &&
|
if (current_subject[-1] == '\r' &&
|
||||||
(md->nltype == NLTYPE_ANY || md->nllen == 2) &&
|
(md->nltype == NLTYPE_ANY ||
|
||||||
|
md->nltype == NLTYPE_ANYCRLF ||
|
||||||
|
md->nllen == 2) &&
|
||||||
current_subject < end_subject &&
|
current_subject < end_subject &&
|
||||||
*current_subject == '\n')
|
*current_subject == '\n')
|
||||||
current_subject++;
|
current_subject++;
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -6,7 +6,7 @@
|
|||||||
and semantics are as close as possible to those of the Perl 5 language.
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
Written by Philip Hazel
|
Written by Philip Hazel
|
||||||
Copyright (c) 1997-2006 University of Cambridge
|
Copyright (c) 1997-2007 University of Cambridge
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
@ -61,7 +61,7 @@ Arguments:
|
|||||||
Returns: 0 if data returned, negative on error
|
Returns: 0 if data returned, negative on error
|
||||||
*/
|
*/
|
||||||
|
|
||||||
PCRE_DATA_SCOPE int
|
PCRE_EXP_DEFN int
|
||||||
pcre_fullinfo(const pcre *argument_re, const pcre_extra *extra_data, int what,
|
pcre_fullinfo(const pcre *argument_re, const pcre_extra *extra_data, int what,
|
||||||
void *where)
|
void *where)
|
||||||
{
|
{
|
||||||
@ -140,6 +140,14 @@ switch (what)
|
|||||||
*((const uschar **)where) = (const uschar *)(_pcre_default_tables);
|
*((const uschar **)where) = (const uschar *)(_pcre_default_tables);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case PCRE_INFO_OKPARTIAL:
|
||||||
|
*((int *)where) = (re->options & PCRE_NOPARTIAL) == 0;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE_INFO_JCHANGED:
|
||||||
|
*((int *)where) = (re->options & PCRE_JCHANGED) != 0;
|
||||||
|
break;
|
||||||
|
|
||||||
default: return PCRE_ERROR_BADOPTION;
|
default: return PCRE_ERROR_BADOPTION;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
and semantics are as close as possible to those of the Perl 5 language.
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
Written by Philip Hazel
|
Written by Philip Hazel
|
||||||
Copyright (c) 1997-2006 University of Cambridge
|
Copyright (c) 1997-2007 University of Cambridge
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
and semantics are as close as possible to those of the Perl 5 language.
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
Written by Philip Hazel
|
Written by Philip Hazel
|
||||||
Copyright (c) 1997-2006 University of Cambridge
|
Copyright (c) 1997-2007 University of Cambridge
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
@ -46,14 +46,8 @@ indirection. These values can be changed by the caller, but are shared between
|
|||||||
all threads. However, when compiling for Virtual Pascal, things are done
|
all threads. However, when compiling for Virtual Pascal, things are done
|
||||||
differently, and global variables are not used (see pcre.in). */
|
differently, and global variables are not used (see pcre.in). */
|
||||||
|
|
||||||
|
|
||||||
#include "pcre_internal.h"
|
#include "pcre_internal.h"
|
||||||
|
|
||||||
|
PCRE_EXP_DATA_DEFN int (*pcre_callout)(pcre_callout_block *) = NULL;
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" int (*pcre_callout)(pcre_callout_block *) = NULL;
|
|
||||||
#else
|
|
||||||
int (*pcre_callout)(pcre_callout_block *) = NULL;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* End of pcre_globals.c */
|
/* End of pcre_globals.c */
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
and semantics are as close as possible to those of the Perl 5 language.
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
Written by Philip Hazel
|
Written by Philip Hazel
|
||||||
Copyright (c) 1997-2006 University of Cambridge
|
Copyright (c) 1997-2007 University of Cambridge
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
@ -68,7 +68,7 @@ Returns: number of capturing subpatterns
|
|||||||
or negative values on error
|
or negative values on error
|
||||||
*/
|
*/
|
||||||
|
|
||||||
PCRE_DATA_SCOPE int
|
PCRE_EXP_DEFN int
|
||||||
pcre_info(const pcre *argument_re, int *optptr, int *first_byte)
|
pcre_info(const pcre *argument_re, int *optptr, int *first_byte)
|
||||||
{
|
{
|
||||||
real_pcre internal_re;
|
real_pcre internal_re;
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
and semantics are as close as possible to those of the Perl 5 language.
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
Written by Philip Hazel
|
Written by Philip Hazel
|
||||||
Copyright (c) 1997-2006 University of Cambridge
|
Copyright (c) 1997-2007 University of Cambridge
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
@ -83,8 +83,58 @@ setjmp and stdarg are used is when NO_RECURSE is set. */
|
|||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
#ifndef PCRE_SPY
|
/* When compiling a DLL for Windows, the exported symbols have to be declared
|
||||||
#define PCRE_DEFINITION /* Win32 __declspec(export) trigger for .dll */
|
using some MS magic. I found some useful information on this web page:
|
||||||
|
http://msdn2.microsoft.com/en-us/library/y4h7bcy6(VS.80).aspx. According to the
|
||||||
|
information there, using __declspec(dllexport) without "extern" we have a
|
||||||
|
definition; with "extern" we have a declaration. The settings here override the
|
||||||
|
setting in pcre.h (which is included below); it defines only PCRE_EXP_DECL,
|
||||||
|
which is all that is needed for applications (they just import the symbols). We
|
||||||
|
use:
|
||||||
|
|
||||||
|
PCRE_EXP_DECL for declarations
|
||||||
|
PCRE_EXP_DEFN for definitions of exported functions
|
||||||
|
PCRE_EXP_DATA_DEFN for definitions of exported variables
|
||||||
|
|
||||||
|
The reason for the two DEFN macros is that in non-Windows environments, one
|
||||||
|
does not want to have "extern" before variable definitions because it leads to
|
||||||
|
compiler warnings. So we distinguish between functions and variables. In
|
||||||
|
Windows, the two should always be the same.
|
||||||
|
|
||||||
|
The reason for wrapping this in #ifndef PCRE_EXP_DECL is so that pcretest,
|
||||||
|
which is an application, but needs to import this file in order to "peek" at
|
||||||
|
internals, can #include pcre.h first to get an application's-eye view.
|
||||||
|
|
||||||
|
In principle, people compiling for non-Windows, non-Unix-like (i.e. uncommon,
|
||||||
|
special-purpose environments) might want to stick other stuff in front of
|
||||||
|
exported symbols. That's why, in the non-Windows case, we set PCRE_EXP_DEFN and
|
||||||
|
PCRE_EXP_DATA_DEFN only if they are not already set. */
|
||||||
|
|
||||||
|
#ifndef PCRE_EXP_DECL
|
||||||
|
# ifdef _WIN32
|
||||||
|
# ifdef DLL_EXPORT
|
||||||
|
# define PCRE_EXP_DECL extern __declspec(dllexport)
|
||||||
|
# define PCRE_EXP_DEFN __declspec(dllexport)
|
||||||
|
# define PCRE_EXP_DATA_DEFN __declspec(dllexport)
|
||||||
|
# else
|
||||||
|
# define PCRE_EXP_DECL extern
|
||||||
|
# define PCRE_EXP_DEFN
|
||||||
|
# define PCRE_EXP_DATA_DEFN
|
||||||
|
# endif
|
||||||
|
#
|
||||||
|
# else
|
||||||
|
# ifdef __cplusplus
|
||||||
|
# define PCRE_EXP_DECL extern "C"
|
||||||
|
# else
|
||||||
|
# define PCRE_EXP_DECL extern
|
||||||
|
# endif
|
||||||
|
# ifndef PCRE_EXP_DEFN
|
||||||
|
# define PCRE_EXP_DEFN PCRE_EXP_DECL
|
||||||
|
# endif
|
||||||
|
# ifndef PCRE_EXP_DATA_DEFN
|
||||||
|
# define PCRE_EXP_DATA_DEFN
|
||||||
|
# endif
|
||||||
|
# endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* We need to have types that specify unsigned 16-bit and 32-bit integers. We
|
/* We need to have types that specify unsigned 16-bit and 32-bit integers. We
|
||||||
@ -125,21 +175,22 @@ characters only go up to 0x7fffffff (though Unicode doesn't go beyond
|
|||||||
#define NOTACHAR 0xffffffff
|
#define NOTACHAR 0xffffffff
|
||||||
|
|
||||||
/* PCRE is able to support several different kinds of newline (CR, LF, CRLF,
|
/* PCRE is able to support several different kinds of newline (CR, LF, CRLF,
|
||||||
and "all" at present). The following macros are used to package up testing for
|
"any" and "anycrlf" at present). The following macros are used to package up
|
||||||
newlines. NLBLOCK, PSSTART, and PSEND are defined in the various modules to
|
testing for newlines. NLBLOCK, PSSTART, and PSEND are defined in the various
|
||||||
indicate in which datablock the parameters exist, and what the start/end of
|
modules to indicate in which datablock the parameters exist, and what the
|
||||||
string field names are. */
|
start/end of string field names are. */
|
||||||
|
|
||||||
#define NLTYPE_FIXED 0 /* Newline is a fixed length string */
|
#define NLTYPE_FIXED 0 /* Newline is a fixed length string */
|
||||||
#define NLTYPE_ANY 1 /* Newline is any Unicode line ending */
|
#define NLTYPE_ANY 1 /* Newline is any Unicode line ending */
|
||||||
|
#define NLTYPE_ANYCRLF 2 /* Newline is CR, LF, or CRLF */
|
||||||
|
|
||||||
/* This macro checks for a newline at the given position */
|
/* This macro checks for a newline at the given position */
|
||||||
|
|
||||||
#define IS_NEWLINE(p) \
|
#define IS_NEWLINE(p) \
|
||||||
((NLBLOCK->nltype != NLTYPE_FIXED)? \
|
((NLBLOCK->nltype != NLTYPE_FIXED)? \
|
||||||
((p) < NLBLOCK->PSEND && \
|
((p) < NLBLOCK->PSEND && \
|
||||||
_pcre_is_newline((p), NLBLOCK->PSEND, &(NLBLOCK->nllen), utf8) \
|
_pcre_is_newline((p), NLBLOCK->nltype, NLBLOCK->PSEND, &(NLBLOCK->nllen),\
|
||||||
) \
|
utf8)) \
|
||||||
: \
|
: \
|
||||||
((p) <= NLBLOCK->PSEND - NLBLOCK->nllen && \
|
((p) <= NLBLOCK->PSEND - NLBLOCK->nllen && \
|
||||||
(p)[0] == NLBLOCK->nl[0] && \
|
(p)[0] == NLBLOCK->nl[0] && \
|
||||||
@ -152,8 +203,8 @@ string field names are. */
|
|||||||
#define WAS_NEWLINE(p) \
|
#define WAS_NEWLINE(p) \
|
||||||
((NLBLOCK->nltype != NLTYPE_FIXED)? \
|
((NLBLOCK->nltype != NLTYPE_FIXED)? \
|
||||||
((p) > NLBLOCK->PSSTART && \
|
((p) > NLBLOCK->PSSTART && \
|
||||||
_pcre_was_newline((p), NLBLOCK->PSSTART, &(NLBLOCK->nllen), utf8) \
|
_pcre_was_newline((p), NLBLOCK->nltype, NLBLOCK->PSSTART, \
|
||||||
) \
|
&(NLBLOCK->nllen), utf8)) \
|
||||||
: \
|
: \
|
||||||
((p) >= NLBLOCK->PSSTART + NLBLOCK->nllen && \
|
((p) >= NLBLOCK->PSSTART + NLBLOCK->nllen && \
|
||||||
(p)[-NLBLOCK->nllen] == NLBLOCK->nl[0] && \
|
(p)[-NLBLOCK->nllen] == NLBLOCK->nl[0] && \
|
||||||
@ -178,10 +229,12 @@ must begin with PCRE_. */
|
|||||||
#define USPTR const unsigned char *
|
#define USPTR const unsigned char *
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/* Include the public PCRE header and the definitions of UCP character property
|
/* Include the public PCRE header and the definitions of UCP character property
|
||||||
values. */
|
values. */
|
||||||
|
|
||||||
#include "pcre.h"
|
#include <pcre.h>
|
||||||
#include "ucp.h"
|
#include "ucp.h"
|
||||||
|
|
||||||
/* When compiling for use with the Virtual Pascal compiler, these functions
|
/* When compiling for use with the Virtual Pascal compiler, these functions
|
||||||
@ -189,7 +242,9 @@ need to have their names changed. PCRE must be compiled with the -DVPCOMPAT
|
|||||||
option on the command line. */
|
option on the command line. */
|
||||||
|
|
||||||
#ifdef VPCOMPAT
|
#ifdef VPCOMPAT
|
||||||
|
#define strlen(s) _strlen(s)
|
||||||
#define strncmp(s1,s2,m) _strncmp(s1,s2,m)
|
#define strncmp(s1,s2,m) _strncmp(s1,s2,m)
|
||||||
|
#define memcmp(s,c,n) _memcmp(s,c,n)
|
||||||
#define memcpy(d,s,n) _memcpy(d,s,n)
|
#define memcpy(d,s,n) _memcpy(d,s,n)
|
||||||
#define memmove(d,s,n) _memmove(d,s,n)
|
#define memmove(d,s,n) _memmove(d,s,n)
|
||||||
#define memset(s,c,n) _memset(s,c,n)
|
#define memset(s,c,n) _memset(s,c,n)
|
||||||
@ -198,23 +253,31 @@ option on the command line. */
|
|||||||
/* To cope with SunOS4 and other systems that lack memmove() but have bcopy(),
|
/* To cope with SunOS4 and other systems that lack memmove() but have bcopy(),
|
||||||
define a macro for memmove() if HAVE_MEMMOVE is false, provided that HAVE_BCOPY
|
define a macro for memmove() if HAVE_MEMMOVE is false, provided that HAVE_BCOPY
|
||||||
is set. Otherwise, include an emulating function for those systems that have
|
is set. Otherwise, include an emulating function for those systems that have
|
||||||
neither (there some non-Unix environments where this is the case). This assumes
|
neither (there are some non-Unix environments where this is the case). */
|
||||||
that all calls to memmove are moving strings upwards in store, which is the
|
|
||||||
case in PCRE. */
|
|
||||||
|
|
||||||
#if ! HAVE_MEMMOVE
|
#ifndef HAVE_MEMMOVE
|
||||||
#undef memmove /* some systems may have a macro */
|
#undef memmove /* some systems may have a macro */
|
||||||
#if HAVE_BCOPY
|
#ifdef HAVE_BCOPY
|
||||||
#define memmove(a, b, c) bcopy(b, a, c)
|
#define memmove(a, b, c) bcopy(b, a, c)
|
||||||
#else /* HAVE_BCOPY */
|
#else /* HAVE_BCOPY */
|
||||||
static void *
|
static void *
|
||||||
pcre_memmove(unsigned char *dest, const unsigned char *src, size_t n)
|
pcre_memmove(void *d, const void *s, size_t n)
|
||||||
{
|
{
|
||||||
size_t i;
|
size_t i;
|
||||||
dest += n;
|
unsigned char *dest = (unsigned char *)d;
|
||||||
src += n;
|
const unsigned char *src = (const unsigned char *)s;
|
||||||
for (i = 0; i < n; ++i) *(--dest) = *(--src);
|
if (dest > src)
|
||||||
return dest;
|
{
|
||||||
|
dest += n;
|
||||||
|
src += n;
|
||||||
|
for (i = 0; i < n; ++i) *(--dest) = *(--src);
|
||||||
|
return (void *)dest;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for (i = 0; i < n; ++i) *dest++ = *src++;
|
||||||
|
return (void *)(dest - n);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
#define memmove(a, b, c) pcre_memmove(a, b, c)
|
#define memmove(a, b, c) pcre_memmove(a, b, c)
|
||||||
#endif /* not HAVE_BCOPY */
|
#endif /* not HAVE_BCOPY */
|
||||||
@ -439,7 +502,8 @@ bits. */
|
|||||||
/* Masks for identifying the public options that are permitted at compile
|
/* Masks for identifying the public options that are permitted at compile
|
||||||
time, run time, or study time, respectively. */
|
time, run time, or study time, respectively. */
|
||||||
|
|
||||||
#define PCRE_NEWLINE_BITS (PCRE_NEWLINE_CR|PCRE_NEWLINE_LF|PCRE_NEWLINE_ANY)
|
#define PCRE_NEWLINE_BITS (PCRE_NEWLINE_CR|PCRE_NEWLINE_LF|PCRE_NEWLINE_ANY| \
|
||||||
|
PCRE_NEWLINE_ANYCRLF)
|
||||||
|
|
||||||
#define PUBLIC_OPTIONS \
|
#define PUBLIC_OPTIONS \
|
||||||
(PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \
|
(PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \
|
||||||
@ -538,9 +602,9 @@ ESC_Z to detect the types that may be repeated. These are the types that
|
|||||||
consume characters. If any new escapes are put in between that don't consume a
|
consume characters. If any new escapes are put in between that don't consume a
|
||||||
character, that code will have to change. */
|
character, that code will have to change. */
|
||||||
|
|
||||||
enum { ESC_A = 1, ESC_G, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s, ESC_W,
|
enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s,
|
||||||
ESC_w, ESC_dum1, ESC_C, ESC_P, ESC_p, ESC_R, ESC_X, ESC_Z, ESC_z,
|
ESC_W, ESC_w, ESC_dum1, ESC_C, ESC_P, ESC_p, ESC_R, ESC_H, ESC_h,
|
||||||
ESC_E, ESC_Q, ESC_k, ESC_REF };
|
ESC_V, ESC_v, ESC_X, ESC_Z, ESC_z, ESC_E, ESC_Q, ESC_k, ESC_REF };
|
||||||
|
|
||||||
|
|
||||||
/* Opcode table: OP_BRA must be last, as all values >= it are used for brackets
|
/* Opcode table: OP_BRA must be last, as all values >= it are used for brackets
|
||||||
@ -562,133 +626,138 @@ enum {
|
|||||||
|
|
||||||
OP_SOD, /* 1 Start of data: \A */
|
OP_SOD, /* 1 Start of data: \A */
|
||||||
OP_SOM, /* 2 Start of match (subject + offset): \G */
|
OP_SOM, /* 2 Start of match (subject + offset): \G */
|
||||||
OP_NOT_WORD_BOUNDARY, /* 3 \B */
|
OP_SET_SOM, /* 3 Set start of match (\K) */
|
||||||
OP_WORD_BOUNDARY, /* 4 \b */
|
OP_NOT_WORD_BOUNDARY, /* 4 \B */
|
||||||
OP_NOT_DIGIT, /* 5 \D */
|
OP_WORD_BOUNDARY, /* 5 \b */
|
||||||
OP_DIGIT, /* 6 \d */
|
OP_NOT_DIGIT, /* 6 \D */
|
||||||
OP_NOT_WHITESPACE, /* 7 \S */
|
OP_DIGIT, /* 7 \d */
|
||||||
OP_WHITESPACE, /* 8 \s */
|
OP_NOT_WHITESPACE, /* 8 \S */
|
||||||
OP_NOT_WORDCHAR, /* 9 \W */
|
OP_WHITESPACE, /* 9 \s */
|
||||||
OP_WORDCHAR, /* 10 \w */
|
OP_NOT_WORDCHAR, /* 10 \W */
|
||||||
OP_ANY, /* 11 Match any character */
|
OP_WORDCHAR, /* 11 \w */
|
||||||
OP_ANYBYTE, /* 12 Match any byte (\C); different to OP_ANY for UTF-8 */
|
OP_ANY, /* 12 Match any character */
|
||||||
OP_NOTPROP, /* 13 \P (not Unicode property) */
|
OP_ANYBYTE, /* 13 Match any byte (\C); different to OP_ANY for UTF-8 */
|
||||||
OP_PROP, /* 14 \p (Unicode property) */
|
OP_NOTPROP, /* 14 \P (not Unicode property) */
|
||||||
OP_ANYNL, /* 15 \R (any newline sequence) */
|
OP_PROP, /* 15 \p (Unicode property) */
|
||||||
OP_EXTUNI, /* 16 \X (extended Unicode sequence */
|
OP_ANYNL, /* 16 \R (any newline sequence) */
|
||||||
OP_EODN, /* 17 End of data or \n at end of data: \Z. */
|
OP_NOT_HSPACE, /* 17 \H (not horizontal whitespace) */
|
||||||
OP_EOD, /* 18 End of data: \z */
|
OP_HSPACE, /* 18 \h (horizontal whitespace) */
|
||||||
|
OP_NOT_VSPACE, /* 19 \V (not vertical whitespace) */
|
||||||
|
OP_VSPACE, /* 20 \v (vertical whitespace) */
|
||||||
|
OP_EXTUNI, /* 21 \X (extended Unicode sequence) */
|
||||||
|
OP_EODN, /* 22 End of data or \n at end of data: \Z. */
|
||||||
|
OP_EOD, /* 23 End of data: \z */
|
||||||
|
|
||||||
OP_OPT, /* 19 Set runtime options */
|
OP_OPT, /* 24 Set runtime options */
|
||||||
OP_CIRC, /* 20 Start of line - varies with multiline switch */
|
OP_CIRC, /* 25 Start of line - varies with multiline switch */
|
||||||
OP_DOLL, /* 21 End of line - varies with multiline switch */
|
OP_DOLL, /* 26 End of line - varies with multiline switch */
|
||||||
OP_CHAR, /* 22 Match one character, casefully */
|
OP_CHAR, /* 27 Match one character, casefully */
|
||||||
OP_CHARNC, /* 23 Match one character, caselessly */
|
OP_CHARNC, /* 28 Match one character, caselessly */
|
||||||
OP_NOT, /* 24 Match one character, not the following one */
|
OP_NOT, /* 29 Match one character, not the following one */
|
||||||
|
|
||||||
OP_STAR, /* 25 The maximizing and minimizing versions of */
|
OP_STAR, /* 30 The maximizing and minimizing versions of */
|
||||||
OP_MINSTAR, /* 26 these six opcodes must come in pairs, with */
|
OP_MINSTAR, /* 31 these six opcodes must come in pairs, with */
|
||||||
OP_PLUS, /* 27 the minimizing one second. */
|
OP_PLUS, /* 32 the minimizing one second. */
|
||||||
OP_MINPLUS, /* 28 This first set applies to single characters.*/
|
OP_MINPLUS, /* 33 This first set applies to single characters.*/
|
||||||
OP_QUERY, /* 29 */
|
OP_QUERY, /* 34 */
|
||||||
OP_MINQUERY, /* 30 */
|
OP_MINQUERY, /* 35 */
|
||||||
|
|
||||||
OP_UPTO, /* 31 From 0 to n matches */
|
OP_UPTO, /* 36 From 0 to n matches */
|
||||||
OP_MINUPTO, /* 32 */
|
OP_MINUPTO, /* 37 */
|
||||||
OP_EXACT, /* 33 Exactly n matches */
|
OP_EXACT, /* 38 Exactly n matches */
|
||||||
|
|
||||||
OP_POSSTAR, /* 34 Possessified star */
|
OP_POSSTAR, /* 39 Possessified star */
|
||||||
OP_POSPLUS, /* 35 Possessified plus */
|
OP_POSPLUS, /* 40 Possessified plus */
|
||||||
OP_POSQUERY, /* 36 Posesssified query */
|
OP_POSQUERY, /* 41 Possessified query */
|
||||||
OP_POSUPTO, /* 37 Possessified upto */
|
OP_POSUPTO, /* 42 Possessified upto */
|
||||||
|
|
||||||
OP_NOTSTAR, /* 38 The maximizing and minimizing versions of */
|
OP_NOTSTAR, /* 43 The maximizing and minimizing versions of */
|
||||||
OP_NOTMINSTAR, /* 39 these six opcodes must come in pairs, with */
|
OP_NOTMINSTAR, /* 44 these six opcodes must come in pairs, with */
|
||||||
OP_NOTPLUS, /* 40 the minimizing one second. They must be in */
|
OP_NOTPLUS, /* 45 the minimizing one second. They must be in */
|
||||||
OP_NOTMINPLUS, /* 41 exactly the same order as those above. */
|
OP_NOTMINPLUS, /* 46 exactly the same order as those above. */
|
||||||
OP_NOTQUERY, /* 42 This set applies to "not" single characters. */
|
OP_NOTQUERY, /* 47 This set applies to "not" single characters. */
|
||||||
OP_NOTMINQUERY, /* 43 */
|
OP_NOTMINQUERY, /* 48 */
|
||||||
|
|
||||||
OP_NOTUPTO, /* 44 From 0 to n matches */
|
OP_NOTUPTO, /* 49 From 0 to n matches */
|
||||||
OP_NOTMINUPTO, /* 45 */
|
OP_NOTMINUPTO, /* 50 */
|
||||||
OP_NOTEXACT, /* 46 Exactly n matches */
|
OP_NOTEXACT, /* 51 Exactly n matches */
|
||||||
|
|
||||||
OP_NOTPOSSTAR, /* 47 Possessified versions */
|
OP_NOTPOSSTAR, /* 52 Possessified versions */
|
||||||
OP_NOTPOSPLUS, /* 48 */
|
OP_NOTPOSPLUS, /* 53 */
|
||||||
OP_NOTPOSQUERY, /* 49 */
|
OP_NOTPOSQUERY, /* 54 */
|
||||||
OP_NOTPOSUPTO, /* 50 */
|
OP_NOTPOSUPTO, /* 55 */
|
||||||
|
|
||||||
OP_TYPESTAR, /* 51 The maximizing and minimizing versions of */
|
OP_TYPESTAR, /* 56 The maximizing and minimizing versions of */
|
||||||
OP_TYPEMINSTAR, /* 52 these six opcodes must come in pairs, with */
|
OP_TYPEMINSTAR, /* 57 these six opcodes must come in pairs, with */
|
||||||
OP_TYPEPLUS, /* 53 the minimizing one second. These codes must */
|
OP_TYPEPLUS, /* 58 the minimizing one second. These codes must */
|
||||||
OP_TYPEMINPLUS, /* 54 be in exactly the same order as those above. */
|
OP_TYPEMINPLUS, /* 59 be in exactly the same order as those above. */
|
||||||
OP_TYPEQUERY, /* 55 This set applies to character types such as \d */
|
OP_TYPEQUERY, /* 60 This set applies to character types such as \d */
|
||||||
OP_TYPEMINQUERY, /* 56 */
|
OP_TYPEMINQUERY, /* 61 */
|
||||||
|
|
||||||
OP_TYPEUPTO, /* 57 From 0 to n matches */
|
OP_TYPEUPTO, /* 62 From 0 to n matches */
|
||||||
OP_TYPEMINUPTO, /* 58 */
|
OP_TYPEMINUPTO, /* 63 */
|
||||||
OP_TYPEEXACT, /* 59 Exactly n matches */
|
OP_TYPEEXACT, /* 64 Exactly n matches */
|
||||||
|
|
||||||
OP_TYPEPOSSTAR, /* 60 Possessified versions */
|
OP_TYPEPOSSTAR, /* 65 Possessified versions */
|
||||||
OP_TYPEPOSPLUS, /* 61 */
|
OP_TYPEPOSPLUS, /* 66 */
|
||||||
OP_TYPEPOSQUERY, /* 62 */
|
OP_TYPEPOSQUERY, /* 67 */
|
||||||
OP_TYPEPOSUPTO, /* 63 */
|
OP_TYPEPOSUPTO, /* 68 */
|
||||||
|
|
||||||
OP_CRSTAR, /* 64 The maximizing and minimizing versions of */
|
OP_CRSTAR, /* 69 The maximizing and minimizing versions of */
|
||||||
OP_CRMINSTAR, /* 65 all these opcodes must come in pairs, with */
|
OP_CRMINSTAR, /* 70 all these opcodes must come in pairs, with */
|
||||||
OP_CRPLUS, /* 66 the minimizing one second. These codes must */
|
OP_CRPLUS, /* 71 the minimizing one second. These codes must */
|
||||||
OP_CRMINPLUS, /* 67 be in exactly the same order as those above. */
|
OP_CRMINPLUS, /* 72 be in exactly the same order as those above. */
|
||||||
OP_CRQUERY, /* 68 These are for character classes and back refs */
|
OP_CRQUERY, /* 73 These are for character classes and back refs */
|
||||||
OP_CRMINQUERY, /* 69 */
|
OP_CRMINQUERY, /* 74 */
|
||||||
OP_CRRANGE, /* 70 These are different to the three sets above. */
|
OP_CRRANGE, /* 75 These are different to the three sets above. */
|
||||||
OP_CRMINRANGE, /* 71 */
|
OP_CRMINRANGE, /* 76 */
|
||||||
|
|
||||||
OP_CLASS, /* 72 Match a character class, chars < 256 only */
|
OP_CLASS, /* 77 Match a character class, chars < 256 only */
|
||||||
OP_NCLASS, /* 73 Same, but the bitmap was created from a negative
|
OP_NCLASS, /* 78 Same, but the bitmap was created from a negative
|
||||||
class - the difference is relevant only when a UTF-8
|
class - the difference is relevant only when a UTF-8
|
||||||
character > 255 is encountered. */
|
character > 255 is encountered. */
|
||||||
|
|
||||||
OP_XCLASS, /* 74 Extended class for handling UTF-8 chars within the
|
OP_XCLASS, /* 79 Extended class for handling UTF-8 chars within the
|
||||||
class. This does both positive and negative. */
|
class. This does both positive and negative. */
|
||||||
|
|
||||||
OP_REF, /* 75 Match a back reference */
|
OP_REF, /* 80 Match a back reference */
|
||||||
OP_RECURSE, /* 76 Match a numbered subpattern (possibly recursive) */
|
OP_RECURSE, /* 81 Match a numbered subpattern (possibly recursive) */
|
||||||
OP_CALLOUT, /* 77 Call out to external function if provided */
|
OP_CALLOUT, /* 82 Call out to external function if provided */
|
||||||
|
|
||||||
OP_ALT, /* 78 Start of alternation */
|
OP_ALT, /* 83 Start of alternation */
|
||||||
OP_KET, /* 79 End of group that doesn't have an unbounded repeat */
|
OP_KET, /* 84 End of group that doesn't have an unbounded repeat */
|
||||||
OP_KETRMAX, /* 80 These two must remain together and in this */
|
OP_KETRMAX, /* 85 These two must remain together and in this */
|
||||||
OP_KETRMIN, /* 81 order. They are for groups the repeat for ever. */
|
OP_KETRMIN, /* 86 order. They are for groups that repeat for ever. */
|
||||||
|
|
||||||
/* The assertions must come before BRA, CBRA, ONCE, and COND.*/
|
/* The assertions must come before BRA, CBRA, ONCE, and COND.*/
|
||||||
|
|
||||||
OP_ASSERT, /* 82 Positive lookahead */
|
OP_ASSERT, /* 87 Positive lookahead */
|
||||||
OP_ASSERT_NOT, /* 83 Negative lookahead */
|
OP_ASSERT_NOT, /* 88 Negative lookahead */
|
||||||
OP_ASSERTBACK, /* 84 Positive lookbehind */
|
OP_ASSERTBACK, /* 89 Positive lookbehind */
|
||||||
OP_ASSERTBACK_NOT, /* 85 Negative lookbehind */
|
OP_ASSERTBACK_NOT, /* 90 Negative lookbehind */
|
||||||
OP_REVERSE, /* 86 Move pointer back - used in lookbehind assertions */
|
OP_REVERSE, /* 91 Move pointer back - used in lookbehind assertions */
|
||||||
|
|
||||||
/* ONCE, BRA, CBRA, and COND must come after the assertions, with ONCE first,
|
/* ONCE, BRA, CBRA, and COND must come after the assertions, with ONCE first,
|
||||||
as there's a test for >= ONCE for a subpattern that isn't an assertion. */
|
as there's a test for >= ONCE for a subpattern that isn't an assertion. */
|
||||||
|
|
||||||
OP_ONCE, /* 87 Atomic group */
|
OP_ONCE, /* 92 Atomic group */
|
||||||
OP_BRA, /* 88 Start of non-capturing bracket */
|
OP_BRA, /* 93 Start of non-capturing bracket */
|
||||||
OP_CBRA, /* 89 Start of capturing bracket */
|
OP_CBRA, /* 94 Start of capturing bracket */
|
||||||
OP_COND, /* 90 Conditional group */
|
OP_COND, /* 95 Conditional group */
|
||||||
|
|
||||||
/* These three must follow the previous three, in the same order. There's a
|
/* These three must follow the previous three, in the same order. There's a
|
||||||
check for >= SBRA to distinguish the two sets. */
|
check for >= SBRA to distinguish the two sets. */
|
||||||
|
|
||||||
OP_SBRA, /* 91 Start of non-capturing bracket, check empty */
|
OP_SBRA, /* 96 Start of non-capturing bracket, check empty */
|
||||||
OP_SCBRA, /* 92 Start of capturing bracket, check empty */
|
OP_SCBRA, /* 97 Start of capturing bracket, check empty */
|
||||||
OP_SCOND, /* 93 Conditional group, check empty */
|
OP_SCOND, /* 98 Conditional group, check empty */
|
||||||
|
|
||||||
OP_CREF, /* 94 Used to hold a capture number as condition */
|
OP_CREF, /* 99 Used to hold a capture number as condition */
|
||||||
OP_RREF, /* 95 Used to hold a recursion number as condition */
|
OP_RREF, /* 100 Used to hold a recursion number as condition */
|
||||||
OP_DEF, /* 96 The DEFINE condition */
|
OP_DEF, /* 101 The DEFINE condition */
|
||||||
|
|
||||||
OP_BRAZERO, /* 97 These two must remain together and in this */
|
OP_BRAZERO, /* 102 These two must remain together and in this */
|
||||||
OP_BRAMINZERO /* 98 order. */
|
OP_BRAMINZERO /* 103 order. */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@ -696,10 +765,10 @@ enum {
|
|||||||
for debugging. The macro is referenced only in pcre_printint.c. */
|
for debugging. The macro is referenced only in pcre_printint.c. */
|
||||||
|
|
||||||
#define OP_NAME_LIST \
|
#define OP_NAME_LIST \
|
||||||
"End", "\\A", "\\G", "\\B", "\\b", "\\D", "\\d", \
|
"End", "\\A", "\\G", "\\K", "\\B", "\\b", "\\D", "\\d", \
|
||||||
"\\S", "\\s", "\\W", "\\w", "Any", "Anybyte", \
|
"\\S", "\\s", "\\W", "\\w", "Any", "Anybyte", \
|
||||||
"notprop", "prop", "anynl", "extuni", \
|
"notprop", "prop", "\\R", "\\H", "\\h", "\\V", "\\v", \
|
||||||
"\\Z", "\\z", \
|
"extuni", "\\Z", "\\z", \
|
||||||
"Opt", "^", "$", "char", "charnc", "not", \
|
"Opt", "^", "$", "char", "charnc", "not", \
|
||||||
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", \
|
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", \
|
||||||
"*+","++", "?+", "{", \
|
"*+","++", "?+", "{", \
|
||||||
@ -726,9 +795,11 @@ in UTF-8 mode. The code that uses this table must know about such things. */
|
|||||||
|
|
||||||
#define OP_LENGTHS \
|
#define OP_LENGTHS \
|
||||||
1, /* End */ \
|
1, /* End */ \
|
||||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* \A, \G, \B, \B, \D, \d, \S, \s, \W, \w */ \
|
1, 1, 1, 1, 1, /* \A, \G, \K, \B, \b */ \
|
||||||
|
1, 1, 1, 1, 1, 1, /* \D, \d, \S, \s, \W, \w */ \
|
||||||
1, 1, /* Any, Anybyte */ \
|
1, 1, /* Any, Anybyte */ \
|
||||||
3, 3, 1, 1, /* NOTPROP, PROP, EXTUNI, ANYNL */ \
|
3, 3, 1, /* NOTPROP, PROP, \R */ \
|
||||||
|
1, 1, 1, 1, 1, /* \H, \h, \V, \v, EXTUNI */ \
|
||||||
1, 1, 2, 1, 1, /* \Z, \z, Opt, ^, $ */ \
|
1, 1, 2, 1, 1, /* \Z, \z, Opt, ^, $ */ \
|
||||||
2, /* Char - the minimum length */ \
|
2, /* Char - the minimum length */ \
|
||||||
2, /* Charnc - the minimum length */ \
|
2, /* Charnc - the minimum length */ \
|
||||||
@ -788,7 +859,7 @@ enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9,
|
|||||||
ERR20, ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR28, ERR29,
|
ERR20, ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR28, ERR29,
|
||||||
ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39,
|
ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39,
|
||||||
ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
|
ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
|
||||||
ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57 };
|
ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58 };
|
||||||
|
|
||||||
/* The real format of the start of the pcre block; the index of names and the
|
/* The real format of the start of the pcre block; the index of names and the
|
||||||
code vector run on as long as necessary after the end. We store an explicit
|
code vector run on as long as necessary after the end. We store an explicit
|
||||||
@ -877,21 +948,11 @@ typedef struct recursion_info {
|
|||||||
struct recursion_info *prevrec; /* Previous recursion record (or NULL) */
|
struct recursion_info *prevrec; /* Previous recursion record (or NULL) */
|
||||||
int group_num; /* Number of group that was called */
|
int group_num; /* Number of group that was called */
|
||||||
const uschar *after_call; /* "Return value": points after the call in the expr */
|
const uschar *after_call; /* "Return value": points after the call in the expr */
|
||||||
USPTR save_start; /* Old value of md->start_match */
|
USPTR save_start; /* Old value of mstart */
|
||||||
int *offset_save; /* Pointer to start of saved offsets */
|
int *offset_save; /* Pointer to start of saved offsets */
|
||||||
int saved_max; /* Number of saved offsets */
|
int saved_max; /* Number of saved offsets */
|
||||||
} recursion_info;
|
} recursion_info;
|
||||||
|
|
||||||
/* When compiling in a mode that doesn't use recursive calls to match(),
|
|
||||||
a structure is used to remember local variables on the heap. It is defined in
|
|
||||||
pcre_exec.c, close to the match() function, so that it is easy to keep it in
|
|
||||||
step with any changes of local variable. However, the pointer to the current
|
|
||||||
frame must be saved in some "static" place over a longjmp(). We declare the
|
|
||||||
structure here so that we can put a pointer in the match_data structure. NOTE:
|
|
||||||
This isn't used for a "normal" compilation of pcre. */
|
|
||||||
|
|
||||||
struct heapframe;
|
|
||||||
|
|
||||||
/* Structure for building a chain of data for holding the values of the subject
|
/* Structure for building a chain of data for holding the values of the subject
|
||||||
pointer at the start of each subpattern, so as to detect when an empty string
|
pointer at the start of each subpattern, so as to detect when an empty string
|
||||||
has been matched by a subpattern - to break infinite loops. */
|
has been matched by a subpattern - to break infinite loops. */
|
||||||
@ -928,7 +989,7 @@ typedef struct match_data {
|
|||||||
const uschar *start_code; /* For use when recursing */
|
const uschar *start_code; /* For use when recursing */
|
||||||
USPTR start_subject; /* Start of the subject string */
|
USPTR start_subject; /* Start of the subject string */
|
||||||
USPTR end_subject; /* End of the subject string */
|
USPTR end_subject; /* End of the subject string */
|
||||||
USPTR start_match; /* Start of this match attempt */
|
USPTR start_match_ptr; /* Start of matched string */
|
||||||
USPTR end_match_ptr; /* Subject position at end match */
|
USPTR end_match_ptr; /* Subject position at end match */
|
||||||
int end_offset_top; /* Highwater mark at end of match */
|
int end_offset_top; /* Highwater mark at end of match */
|
||||||
int capture_last; /* Most recent capture number */
|
int capture_last; /* Most recent capture number */
|
||||||
@ -937,7 +998,6 @@ typedef struct match_data {
|
|||||||
int eptrn; /* Next free eptrblock */
|
int eptrn; /* Next free eptrblock */
|
||||||
recursion_info *recursive; /* Linked list of recursion data */
|
recursion_info *recursive; /* Linked list of recursion data */
|
||||||
void *callout_data; /* To pass back to callouts */
|
void *callout_data; /* To pass back to callouts */
|
||||||
struct heapframe *thisframe; /* Used only when compiling for no recursion */
|
|
||||||
} match_data;
|
} match_data;
|
||||||
|
|
||||||
/* A similar structure is used for the same purpose by the DFA matching
|
/* A similar structure is used for the same purpose by the DFA matching
|
||||||
@ -1024,16 +1084,16 @@ extern const uschar _pcre_OP_lengths[];
|
|||||||
one of the exported public functions. They have to be "external" in the C
|
one of the exported public functions. They have to be "external" in the C
|
||||||
sense, but are not part of the PCRE public API. */
|
sense, but are not part of the PCRE public API. */
|
||||||
|
|
||||||
extern BOOL _pcre_is_newline(const uschar *, const uschar *, int *,
|
extern BOOL _pcre_is_newline(const uschar *, int, const uschar *,
|
||||||
BOOL);
|
int *, BOOL);
|
||||||
extern int _pcre_ord2utf8(int, uschar *);
|
extern int _pcre_ord2utf8(int, uschar *);
|
||||||
extern real_pcre *_pcre_try_flipped(const real_pcre *, real_pcre *,
|
extern real_pcre *_pcre_try_flipped(const real_pcre *, real_pcre *,
|
||||||
const pcre_study_data *, pcre_study_data *);
|
const pcre_study_data *, pcre_study_data *);
|
||||||
extern int _pcre_ucp_findprop(const unsigned int, int *, int *);
|
extern int _pcre_ucp_findprop(const unsigned int, int *, int *);
|
||||||
extern unsigned int _pcre_ucp_othercase(const unsigned int);
|
extern unsigned int _pcre_ucp_othercase(const unsigned int);
|
||||||
extern int _pcre_valid_utf8(const uschar *, int);
|
extern int _pcre_valid_utf8(const uschar *, int);
|
||||||
extern BOOL _pcre_was_newline(const uschar *, const uschar *, int *,
|
extern BOOL _pcre_was_newline(const uschar *, int, const uschar *,
|
||||||
BOOL);
|
int *, BOOL);
|
||||||
extern BOOL _pcre_xclass(int, const uschar *);
|
extern BOOL _pcre_xclass(int, const uschar *);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
and semantics are as close as possible to those of the Perl 5 language.
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
Written by Philip Hazel
|
Written by Philip Hazel
|
||||||
Copyright (c) 1997-2006 University of Cambridge
|
Copyright (c) 1997-2007 University of Cambridge
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
and semantics are as close as possible to those of the Perl 5 language.
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
Written by Philip Hazel
|
Written by Philip Hazel
|
||||||
Copyright (c) 1997-2006 University of Cambridge
|
Copyright (c) 1997-2007 University of Cambridge
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
@ -42,9 +42,8 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||||||
one kind of newline is to be recognized. When a newline is found, its length is
|
one kind of newline is to be recognized. When a newline is found, its length is
|
||||||
returned. In principle, we could implement several newline "types", each
|
returned. In principle, we could implement several newline "types", each
|
||||||
referring to a different set of newline characters. At present, PCRE supports
|
referring to a different set of newline characters. At present, PCRE supports
|
||||||
only NLTYPE_FIXED, which gets handled without these functions, and NLTYPE_ALL,
|
only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF,
|
||||||
so for now the type isn't passed into the functions. It can easily be added
|
and NLTYPE_ANY. The full list of Unicode newline characters is taken from
|
||||||
later if required. The full list of Unicode newline characters is taken from
|
|
||||||
http://unicode.org/unicode/reports/tr18/. */
|
http://unicode.org/unicode/reports/tr18/. */
|
||||||
|
|
||||||
|
|
||||||
@ -61,6 +60,7 @@ string that is being processed.
|
|||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
ptr pointer to possible newline
|
ptr pointer to possible newline
|
||||||
|
type the newline type
|
||||||
endptr pointer to the end of the string
|
endptr pointer to the end of the string
|
||||||
lenptr where to return the length
|
lenptr where to return the length
|
||||||
utf8 TRUE if in utf8 mode
|
utf8 TRUE if in utf8 mode
|
||||||
@ -69,12 +69,23 @@ Returns: TRUE or FALSE
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
BOOL
|
BOOL
|
||||||
_pcre_is_newline(const uschar *ptr, const uschar *endptr, int *lenptr,
|
_pcre_is_newline(const uschar *ptr, int type, const uschar *endptr,
|
||||||
BOOL utf8)
|
int *lenptr, BOOL utf8)
|
||||||
{
|
{
|
||||||
int c;
|
int c;
|
||||||
if (utf8) { GETCHAR(c, ptr); } else c = *ptr;
|
if (utf8) { GETCHAR(c, ptr); } else c = *ptr;
|
||||||
switch(c)
|
|
||||||
|
if (type == NLTYPE_ANYCRLF) switch(c)
|
||||||
|
{
|
||||||
|
case 0x000a: *lenptr = 1; return TRUE; /* LF */
|
||||||
|
case 0x000d: *lenptr = (ptr < endptr - 1 && ptr[1] == 0x0a)? 2 : 1;
|
||||||
|
return TRUE; /* CR */
|
||||||
|
default: return FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* NLTYPE_ANY */
|
||||||
|
|
||||||
|
else switch(c)
|
||||||
{
|
{
|
||||||
case 0x000a: /* LF */
|
case 0x000a: /* LF */
|
||||||
case 0x000b: /* VT */
|
case 0x000b: /* VT */
|
||||||
@ -99,6 +110,7 @@ the string that is being processed.
|
|||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
ptr pointer to possible newline
|
ptr pointer to possible newline
|
||||||
|
type the newline type
|
||||||
startptr pointer to the start of the string
|
startptr pointer to the start of the string
|
||||||
lenptr where to return the length
|
lenptr where to return the length
|
||||||
utf8 TRUE if in utf8 mode
|
utf8 TRUE if in utf8 mode
|
||||||
@ -107,8 +119,8 @@ Returns: TRUE or FALSE
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
BOOL
|
BOOL
|
||||||
_pcre_was_newline(const uschar *ptr, const uschar *startptr, int *lenptr,
|
_pcre_was_newline(const uschar *ptr, int type, const uschar *startptr,
|
||||||
BOOL utf8)
|
int *lenptr, BOOL utf8)
|
||||||
{
|
{
|
||||||
int c;
|
int c;
|
||||||
ptr--;
|
ptr--;
|
||||||
@ -118,7 +130,16 @@ if (utf8)
|
|||||||
GETCHAR(c, ptr);
|
GETCHAR(c, ptr);
|
||||||
}
|
}
|
||||||
else c = *ptr;
|
else c = *ptr;
|
||||||
switch(c)
|
|
||||||
|
if (type == NLTYPE_ANYCRLF) switch(c)
|
||||||
|
{
|
||||||
|
case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1;
|
||||||
|
return TRUE; /* LF */
|
||||||
|
case 0x000d: *lenptr = 1; return TRUE; /* CR */
|
||||||
|
default: return FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
else switch(c)
|
||||||
{
|
{
|
||||||
case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1;
|
case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1;
|
||||||
return TRUE; /* LF */
|
return TRUE; /* LF */
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
and semantics are as close as possible to those of the Perl 5 language.
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
Written by Philip Hazel
|
Written by Philip Hazel
|
||||||
Copyright (c) 1997-2006 University of Cambridge
|
Copyright (c) 1997-2007 University of Cambridge
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
@ -62,6 +62,7 @@ Returns: number of characters placed in the buffer
|
|||||||
int
|
int
|
||||||
_pcre_ord2utf8(int cvalue, uschar *buffer)
|
_pcre_ord2utf8(int cvalue, uschar *buffer)
|
||||||
{
|
{
|
||||||
|
#ifdef SUPPORT_UTF8
|
||||||
register int i, j;
|
register int i, j;
|
||||||
for (i = 0; i < _pcre_utf8_table1_size; i++)
|
for (i = 0; i < _pcre_utf8_table1_size; i++)
|
||||||
if (cvalue <= _pcre_utf8_table1[i]) break;
|
if (cvalue <= _pcre_utf8_table1[i]) break;
|
||||||
@ -73,6 +74,9 @@ for (j = i; j > 0; j--)
|
|||||||
}
|
}
|
||||||
*buffer = _pcre_utf8_table2[i] | cvalue;
|
*buffer = _pcre_utf8_table2[i] | cvalue;
|
||||||
return i + 1;
|
return i + 1;
|
||||||
|
#else
|
||||||
|
return 0; /* Keep compiler happy; this function won't ever be */
|
||||||
|
#endif /* called when SUPPORT_UTF8 is not defined. */
|
||||||
}
|
}
|
||||||
|
|
||||||
/* End of pcre_ord2utf8.c */
|
/* End of pcre_ord2utf8.c */
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
and semantics are as close as possible to those of the Perl 5 language.
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
Written by Philip Hazel
|
Written by Philip Hazel
|
||||||
Copyright (c) 1997-2006 University of Cambridge
|
Copyright (c) 1997-2007 University of Cambridge
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
@ -63,7 +63,7 @@ Returns: the (possibly updated) count value (a non-negative number), or
|
|||||||
a negative error number
|
a negative error number
|
||||||
*/
|
*/
|
||||||
|
|
||||||
PCRE_DATA_SCOPE int
|
PCRE_EXP_DEFN int
|
||||||
pcre_refcount(pcre *argument_re, int adjust)
|
pcre_refcount(pcre *argument_re, int adjust)
|
||||||
{
|
{
|
||||||
real_pcre *re = (real_pcre *)argument_re;
|
real_pcre *re = (real_pcre *)argument_re;
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
and semantics are as close as possible to those of the Perl 5 language.
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
Written by Philip Hazel
|
Written by Philip Hazel
|
||||||
Copyright (c) 1997-2006 University of Cambridge
|
Copyright (c) 1997-2007 University of Cambridge
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
@ -394,11 +394,13 @@ do
|
|||||||
character with a value > 255. */
|
character with a value > 255. */
|
||||||
|
|
||||||
case OP_NCLASS:
|
case OP_NCLASS:
|
||||||
|
#ifdef SUPPORT_UTF8
|
||||||
if (utf8)
|
if (utf8)
|
||||||
{
|
{
|
||||||
start_bits[24] |= 0xf0; /* Bits for 0xc4 - 0xc8 */
|
start_bits[24] |= 0xf0; /* Bits for 0xc4 - 0xc8 */
|
||||||
memset(start_bits+25, 0xff, 7); /* Bits for 0xc9 - 0xff */
|
memset(start_bits+25, 0xff, 7); /* Bits for 0xc9 - 0xff */
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
/* Fall through */
|
/* Fall through */
|
||||||
|
|
||||||
case OP_CLASS:
|
case OP_CLASS:
|
||||||
@ -411,6 +413,7 @@ do
|
|||||||
value is > 127. In fact, there are only two possible starting bytes for
|
value is > 127. In fact, there are only two possible starting bytes for
|
||||||
characters in the range 128 - 255. */
|
characters in the range 128 - 255. */
|
||||||
|
|
||||||
|
#ifdef SUPPORT_UTF8
|
||||||
if (utf8)
|
if (utf8)
|
||||||
{
|
{
|
||||||
for (c = 0; c < 16; c++) start_bits[c] |= tcode[c];
|
for (c = 0; c < 16; c++) start_bits[c] |= tcode[c];
|
||||||
@ -428,6 +431,7 @@ do
|
|||||||
/* In non-UTF-8 mode, the two bit maps are completely compatible. */
|
/* In non-UTF-8 mode, the two bit maps are completely compatible. */
|
||||||
|
|
||||||
else
|
else
|
||||||
|
#endif
|
||||||
{
|
{
|
||||||
for (c = 0; c < 32; c++) start_bits[c] |= tcode[c];
|
for (c = 0; c < 32; c++) start_bits[c] |= tcode[c];
|
||||||
}
|
}
|
||||||
@ -487,7 +491,7 @@ Returns: pointer to a pcre_extra block, with study_data filled in and the
|
|||||||
NULL on error or if no optimization possible
|
NULL on error or if no optimization possible
|
||||||
*/
|
*/
|
||||||
|
|
||||||
PCRE_DATA_SCOPE pcre_extra *
|
PCRE_EXP_DEFN pcre_extra *
|
||||||
pcre_study(const pcre *external_re, int options, const char **errorptr)
|
pcre_study(const pcre *external_re, int options, const char **errorptr)
|
||||||
{
|
{
|
||||||
uschar start_bits[32];
|
uschar start_bits[32];
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
and semantics are as close as possible to those of the Perl 5 language.
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
Written by Philip Hazel
|
Written by Philip Hazel
|
||||||
Copyright (c) 1997-2006 University of Cambridge
|
Copyright (c) 1997-2007 University of Cambridge
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
@ -61,6 +61,8 @@ const uschar _pcre_OP_lengths[] = { OP_LENGTHS };
|
|||||||
/* These are the breakpoints for different numbers of bytes in a UTF-8
|
/* These are the breakpoints for different numbers of bytes in a UTF-8
|
||||||
character. */
|
character. */
|
||||||
|
|
||||||
|
#ifdef SUPPORT_UTF8
|
||||||
|
|
||||||
const int _pcre_utf8_table1[] =
|
const int _pcre_utf8_table1[] =
|
||||||
{ 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};
|
{ 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};
|
||||||
|
|
||||||
@ -301,4 +303,6 @@ const ucp_type_table _pcre_utt[] = {
|
|||||||
|
|
||||||
const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table);
|
const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table);
|
||||||
|
|
||||||
|
#endif /* SUPPORT_UTF8 */
|
||||||
|
|
||||||
/* End of pcre_tables.c */
|
/* End of pcre_tables.c */
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
and semantics are as close as possible to those of the Perl 5 language.
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
Written by Philip Hazel
|
Written by Philip Hazel
|
||||||
Copyright (c) 1997-2006 University of Cambridge
|
Copyright (c) 1997-2007 University of Cambridge
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
and semantics are as close as possible to those of the Perl 5 language.
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
Written by Philip Hazel
|
Written by Philip Hazel
|
||||||
Copyright (c) 1997-2006 University of Cambridge
|
Copyright (c) 1997-2007 University of Cambridge
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
@ -75,7 +75,7 @@ I could find no way of detecting that a macro is defined as an empty string at
|
|||||||
pre-processor time. This hack uses a standard trick for avoiding calling
|
pre-processor time. This hack uses a standard trick for avoiding calling
|
||||||
the STRING macro with an empty argument when doing the test. */
|
the STRING macro with an empty argument when doing the test. */
|
||||||
|
|
||||||
PCRE_DATA_SCOPE const char *
|
PCRE_EXP_DEFN const char *
|
||||||
pcre_version(void)
|
pcre_version(void)
|
||||||
{
|
{
|
||||||
return (XSTRING(Z PCRE_PRERELEASE)[1] == 0)?
|
return (XSTRING(Z PCRE_PRERELEASE)[1] == 0)?
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
and semantics are as close as possible to those of the Perl 5 language.
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
Written by Philip Hazel
|
Written by Philip Hazel
|
||||||
Copyright (c) 1997-2006 University of Cambridge
|
Copyright (c) 1997-2007 University of Cambridge
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
Loading…
Reference in New Issue
Block a user