mirror of
				https://gitlab.gnome.org/GNOME/glib.git
				synced 2025-11-04 01:58:54 +01:00 
			
		
		
		
	Update to PCRE 7.2
svn path=/trunk/; revision=5659
This commit is contained in:
		@@ -1,3 +1,7 @@
 | 
			
		||||
2007-07-31  Matthias Clasen  <mclasen@redhat.com>
 | 
			
		||||
 | 
			
		||||
	* glib/pcre/*: Update the internal PCRE to 7.2
 | 
			
		||||
 | 
			
		||||
2007-07-31  Matthias Clasen  <mclasen@redhat.com>
 | 
			
		||||
 | 
			
		||||
	* glib/pltcheck.sh: Fix some glitches
 | 
			
		||||
 
 | 
			
		||||
@@ -1,68 +1,5 @@
 | 
			
		||||
PCRE LICENCE
 | 
			
		||||
------------
 | 
			
		||||
 | 
			
		||||
PCRE is a library of functions to support regular expressions whose syntax
 | 
			
		||||
and semantics are as close as possible to those of the Perl 5 language.
 | 
			
		||||
 | 
			
		||||
Release 7 of PCRE is distributed under the terms of the "BSD" licence, as
 | 
			
		||||
specified below. The documentation for PCRE, supplied in the "doc"
 | 
			
		||||
directory, is distributed under the same terms as the software itself.
 | 
			
		||||
 | 
			
		||||
The basic library functions are written in C and are freestanding. Also
 | 
			
		||||
included in the distribution is a set of C++ wrapper functions.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
THE BASIC LIBRARY FUNCTIONS
 | 
			
		||||
---------------------------
 | 
			
		||||
 | 
			
		||||
Written by:       Philip Hazel
 | 
			
		||||
Email local part: ph10
 | 
			
		||||
Email domain:     cam.ac.uk
 | 
			
		||||
 | 
			
		||||
University of Cambridge Computing Service,
 | 
			
		||||
Cambridge, England. Phone: +44 1223 334714.
 | 
			
		||||
 | 
			
		||||
Copyright (c) 1997-2006 University of Cambridge
 | 
			
		||||
All rights reserved.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
THE C++ WRAPPER FUNCTIONS
 | 
			
		||||
-------------------------
 | 
			
		||||
 | 
			
		||||
Contributed by:   Google Inc.
 | 
			
		||||
 | 
			
		||||
Copyright (c) 2006, Google Inc.
 | 
			
		||||
All rights reserved.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
THE "BSD" LICENCE
 | 
			
		||||
-----------------
 | 
			
		||||
 | 
			
		||||
Redistribution and use in source and binary forms, with or without
 | 
			
		||||
modification, are permitted provided that the following conditions are met:
 | 
			
		||||
 | 
			
		||||
    * Redistributions of source code must retain the above copyright notice,
 | 
			
		||||
      this list of conditions and the following disclaimer.
 | 
			
		||||
 | 
			
		||||
    * Redistributions in binary form must reproduce the above copyright
 | 
			
		||||
      notice, this list of conditions and the following disclaimer in the
 | 
			
		||||
      documentation and/or other materials provided with the distribution.
 | 
			
		||||
 | 
			
		||||
    * Neither the name of the University of Cambridge nor the name of Google
 | 
			
		||||
      Inc. nor the names of their contributors may be used to endorse or
 | 
			
		||||
      promote products derived from this software without specific prior
 | 
			
		||||
      written permission.
 | 
			
		||||
 | 
			
		||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 | 
			
		||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | 
			
		||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 | 
			
		||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 | 
			
		||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 | 
			
		||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 | 
			
		||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 | 
			
		||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 | 
			
		||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 | 
			
		||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 | 
			
		||||
POSSIBILITY OF SUCH DAMAGE.
 | 
			
		||||
Please see the file LICENCE in the PCRE distribution for licensing details.
 | 
			
		||||
 | 
			
		||||
End
 | 
			
		||||
 
 | 
			
		||||
@@ -9,7 +9,7 @@ INCLUDES = \
 | 
			
		||||
	-DMAX_NAME_COUNT=10000 \
 | 
			
		||||
	-DMAX_DUPLENGTH=30000 \
 | 
			
		||||
	-DLINK_SIZE=2 \
 | 
			
		||||
	-DEBCDIC=0 \
 | 
			
		||||
	-UEBCDIC \
 | 
			
		||||
	-DPOSIX_MALLOC_THRESHOLD=10 \
 | 
			
		||||
	-I$(top_srcdir) \
 | 
			
		||||
	-I$(srcdir) \
 | 
			
		||||
 
 | 
			
		||||
@@ -5,7 +5,7 @@
 | 
			
		||||
/* This is the public header file for the PCRE library, to be #included by
 | 
			
		||||
applications that call the PCRE functions.
 | 
			
		||||
 | 
			
		||||
           Copyright (c) 1997-2006 University of Cambridge
 | 
			
		||||
           Copyright (c) 1997-2007 University of Cambridge
 | 
			
		||||
 | 
			
		||||
-----------------------------------------------------------------------------
 | 
			
		||||
Redistribution and use in source and binary forms, with or without
 | 
			
		||||
@@ -41,47 +41,31 @@ POSSIBILITY OF SUCH DAMAGE.
 | 
			
		||||
 | 
			
		||||
/* The current PCRE version information. */
 | 
			
		||||
 | 
			
		||||
/* NOTES FOR FUTURE MAINTAINERS: Do not use numbers with leading zeros, because
 | 
			
		||||
they may be treated as octal constants. The PCRE_PRERELEASE feature is for
 | 
			
		||||
identifying release candidates. It might be defined as -RC2, for example. In
 | 
			
		||||
real releases, it should be defined empty. Do not change the alignment of these
 | 
			
		||||
statements. The code in ./configure greps out the version numbers by using "cut"
 | 
			
		||||
to get values from column 29 onwards. These are substituted into pcre-config
 | 
			
		||||
and libpcre.pc. The values are not put into configure.ac and substituted here
 | 
			
		||||
(which would simplify this issue) because that makes life harder for those who
 | 
			
		||||
cannot run ./configure. As it now stands, this file need not be edited in that
 | 
			
		||||
circumstance. */
 | 
			
		||||
 | 
			
		||||
#define PCRE_MAJOR          7
 | 
			
		||||
#define PCRE_MINOR          0
 | 
			
		||||
#define PCRE_MINOR          2
 | 
			
		||||
#define PCRE_PRERELEASE     
 | 
			
		||||
#define PCRE_DATE           18-Dec-2006
 | 
			
		||||
#define PCRE_DATE           2007-06-19
 | 
			
		||||
 | 
			
		||||
/* Win32 uses DLL by default; it needs special stuff for exported functions
 | 
			
		||||
when building PCRE. */
 | 
			
		||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
 | 
			
		||||
imported have to be identified as such. When building PCRE, the appropriate
 | 
			
		||||
export setting is defined in pcre_internal.h, which includes this file. So we
 | 
			
		||||
don't change an existing definition of PCRE_EXP_DECL. */
 | 
			
		||||
 | 
			
		||||
/* But don't do that when building as part of GLib */
 | 
			
		||||
#if 0
 | 
			
		||||
#ifndef PCRE_EXP_DECL
 | 
			
		||||
#  ifdef _WIN32
 | 
			
		||||
#  ifdef PCRE_DEFINITION
 | 
			
		||||
#    ifdef DLL_EXPORT
 | 
			
		||||
#      define PCRE_DATA_SCOPE __declspec(dllexport)
 | 
			
		||||
#    endif
 | 
			
		||||
#  else
 | 
			
		||||
#    ifndef PCRE_STATIC
 | 
			
		||||
#      define PCRE_DATA_SCOPE extern __declspec(dllimport)
 | 
			
		||||
#    endif
 | 
			
		||||
#      define PCRE_EXP_DECL extern __declspec(dllimport)
 | 
			
		||||
#    endif
 | 
			
		||||
#  endif
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
/* Otherwise, we use the standard "extern". */
 | 
			
		||||
/* By default, we use the standard "extern" declarations. */
 | 
			
		||||
 | 
			
		||||
#ifndef PCRE_DATA_SCOPE
 | 
			
		||||
#ifndef PCRE_EXP_DECL
 | 
			
		||||
#  ifdef __cplusplus
 | 
			
		||||
#    define PCRE_DATA_SCOPE     extern "C"
 | 
			
		||||
#    define PCRE_EXP_DECL       extern "C"
 | 
			
		||||
#  else
 | 
			
		||||
#    define PCRE_DATA_SCOPE     extern
 | 
			
		||||
#    define PCRE_EXP_DECL       extern
 | 
			
		||||
#  endif
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
@@ -122,6 +106,7 @@ extern "C" {
 | 
			
		||||
#define PCRE_NEWLINE_LF         0x00200000
 | 
			
		||||
#define PCRE_NEWLINE_CRLF       0x00300000
 | 
			
		||||
#define PCRE_NEWLINE_ANY        0x00400000
 | 
			
		||||
#define PCRE_NEWLINE_ANYCRLF    0x00500000
 | 
			
		||||
 | 
			
		||||
/* Exec-time and get/set-time error codes */
 | 
			
		||||
 | 
			
		||||
@@ -165,6 +150,8 @@ extern "C" {
 | 
			
		||||
#define PCRE_INFO_NAMETABLE          9
 | 
			
		||||
#define PCRE_INFO_STUDYSIZE         10
 | 
			
		||||
#define PCRE_INFO_DEFAULT_TABLES    11
 | 
			
		||||
#define PCRE_INFO_OKPARTIAL         12
 | 
			
		||||
#define PCRE_INFO_JCHANGED          13
 | 
			
		||||
 | 
			
		||||
/* Request types for pcre_config(). Do not re-arrange, in order to remain
 | 
			
		||||
compatible. */
 | 
			
		||||
@@ -243,41 +230,41 @@ typedef struct pcre_callout_block {
 | 
			
		||||
#define pcre_free g_free
 | 
			
		||||
#define pcre_stack_malloc g_try_malloc
 | 
			
		||||
 | 
			
		||||
PCRE_DATA_SCOPE int   (*pcre_callout)(pcre_callout_block *);
 | 
			
		||||
PCRE_EXP_DECL int   (*pcre_callout)(pcre_callout_block *);
 | 
			
		||||
 | 
			
		||||
/* Exported PCRE functions */
 | 
			
		||||
 | 
			
		||||
PCRE_DATA_SCOPE pcre *pcre_compile(const char *, int, const char **, int *,
 | 
			
		||||
PCRE_EXP_DECL pcre *pcre_compile(const char *, int, const char **, int *,
 | 
			
		||||
                  const unsigned char *);
 | 
			
		||||
PCRE_DATA_SCOPE pcre *pcre_compile2(const char *, int, int *, const char **,
 | 
			
		||||
PCRE_EXP_DECL pcre *pcre_compile2(const char *, int, int *, const char **,
 | 
			
		||||
                  int *, const unsigned char *);
 | 
			
		||||
PCRE_DATA_SCOPE int  pcre_config(int, void *);
 | 
			
		||||
PCRE_DATA_SCOPE int  pcre_copy_named_substring(const pcre *, const char *,
 | 
			
		||||
PCRE_EXP_DECL int  pcre_config(int, void *);
 | 
			
		||||
PCRE_EXP_DECL int  pcre_copy_named_substring(const pcre *, const char *,
 | 
			
		||||
                  int *, int, const char *, char *, int);
 | 
			
		||||
PCRE_DATA_SCOPE int  pcre_copy_substring(const char *, int *, int, int, char *,
 | 
			
		||||
PCRE_EXP_DECL int  pcre_copy_substring(const char *, int *, int, int, char *,
 | 
			
		||||
                  int);
 | 
			
		||||
PCRE_DATA_SCOPE int  pcre_dfa_exec(const pcre *, const pcre_extra *,
 | 
			
		||||
PCRE_EXP_DECL int  pcre_dfa_exec(const pcre *, const pcre_extra *,
 | 
			
		||||
                  const char *, int, int, int, int *, int , int *, int);
 | 
			
		||||
PCRE_DATA_SCOPE int  pcre_exec(const pcre *, const pcre_extra *, PCRE_SPTR,
 | 
			
		||||
PCRE_EXP_DECL int  pcre_exec(const pcre *, const pcre_extra *, PCRE_SPTR,
 | 
			
		||||
                   int, int, int, int *, int);
 | 
			
		||||
PCRE_DATA_SCOPE void pcre_free_substring(const char *);
 | 
			
		||||
PCRE_DATA_SCOPE void pcre_free_substring_list(const char **);
 | 
			
		||||
PCRE_DATA_SCOPE int  pcre_fullinfo(const pcre *, const pcre_extra *, int,
 | 
			
		||||
PCRE_EXP_DECL void pcre_free_substring(const char *);
 | 
			
		||||
PCRE_EXP_DECL void pcre_free_substring_list(const char **);
 | 
			
		||||
PCRE_EXP_DECL int  pcre_fullinfo(const pcre *, const pcre_extra *, int,
 | 
			
		||||
                  void *);
 | 
			
		||||
PCRE_DATA_SCOPE int  pcre_get_named_substring(const pcre *, const char *,
 | 
			
		||||
PCRE_EXP_DECL int  pcre_get_named_substring(const pcre *, const char *,
 | 
			
		||||
                  int *, int, const char *, const char **);
 | 
			
		||||
PCRE_DATA_SCOPE int  pcre_get_stringnumber(const pcre *, const char *);
 | 
			
		||||
PCRE_DATA_SCOPE int  pcre_get_stringtable_entries(const pcre *, const char *,
 | 
			
		||||
PCRE_EXP_DECL int  pcre_get_stringnumber(const pcre *, const char *);
 | 
			
		||||
PCRE_EXP_DECL int  pcre_get_stringtable_entries(const pcre *, const char *,
 | 
			
		||||
                  char **, char **);
 | 
			
		||||
PCRE_DATA_SCOPE int  pcre_get_substring(const char *, int *, int, int,
 | 
			
		||||
PCRE_EXP_DECL int  pcre_get_substring(const char *, int *, int, int,
 | 
			
		||||
                  const char **);
 | 
			
		||||
PCRE_DATA_SCOPE int  pcre_get_substring_list(const char *, int *, int,
 | 
			
		||||
PCRE_EXP_DECL int  pcre_get_substring_list(const char *, int *, int,
 | 
			
		||||
                  const char ***);
 | 
			
		||||
PCRE_DATA_SCOPE int  pcre_info(const pcre *, int *, int *);
 | 
			
		||||
PCRE_DATA_SCOPE const unsigned char *pcre_maketables(void);
 | 
			
		||||
PCRE_DATA_SCOPE int  pcre_refcount(pcre *, int);
 | 
			
		||||
PCRE_DATA_SCOPE pcre_extra *pcre_study(const pcre *, int, const char **);
 | 
			
		||||
PCRE_DATA_SCOPE const char *pcre_version(void);
 | 
			
		||||
PCRE_EXP_DECL int  pcre_info(const pcre *, int *, int *);
 | 
			
		||||
PCRE_EXP_DECL const unsigned char *pcre_maketables(void);
 | 
			
		||||
PCRE_EXP_DECL int  pcre_refcount(pcre *, int);
 | 
			
		||||
PCRE_EXP_DECL pcre_extra *pcre_study(const pcre *, int, const char **);
 | 
			
		||||
PCRE_EXP_DECL const char *pcre_version(void);
 | 
			
		||||
 | 
			
		||||
#ifdef __cplusplus
 | 
			
		||||
}  /* extern "C" */
 | 
			
		||||
 
 | 
			
		||||
@@ -1,24 +1,24 @@
 | 
			
		||||
/* This file is autogenerated by ../update-pcre/update.sh during
 | 
			
		||||
 * the update of the local copy of PCRE.
 | 
			
		||||
 */
 | 
			
		||||
/*************************************************
 | 
			
		||||
*      Perl-Compatible Regular Expressions       *
 | 
			
		||||
*************************************************/
 | 
			
		||||
 | 
			
		||||
/* This file is automatically written by the dftables auxiliary 
 | 
			
		||||
program. If you edit it by hand, you might like to edit the Makefile to 
 | 
			
		||||
prevent its ever being regenerated.
 | 
			
		||||
/* This file contains character tables that are used when no external tables
 | 
			
		||||
are passed to PCRE by the application that calls it. The tables are used only
 | 
			
		||||
for characters whose code values are less than 256.
 | 
			
		||||
 | 
			
		||||
This file contains the default tables for characters with codes less than
 | 
			
		||||
128 (ASCII characters). These tables are used when no external tables are
 | 
			
		||||
passed to PCRE.
 | 
			
		||||
This is a default version of the tables that assumes ASCII encoding. A program
 | 
			
		||||
called dftables (which is distributed with PCRE) can be used to build
 | 
			
		||||
alternative versions of this file. This is necessary if you are running in an
 | 
			
		||||
EBCDIC environment, or if you want to default to a different encoding, for
 | 
			
		||||
example ISO-8859-1. When dftables is run, it creates these tables in the
 | 
			
		||||
current locale. If PCRE is configured with --enable-rebuild-chartables, this
 | 
			
		||||
happens automatically.
 | 
			
		||||
 | 
			
		||||
The following #include is present because without it gcc 4.x may remove
 | 
			
		||||
the array definition from the final binary if PCRE is built into a static
 | 
			
		||||
library and dead code stripping is activated. This leads to link errors.
 | 
			
		||||
Pulling in the header ensures that the array gets flagged as "someone
 | 
			
		||||
outside this compilation unit might reference this" and so it will always
 | 
			
		||||
be supplied to the linker. */
 | 
			
		||||
The following #include is present because without it gcc 4.x may remove the
 | 
			
		||||
array definition from the final binary if PCRE is built into a static library
 | 
			
		||||
and dead code stripping is activated. This leads to link errors. Pulling in the
 | 
			
		||||
header ensures that the array gets flagged as "someone outside this compilation
 | 
			
		||||
unit might reference this" and so it will always be supplied to the linker. */
 | 
			
		||||
 | 
			
		||||
#include "pcre_internal.h"
 | 
			
		||||
 | 
			
		||||
@@ -94,11 +94,10 @@ const unsigned char _pcre_default_tables[] = {
 | 
			
		||||
  240,241,242,243,244,245,246,247,
 | 
			
		||||
  248,249,250,251,252,253,254,255,
 | 
			
		||||
 | 
			
		||||
/* This table contains bit maps for various character classes.
 | 
			
		||||
Each map is 32 bytes long and the bits run from the least
 | 
			
		||||
significant end of each byte. The classes that have their own
 | 
			
		||||
maps are: space, xdigit, digit, upper, lower, word, graph
 | 
			
		||||
print, punct, and cntrl. Other classes are built from combinations. */
 | 
			
		||||
/* This table contains bit maps for various character classes. Each map is 32
 | 
			
		||||
bytes long and the bits run from the least significant end of each byte. The
 | 
			
		||||
classes that have their own maps are: space, xdigit, digit, upper, lower, word,
 | 
			
		||||
graph, print, punct, and cntrl. Other classes are built from combinations. */
 | 
			
		||||
 | 
			
		||||
  0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
 | 
			
		||||
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
 | 
			
		||||
@@ -192,4 +191,4 @@ print, punct, and cntrl. Other classes are built from combinations. */
 | 
			
		||||
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
 | 
			
		||||
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
 | 
			
		||||
 | 
			
		||||
/* End of chartables.c */
 | 
			
		||||
/* End of pcre_chartables.c */
 | 
			
		||||
 
 | 
			
		||||
@@ -6,7 +6,7 @@
 | 
			
		||||
and semantics are as close as possible to those of the Perl 5 language.
 | 
			
		||||
 | 
			
		||||
                       Written by Philip Hazel
 | 
			
		||||
           Copyright (c) 1997-2006 University of Cambridge
 | 
			
		||||
           Copyright (c) 1997-2007 University of Cambridge
 | 
			
		||||
 | 
			
		||||
-----------------------------------------------------------------------------
 | 
			
		||||
Redistribution and use in source and binary forms, with or without
 | 
			
		||||
@@ -58,6 +58,11 @@ used by pcretest. DEBUG is not defined when building a production library. */
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/* Macro for setting individual bits in class bitmaps. */
 | 
			
		||||
 | 
			
		||||
/* SETBIT(a,b) sets bit number b in the byte-array bitmap a (bit 0 is the least
significant bit of a[0]). Both arguments and the whole expansion are
parenthesized so that expression arguments such as "c + 1" index and shift
correctly. Note that b is evaluated twice, so it must not have side effects. */
#define SETBIT(a,b) ((a)[(b)/8] |= (1 << ((b)%8)))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*************************************************
 | 
			
		||||
*      Code parameters and static tables         *
 | 
			
		||||
*************************************************/
 | 
			
		||||
@@ -82,17 +87,17 @@ are simple data values; negative values are for special things like \d and so
 | 
			
		||||
on. Zero means further processing is needed (for things like \x), or the escape
 | 
			
		||||
is invalid. */
 | 
			
		||||
 | 
			
		||||
#if !EBCDIC   /* This is the "normal" table for ASCII systems */
 | 
			
		||||
#ifndef EBCDIC  /* This is the "normal" table for ASCII systems */
 | 
			
		||||
static const short int escapes[] = {
 | 
			
		||||
     0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */
 | 
			
		||||
     0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */
 | 
			
		||||
   '@', -ESC_A, -ESC_B, -ESC_C, -ESC_D, -ESC_E,      0, -ESC_G,   /* @ - G */
 | 
			
		||||
     0,      0,      0,      0,      0,      0,      0,      0,   /* H - O */
 | 
			
		||||
-ESC_P, -ESC_Q, -ESC_R, -ESC_S,      0,      0,      0, -ESC_W,   /* P - W */
 | 
			
		||||
-ESC_H,      0,      0, -ESC_K,      0,      0,      0,      0,   /* H - O */
 | 
			
		||||
-ESC_P, -ESC_Q, -ESC_R, -ESC_S,      0,      0, -ESC_V, -ESC_W,   /* P - W */
 | 
			
		||||
-ESC_X,      0, -ESC_Z,    '[',   '\\',    ']',    '^',    '_',   /* X - _ */
 | 
			
		||||
   '`',      7, -ESC_b,      0, -ESC_d,  ESC_e,  ESC_f,      0,   /* ` - g */
 | 
			
		||||
     0,      0,      0, -ESC_k,      0,      0,  ESC_n,      0,   /* h - o */
 | 
			
		||||
-ESC_p,      0,  ESC_r, -ESC_s,  ESC_tee,    0,      0, -ESC_w,   /* p - w */
 | 
			
		||||
-ESC_h,      0,      0, -ESC_k,      0,      0,  ESC_n,      0,   /* h - o */
 | 
			
		||||
-ESC_p,      0,  ESC_r, -ESC_s,  ESC_tee,    0, -ESC_v, -ESC_w,   /* p - w */
 | 
			
		||||
     0,      0, -ESC_z                                            /* x - z */
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
@@ -106,18 +111,18 @@ static const short int escapes[] = {
 | 
			
		||||
/*  70 */     0,     0,      0,       0,      0,     0,      0,      0,
 | 
			
		||||
/*  78 */     0,   '`',    ':',     '#',    '@',  '\'',    '=',    '"',
 | 
			
		||||
/*  80 */     0,     7, -ESC_b,       0, -ESC_d, ESC_e,  ESC_f,      0,
 | 
			
		||||
/*  88 */     0,     0,      0,     '{',      0,     0,      0,      0,
 | 
			
		||||
/*  88 */-ESC_h,     0,      0,     '{',      0,     0,      0,      0,
 | 
			
		||||
/*  90 */     0,     0, -ESC_k,     'l',      0, ESC_n,      0, -ESC_p,
 | 
			
		||||
/*  98 */     0, ESC_r,      0,     '}',      0,     0,      0,      0,
 | 
			
		||||
/*  A0 */     0,   '~', -ESC_s, ESC_tee,      0,     0, -ESC_w,      0,
 | 
			
		||||
/*  A0 */     0,   '~', -ESC_s, ESC_tee,      0,-ESC_v, -ESC_w,      0,
 | 
			
		||||
/*  A8 */     0,-ESC_z,      0,       0,      0,   '[',      0,      0,
 | 
			
		||||
/*  B0 */     0,     0,      0,       0,      0,     0,      0,      0,
 | 
			
		||||
/*  B8 */     0,     0,      0,       0,      0,   ']',    '=',    '-',
 | 
			
		||||
/*  C0 */   '{',-ESC_A, -ESC_B,  -ESC_C, -ESC_D,-ESC_E,      0, -ESC_G,
 | 
			
		||||
/*  C8 */     0,     0,      0,       0,      0,     0,      0,      0,
 | 
			
		||||
/*  C8 */-ESC_H,     0,      0,       0,      0,     0,      0,      0,
 | 
			
		||||
/*  D0 */   '}',     0,      0,       0,      0,     0,      0, -ESC_P,
 | 
			
		||||
/*  D8 */-ESC_Q,-ESC_R,      0,       0,      0,     0,      0,      0,
 | 
			
		||||
/*  E0 */  '\\',     0, -ESC_S,       0,      0,     0, -ESC_W, -ESC_X,
 | 
			
		||||
/*  E0 */  '\\',     0, -ESC_S,       0,      0,-ESC_V, -ESC_W, -ESC_X,
 | 
			
		||||
/*  E8 */     0,-ESC_Z,      0,       0,      0,     0,      0,      0,
 | 
			
		||||
/*  F0 */     0,     0,      0,       0,      0,     0,      0,      0,
 | 
			
		||||
/*  F8 */     0,     0,      0,       0,      0,     0,      0,      0
 | 
			
		||||
@@ -221,7 +226,7 @@ static const char error_texts[] =
 | 
			
		||||
  "malformed number or name after (?(\0"
 | 
			
		||||
  "conditional group contains more than two branches\0"
 | 
			
		||||
  "assertion expected after (?(\0"
 | 
			
		||||
  "(?R or (?digits must be followed by )\0"
 | 
			
		||||
  "(?R or (?[+-]digits must be followed by )\0"
 | 
			
		||||
  /* 30 */
 | 
			
		||||
  "unknown POSIX class name\0"
 | 
			
		||||
  "POSIX collating elements are not supported\0"
 | 
			
		||||
@@ -255,7 +260,8 @@ static const char error_texts[] =
 | 
			
		||||
  /* 55 */
 | 
			
		||||
  "repeating a DEFINE group is not allowed\0"
 | 
			
		||||
  "inconsistent NEWLINE options\0"
 | 
			
		||||
  "\\g is not followed by an (optionally braced) non-zero number";
 | 
			
		||||
  "\\g is not followed by a braced name or an optionally braced non-zero number\0"
 | 
			
		||||
  "(?+ or (?- or (?(+ or (?(- must be followed by a non-zero number";
 | 
			
		||||
 | 
			
		||||
static const int error_texts_offsets[] = {
 | 
			
		||||
  0,
 | 
			
		||||
@@ -315,15 +321,14 @@ static const int error_texts_offsets[] = {
 | 
			
		||||
  1796,
 | 
			
		||||
  1839,
 | 
			
		||||
  1879,
 | 
			
		||||
  1908
 | 
			
		||||
  1908,
 | 
			
		||||
  1984
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/* Definition to allow mutual recursion */
 | 
			
		||||
 | 
			
		||||
static BOOL
 | 
			
		||||
  compile_regex(int, int, uschar **, const uschar **, int *, BOOL, int, int *,
 | 
			
		||||
    int *, branch_chain *, compile_data *, int *);
 | 
			
		||||
  compile_regex(int, int, uschar **, const uschar **, int *, BOOL, BOOL, int,
 | 
			
		||||
    int *, int *, branch_chain *, compile_data *, int *);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -370,7 +375,7 @@ if (c == 0) *errorcodeptr = ERR1;
 | 
			
		||||
a table. A non-zero result is something that can be returned immediately.
 | 
			
		||||
Otherwise further processing may be required. */
 | 
			
		||||
 | 
			
		||||
#if !EBCDIC    /* ASCII coding */
 | 
			
		||||
#ifndef EBCDIC  /* ASCII coding */
 | 
			
		||||
else if (c < '0' || c > 'z') {}                           /* Not alphameric */
 | 
			
		||||
else if ((i = escapes[c - '0']) != 0) c = i;
 | 
			
		||||
 | 
			
		||||
@@ -401,11 +406,22 @@ else
 | 
			
		||||
 | 
			
		||||
    /* \g must be followed by a number, either plain or braced. If positive, it
 | 
			
		||||
    is an absolute backreference. If negative, it is a relative backreference.
 | 
			
		||||
    This is a Perl 5.10 feature. */
 | 
			
		||||
    This is a Perl 5.10 feature. Perl 5.10 also supports \g{name} as a
 | 
			
		||||
    reference to a named group. This is part of Perl's movement towards a
 | 
			
		||||
    unified syntax for back references. As this is synonymous with \k{name}, we
 | 
			
		||||
    fudge it up by pretending it really was \k. */
 | 
			
		||||
 | 
			
		||||
    case 'g':
 | 
			
		||||
    if (ptr[1] == '{')
 | 
			
		||||
      {
 | 
			
		||||
      const uschar *p;
 | 
			
		||||
      for (p = ptr+2; *p != 0 && *p != '}'; p++)
 | 
			
		||||
        if (*p != '-' && g_ascii_isdigit(*p) == 0) break;
 | 
			
		||||
      if (*p != 0 && *p != '}')
 | 
			
		||||
        {
 | 
			
		||||
        c = -ESC_k;
 | 
			
		||||
        break;
 | 
			
		||||
        }
 | 
			
		||||
      braced = TRUE;
 | 
			
		||||
      ptr++;
 | 
			
		||||
      }
 | 
			
		||||
@@ -511,7 +527,7 @@ else
 | 
			
		||||
        if (c == 0 && cc == '0') continue;     /* Leading zeroes */
 | 
			
		||||
        count++;
 | 
			
		||||
 | 
			
		||||
#if !EBCDIC    /* ASCII coding */
 | 
			
		||||
#ifndef EBCDIC  /* ASCII coding */
 | 
			
		||||
        if (cc >= 'a') cc -= 32;               /* Convert to upper case */
 | 
			
		||||
        c = (c << 4) + cc - ((cc < 'A')? '0' : ('A' - 10));
 | 
			
		||||
#else           /* EBCDIC coding */
 | 
			
		||||
@@ -538,7 +554,7 @@ else
 | 
			
		||||
      {
 | 
			
		||||
      int cc;                               /* Some compilers don't like ++ */
 | 
			
		||||
      cc = *(++ptr);                        /* in initializers */
 | 
			
		||||
#if !EBCDIC    /* ASCII coding */
 | 
			
		||||
#ifndef EBCDIC  /* ASCII coding */
 | 
			
		||||
      if (cc >= 'a') cc -= 32;              /* Convert to upper case */
 | 
			
		||||
      c = c * 16 + cc - ((cc < 'A')? '0' : ('A' - 10));
 | 
			
		||||
#else           /* EBCDIC coding */
 | 
			
		||||
@@ -560,7 +576,7 @@ else
 | 
			
		||||
      return 0;
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
#if !EBCDIC    /* ASCII coding */
 | 
			
		||||
#ifndef EBCDIC  /* ASCII coding */
 | 
			
		||||
    if (c >= 'a' && c <= 'z') c -= 32;
 | 
			
		||||
    c ^= 0x40;
 | 
			
		||||
#else           /* EBCDIC coding */
 | 
			
		||||
@@ -1195,6 +1211,7 @@ for (;;)
 | 
			
		||||
  else
 | 
			
		||||
    {
 | 
			
		||||
    code += _pcre_OP_lengths[c];
 | 
			
		||||
#ifdef SUPPORT_UTF8
 | 
			
		||||
    if (utf8) switch(c)
 | 
			
		||||
      {
 | 
			
		||||
      case OP_CHAR:
 | 
			
		||||
@@ -1215,6 +1232,7 @@ for (;;)
 | 
			
		||||
      if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];
 | 
			
		||||
      break;
 | 
			
		||||
      }
 | 
			
		||||
#endif
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
@@ -1258,6 +1276,7 @@ for (;;)
 | 
			
		||||
  else
 | 
			
		||||
    {
 | 
			
		||||
    code += _pcre_OP_lengths[c];
 | 
			
		||||
#ifdef SUPPORT_UTF8
 | 
			
		||||
    if (utf8) switch(c)
 | 
			
		||||
      {
 | 
			
		||||
      case OP_CHAR:
 | 
			
		||||
@@ -1278,6 +1297,7 @@ for (;;)
 | 
			
		||||
      if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];
 | 
			
		||||
      break;
 | 
			
		||||
      }
 | 
			
		||||
#endif
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
@@ -1315,6 +1335,18 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE
 | 
			
		||||
 | 
			
		||||
  c = *code;
 | 
			
		||||
 | 
			
		||||
  /* Groups with zero repeats can of course be empty; skip them. */
 | 
			
		||||
 | 
			
		||||
  if (c == OP_BRAZERO || c == OP_BRAMINZERO)
 | 
			
		||||
    {
 | 
			
		||||
    code += _pcre_OP_lengths[c];
 | 
			
		||||
    do code += GET(code, 1); while (*code == OP_ALT);
 | 
			
		||||
    c = *code;
 | 
			
		||||
    continue;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  /* For other groups, scan the branches. */
 | 
			
		||||
 | 
			
		||||
  if (c == OP_BRA || c == OP_CBRA || c == OP_ONCE)
 | 
			
		||||
    {
 | 
			
		||||
    BOOL empty_branch;
 | 
			
		||||
@@ -1331,12 +1363,7 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE
 | 
			
		||||
      }
 | 
			
		||||
    while (*code == OP_ALT);
 | 
			
		||||
    if (!empty_branch) return FALSE;   /* All branches are non-empty */
 | 
			
		||||
 | 
			
		||||
    /* Move past the KET and fudge things so that the increment in the "for"
 | 
			
		||||
    above has no effect. */
 | 
			
		||||
 | 
			
		||||
    c = OP_END;
 | 
			
		||||
    code += 1 + LINK_SIZE - _pcre_OP_lengths[c];
 | 
			
		||||
    c = *code;
 | 
			
		||||
    continue;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
@@ -1872,6 +1899,50 @@ if (next >= 0) switch(op_code)
 | 
			
		||||
  case OP_NOT_WORDCHAR:
 | 
			
		||||
  return next <= 127 && (cd->ctypes[next] & ctype_word) != 0;
 | 
			
		||||
 | 
			
		||||
  case OP_HSPACE:
 | 
			
		||||
  case OP_NOT_HSPACE:
 | 
			
		||||
  switch(next)
 | 
			
		||||
    {
 | 
			
		||||
    case 0x09:
 | 
			
		||||
    case 0x20:
 | 
			
		||||
    case 0xa0:
 | 
			
		||||
    case 0x1680:
 | 
			
		||||
    case 0x180e:
 | 
			
		||||
    case 0x2000:
 | 
			
		||||
    case 0x2001:
 | 
			
		||||
    case 0x2002:
 | 
			
		||||
    case 0x2003:
 | 
			
		||||
    case 0x2004:
 | 
			
		||||
    case 0x2005:
 | 
			
		||||
    case 0x2006:
 | 
			
		||||
    case 0x2007:
 | 
			
		||||
    case 0x2008:
 | 
			
		||||
    case 0x2009:
 | 
			
		||||
    case 0x200A:
 | 
			
		||||
    case 0x202f:
 | 
			
		||||
    case 0x205f:
 | 
			
		||||
    case 0x3000:
 | 
			
		||||
    return op_code != OP_HSPACE;
 | 
			
		||||
    default:
 | 
			
		||||
    return op_code == OP_HSPACE;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  case OP_VSPACE:
 | 
			
		||||
  case OP_NOT_VSPACE:
 | 
			
		||||
  switch(next)
 | 
			
		||||
    {
 | 
			
		||||
    case 0x0a:
 | 
			
		||||
    case 0x0b:
 | 
			
		||||
    case 0x0c:
 | 
			
		||||
    case 0x0d:
 | 
			
		||||
    case 0x85:
 | 
			
		||||
    case 0x2028:
 | 
			
		||||
    case 0x2029:
 | 
			
		||||
    return op_code != OP_VSPACE;
 | 
			
		||||
    default:
 | 
			
		||||
    return op_code == OP_VSPACE;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  default:
 | 
			
		||||
  return FALSE;
 | 
			
		||||
  }
 | 
			
		||||
@@ -1906,12 +1977,57 @@ switch(op_code)
 | 
			
		||||
    case ESC_W:
 | 
			
		||||
    return item <= 127 && (cd->ctypes[item] & ctype_word) != 0;
 | 
			
		||||
 | 
			
		||||
    case ESC_h:
 | 
			
		||||
    case ESC_H:
 | 
			
		||||
    switch(item)
 | 
			
		||||
      {
 | 
			
		||||
      case 0x09:
 | 
			
		||||
      case 0x20:
 | 
			
		||||
      case 0xa0:
 | 
			
		||||
      case 0x1680:
 | 
			
		||||
      case 0x180e:
 | 
			
		||||
      case 0x2000:
 | 
			
		||||
      case 0x2001:
 | 
			
		||||
      case 0x2002:
 | 
			
		||||
      case 0x2003:
 | 
			
		||||
      case 0x2004:
 | 
			
		||||
      case 0x2005:
 | 
			
		||||
      case 0x2006:
 | 
			
		||||
      case 0x2007:
 | 
			
		||||
      case 0x2008:
 | 
			
		||||
      case 0x2009:
 | 
			
		||||
      case 0x200A:
 | 
			
		||||
      case 0x202f:
 | 
			
		||||
      case 0x205f:
 | 
			
		||||
      case 0x3000:
 | 
			
		||||
      return -next != ESC_h;
 | 
			
		||||
      default:
 | 
			
		||||
      return -next == ESC_h;
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
    case ESC_v:
 | 
			
		||||
    case ESC_V:
 | 
			
		||||
    switch(item)
 | 
			
		||||
      {
 | 
			
		||||
      case 0x0a:
 | 
			
		||||
      case 0x0b:
 | 
			
		||||
      case 0x0c:
 | 
			
		||||
      case 0x0d:
 | 
			
		||||
      case 0x85:
 | 
			
		||||
      case 0x2028:
 | 
			
		||||
      case 0x2029:
 | 
			
		||||
      return -next != ESC_v;
 | 
			
		||||
      default:
 | 
			
		||||
      return -next == ESC_v;
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
    default:
 | 
			
		||||
    return FALSE;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  case OP_DIGIT:
 | 
			
		||||
  return next == -ESC_D || next == -ESC_s || next == -ESC_W;
 | 
			
		||||
  return next == -ESC_D || next == -ESC_s || next == -ESC_W ||
 | 
			
		||||
         next == -ESC_h || next == -ESC_v;
 | 
			
		||||
 | 
			
		||||
  case OP_NOT_DIGIT:
 | 
			
		||||
  return next == -ESC_d;
 | 
			
		||||
@@ -1920,10 +2036,23 @@ switch(op_code)
 | 
			
		||||
  return next == -ESC_S || next == -ESC_d || next == -ESC_w;
 | 
			
		||||
 | 
			
		||||
  case OP_NOT_WHITESPACE:
 | 
			
		||||
  return next == -ESC_s;
 | 
			
		||||
  return next == -ESC_s || next == -ESC_h || next == -ESC_v;
 | 
			
		||||
 | 
			
		||||
  case OP_HSPACE:
 | 
			
		||||
  return next == -ESC_S || next == -ESC_H || next == -ESC_d || next == -ESC_w;
 | 
			
		||||
 | 
			
		||||
  case OP_NOT_HSPACE:
 | 
			
		||||
  return next == -ESC_h;
 | 
			
		||||
 | 
			
		||||
  /* Can't have \S in here because VT matches \S (Perl anomaly) */
 | 
			
		||||
  case OP_VSPACE:
 | 
			
		||||
  return next == -ESC_V || next == -ESC_d || next == -ESC_w;
 | 
			
		||||
 | 
			
		||||
  case OP_NOT_VSPACE:
 | 
			
		||||
  return next == -ESC_v;
 | 
			
		||||
 | 
			
		||||
  case OP_WORDCHAR:
 | 
			
		||||
  return next == -ESC_W || next == -ESC_s;
 | 
			
		||||
  return next == -ESC_W || next == -ESC_s || next == -ESC_h || next == -ESC_v;
 | 
			
		||||
 | 
			
		||||
  case OP_NOT_WORDCHAR:
 | 
			
		||||
  return next == -ESC_w || next == -ESC_d;
 | 
			
		||||
@@ -2038,10 +2167,12 @@ for (;; ptr++)
 | 
			
		||||
  BOOL possessive_quantifier;
 | 
			
		||||
  BOOL is_quantifier;
 | 
			
		||||
  BOOL is_recurse;
 | 
			
		||||
  BOOL reset_bracount;
 | 
			
		||||
  int class_charcount;
 | 
			
		||||
  int class_lastchar;
 | 
			
		||||
  int newoptions;
 | 
			
		||||
  int recno;
 | 
			
		||||
  int refsign;
 | 
			
		||||
  int skipbytes;
 | 
			
		||||
  int subreqbyte;
 | 
			
		||||
  int subfirstbyte;
 | 
			
		||||
@@ -2466,6 +2597,133 @@ for (;; ptr++)
 | 
			
		||||
          else if (c == -ESC_d || c == -ESC_D || c == -ESC_w ||
 | 
			
		||||
                   c == -ESC_W || c == -ESC_s || c == -ESC_S) continue;
 | 
			
		||||
 | 
			
		||||
          /* We need to deal with \H, \h, \V, and \v in both phases because
 | 
			
		||||
          they use extra memory. */
 | 
			
		||||
 | 
			
		||||
          if (-c == ESC_h)
 | 
			
		||||
            {
 | 
			
		||||
            SETBIT(classbits, 0x09); /* HT */
 | 
			
		||||
            SETBIT(classbits, 0x20); /* SPACE */
 | 
			
		||||
            SETBIT(classbits, 0xa0); /* NBSP */
 | 
			
		||||
#ifdef SUPPORT_UTF8
 | 
			
		||||
            if (utf8)
 | 
			
		||||
              {
 | 
			
		||||
              class_utf8 = TRUE;
 | 
			
		||||
              *class_utf8data++ = XCL_SINGLE;
 | 
			
		||||
              class_utf8data += _pcre_ord2utf8(0x1680, class_utf8data);
 | 
			
		||||
              *class_utf8data++ = XCL_SINGLE;
 | 
			
		||||
              class_utf8data += _pcre_ord2utf8(0x180e, class_utf8data);
 | 
			
		||||
              *class_utf8data++ = XCL_RANGE;
 | 
			
		||||
              class_utf8data += _pcre_ord2utf8(0x2000, class_utf8data);
 | 
			
		||||
              class_utf8data += _pcre_ord2utf8(0x200A, class_utf8data);
 | 
			
		||||
              *class_utf8data++ = XCL_SINGLE;
 | 
			
		||||
              class_utf8data += _pcre_ord2utf8(0x202f, class_utf8data);
 | 
			
		||||
              *class_utf8data++ = XCL_SINGLE;
 | 
			
		||||
              class_utf8data += _pcre_ord2utf8(0x205f, class_utf8data);
 | 
			
		||||
              *class_utf8data++ = XCL_SINGLE;
 | 
			
		||||
              class_utf8data += _pcre_ord2utf8(0x3000, class_utf8data);
 | 
			
		||||
              }
 | 
			
		||||
#endif
 | 
			
		||||
            continue;
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
          if (-c == ESC_H)
 | 
			
		||||
            {
 | 
			
		||||
            for (c = 0; c < 32; c++)
 | 
			
		||||
              {
 | 
			
		||||
              int x = 0xff;
 | 
			
		||||
              switch (c)
 | 
			
		||||
                {
 | 
			
		||||
                case 0x09/8: x ^= 1 << (0x09%8); break;
 | 
			
		||||
                case 0x20/8: x ^= 1 << (0x20%8); break;
 | 
			
		||||
                case 0xa0/8: x ^= 1 << (0xa0%8); break;
 | 
			
		||||
                default: break;
 | 
			
		||||
                }
 | 
			
		||||
              classbits[c] |= x;
 | 
			
		||||
              }
 | 
			
		||||
 | 
			
		||||
#ifdef SUPPORT_UTF8
 | 
			
		||||
            if (utf8)
 | 
			
		||||
              {
 | 
			
		||||
              class_utf8 = TRUE;
 | 
			
		||||
              *class_utf8data++ = XCL_RANGE;
 | 
			
		||||
              class_utf8data += _pcre_ord2utf8(0x0100, class_utf8data);
 | 
			
		||||
              class_utf8data += _pcre_ord2utf8(0x167f, class_utf8data);
 | 
			
		||||
              *class_utf8data++ = XCL_RANGE;
 | 
			
		||||
              class_utf8data += _pcre_ord2utf8(0x1681, class_utf8data);
 | 
			
		||||
              class_utf8data += _pcre_ord2utf8(0x180d, class_utf8data);
 | 
			
		||||
              *class_utf8data++ = XCL_RANGE;
 | 
			
		||||
              class_utf8data += _pcre_ord2utf8(0x180f, class_utf8data);
 | 
			
		||||
              class_utf8data += _pcre_ord2utf8(0x1fff, class_utf8data);
 | 
			
		||||
              *class_utf8data++ = XCL_RANGE;
 | 
			
		||||
              class_utf8data += _pcre_ord2utf8(0x200B, class_utf8data);
 | 
			
		||||
              class_utf8data += _pcre_ord2utf8(0x202e, class_utf8data);
 | 
			
		||||
              *class_utf8data++ = XCL_RANGE;
 | 
			
		||||
              class_utf8data += _pcre_ord2utf8(0x2030, class_utf8data);
 | 
			
		||||
              class_utf8data += _pcre_ord2utf8(0x205e, class_utf8data);
 | 
			
		||||
              *class_utf8data++ = XCL_RANGE;
 | 
			
		||||
              class_utf8data += _pcre_ord2utf8(0x2060, class_utf8data);
 | 
			
		||||
              class_utf8data += _pcre_ord2utf8(0x2fff, class_utf8data);
 | 
			
		||||
              *class_utf8data++ = XCL_RANGE;
 | 
			
		||||
              class_utf8data += _pcre_ord2utf8(0x3001, class_utf8data);
 | 
			
		||||
              class_utf8data += _pcre_ord2utf8(0x7fffffff, class_utf8data);
 | 
			
		||||
              }
 | 
			
		||||
#endif
 | 
			
		||||
            continue;
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
          if (-c == ESC_v)
 | 
			
		||||
            {
 | 
			
		||||
            SETBIT(classbits, 0x0a); /* LF */
 | 
			
		||||
            SETBIT(classbits, 0x0b); /* VT */
 | 
			
		||||
            SETBIT(classbits, 0x0c); /* FF */
 | 
			
		||||
            SETBIT(classbits, 0x0d); /* CR */
 | 
			
		||||
            SETBIT(classbits, 0x85); /* NEL */
 | 
			
		||||
#ifdef SUPPORT_UTF8
 | 
			
		||||
            if (utf8)
 | 
			
		||||
              {
 | 
			
		||||
              class_utf8 = TRUE;
 | 
			
		||||
              *class_utf8data++ = XCL_RANGE;
 | 
			
		||||
              class_utf8data += _pcre_ord2utf8(0x2028, class_utf8data);
 | 
			
		||||
              class_utf8data += _pcre_ord2utf8(0x2029, class_utf8data);
 | 
			
		||||
              }
 | 
			
		||||
#endif
 | 
			
		||||
            continue;
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
          if (-c == ESC_V)
 | 
			
		||||
            {
 | 
			
		||||
            for (c = 0; c < 32; c++)
 | 
			
		||||
              {
 | 
			
		||||
              int x = 0xff;
 | 
			
		||||
              switch (c)
 | 
			
		||||
                {
 | 
			
		||||
                case 0x0a/8: x ^= 1 << (0x0a%8);
 | 
			
		||||
                             x ^= 1 << (0x0b%8);
 | 
			
		||||
                             x ^= 1 << (0x0c%8);
 | 
			
		||||
                             x ^= 1 << (0x0d%8);
 | 
			
		||||
                             break;
 | 
			
		||||
                case 0x85/8: x ^= 1 << (0x85%8); break;
 | 
			
		||||
                default: break;
 | 
			
		||||
                }
 | 
			
		||||
              classbits[c] |= x;
 | 
			
		||||
              }
 | 
			
		||||
 | 
			
		||||
#ifdef SUPPORT_UTF8
 | 
			
		||||
            if (utf8)
 | 
			
		||||
              {
 | 
			
		||||
              class_utf8 = TRUE;
 | 
			
		||||
              *class_utf8data++ = XCL_RANGE;
 | 
			
		||||
              class_utf8data += _pcre_ord2utf8(0x0100, class_utf8data);
 | 
			
		||||
              class_utf8data += _pcre_ord2utf8(0x2027, class_utf8data);
 | 
			
		||||
              *class_utf8data++ = XCL_RANGE;
 | 
			
		||||
              class_utf8data += _pcre_ord2utf8(0x2029, class_utf8data);
 | 
			
		||||
              class_utf8data += _pcre_ord2utf8(0x7fffffff, class_utf8data);
 | 
			
		||||
              }
 | 
			
		||||
#endif
 | 
			
		||||
            continue;
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
          /* We need to deal with \P and \p in both phases. */
 | 
			
		||||
 | 
			
		||||
#ifdef SUPPORT_UCP
 | 
			
		||||
@@ -2606,14 +2864,18 @@ for (;; ptr++)
 | 
			
		||||
            unsigned int origd = d;
 | 
			
		||||
            while (get_othercase_range(&cc, origd, &occ, &ocd))
 | 
			
		||||
              {
 | 
			
		||||
              if (occ >= c && ocd <= d) continue;  /* Skip embedded ranges */
 | 
			
		||||
              if (occ >= (unsigned int)c &&
 | 
			
		||||
                  ocd <= (unsigned int)d)
 | 
			
		||||
                continue;                          /* Skip embedded ranges */
 | 
			
		||||
 | 
			
		||||
              if (occ < c  && ocd >= c - 1)        /* Extend the basic range */
 | 
			
		||||
              if (occ < (unsigned int)c  &&
 | 
			
		||||
                  ocd >= (unsigned int)c - 1)      /* Extend the basic range */
 | 
			
		||||
                {                                  /* if there is overlap,   */
 | 
			
		||||
                c = occ;                           /* noting that if occ < c */
 | 
			
		||||
                continue;                          /* we can't have ocd > d  */
 | 
			
		||||
                }                                  /* because a subrange is  */
 | 
			
		||||
              if (ocd > d && occ <= d + 1)         /* always shorter than    */
 | 
			
		||||
              if (ocd > (unsigned int)d &&
 | 
			
		||||
                  occ <= (unsigned int)d + 1)      /* always shorter than    */
 | 
			
		||||
                {                                  /* the basic range.       */
 | 
			
		||||
                d = ocd;
 | 
			
		||||
                continue;
 | 
			
		||||
@@ -3511,6 +3773,7 @@ for (;; ptr++)
 | 
			
		||||
    skipbytes = 0;
 | 
			
		||||
    bravalue = OP_CBRA;
 | 
			
		||||
    save_hwm = cd->hwm;
 | 
			
		||||
    reset_bracount = FALSE;
 | 
			
		||||
 | 
			
		||||
    if (*(++ptr) == '?')
 | 
			
		||||
      {
 | 
			
		||||
@@ -3532,6 +3795,11 @@ for (;; ptr++)
 | 
			
		||||
        continue;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
        /* ------------------------------------------------------------ */
 | 
			
		||||
        case '|':                 /* Reset capture count for each branch */
 | 
			
		||||
        reset_bracount = TRUE;
 | 
			
		||||
        /* Fall through */
 | 
			
		||||
 | 
			
		||||
        /* ------------------------------------------------------------ */
 | 
			
		||||
        case ':':                 /* Non-capturing bracket */
 | 
			
		||||
        bravalue = OP_BRA;
 | 
			
		||||
@@ -3568,6 +3836,7 @@ for (;; ptr++)
 | 
			
		||||
 | 
			
		||||
        code[1+LINK_SIZE] = OP_CREF;
 | 
			
		||||
        skipbytes = 3;
 | 
			
		||||
        refsign = -1;
 | 
			
		||||
 | 
			
		||||
        /* Check for a test for recursion in a named group. */
 | 
			
		||||
 | 
			
		||||
@@ -3591,7 +3860,11 @@ for (;; ptr++)
 | 
			
		||||
          terminator = '\'';
 | 
			
		||||
          ptr++;
 | 
			
		||||
          }
 | 
			
		||||
        else terminator = 0;
 | 
			
		||||
        else
 | 
			
		||||
          {
 | 
			
		||||
          terminator = 0;
 | 
			
		||||
          if (ptr[1] == '-' || ptr[1] == '+') refsign = *(++ptr);
 | 
			
		||||
          }
 | 
			
		||||
 | 
			
		||||
        /* We now expect to read a name; anything else is an error */
 | 
			
		||||
 | 
			
		||||
@@ -3627,7 +3900,32 @@ for (;; ptr++)
 | 
			
		||||
        if (lengthptr != NULL) break;
 | 
			
		||||
 | 
			
		||||
        /* In the real compile we do the work of looking for the actual
 | 
			
		||||
        reference. */
 | 
			
		||||
        reference. If the string started with "+" or "-" we require the rest to
 | 
			
		||||
        be digits, in which case recno will be set. */
 | 
			
		||||
 | 
			
		||||
        if (refsign > 0)
 | 
			
		||||
          {
 | 
			
		||||
          if (recno <= 0)
 | 
			
		||||
            {
 | 
			
		||||
            *errorcodeptr = ERR58;
 | 
			
		||||
            goto FAILED;
 | 
			
		||||
            }
 | 
			
		||||
          if (refsign == '-')
 | 
			
		||||
            {
 | 
			
		||||
            recno = cd->bracount - recno + 1;
 | 
			
		||||
            if (recno <= 0)
 | 
			
		||||
              {
 | 
			
		||||
              *errorcodeptr = ERR15;
 | 
			
		||||
              goto FAILED;
 | 
			
		||||
              }
 | 
			
		||||
            }
 | 
			
		||||
          else recno += cd->bracount;
 | 
			
		||||
          PUT2(code, 2+LINK_SIZE, recno);
 | 
			
		||||
          break;
 | 
			
		||||
          }
 | 
			
		||||
 | 
			
		||||
        /* Otherwise (did not start with "+" or "-"), start by looking for the
 | 
			
		||||
        name. */
 | 
			
		||||
 | 
			
		||||
        slot = cd->name_table;
 | 
			
		||||
        for (i = 0; i < cd->names_found; i++)
 | 
			
		||||
@@ -3946,19 +4244,54 @@ for (;; ptr++)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
        /* ------------------------------------------------------------ */
 | 
			
		||||
        case '-': case '+':
 | 
			
		||||
        case '0': case '1': case '2': case '3': case '4':   /* Recursion or */
 | 
			
		||||
        case '5': case '6': case '7': case '8': case '9':   /* subroutine */
 | 
			
		||||
          {
 | 
			
		||||
          const uschar *called;
 | 
			
		||||
 | 
			
		||||
          if ((refsign = *ptr) == '+') ptr++;
 | 
			
		||||
          else if (refsign == '-')
 | 
			
		||||
            {
 | 
			
		||||
            if (g_ascii_isdigit(ptr[1]) == 0)
 | 
			
		||||
              goto OTHER_CHAR_AFTER_QUERY;
 | 
			
		||||
            ptr++;
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
          recno = 0;
 | 
			
		||||
          while(g_ascii_isdigit(*ptr) != 0)
 | 
			
		||||
            recno = recno * 10 + *ptr++ - '0';
 | 
			
		||||
 | 
			
		||||
          if (*ptr != ')')
 | 
			
		||||
            {
 | 
			
		||||
            *errorcodeptr = ERR29;
 | 
			
		||||
            goto FAILED;
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
          if (refsign == '-')
 | 
			
		||||
            {
 | 
			
		||||
            if (recno == 0)
 | 
			
		||||
              {
 | 
			
		||||
              *errorcodeptr = ERR58;
 | 
			
		||||
              goto FAILED;
 | 
			
		||||
              }
 | 
			
		||||
            recno = cd->bracount - recno + 1;
 | 
			
		||||
            if (recno <= 0)
 | 
			
		||||
              {
 | 
			
		||||
              *errorcodeptr = ERR15;
 | 
			
		||||
              goto FAILED;
 | 
			
		||||
              }
 | 
			
		||||
            }
 | 
			
		||||
          else if (refsign == '+')
 | 
			
		||||
            {
 | 
			
		||||
            if (recno == 0)
 | 
			
		||||
              {
 | 
			
		||||
              *errorcodeptr = ERR58;
 | 
			
		||||
              goto FAILED;
 | 
			
		||||
              }
 | 
			
		||||
            recno += cd->bracount;
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
          /* Come here from code above that handles a named recursion */
 | 
			
		||||
 | 
			
		||||
          HANDLE_RECURSION:
 | 
			
		||||
@@ -4031,6 +4364,7 @@ for (;; ptr++)
 | 
			
		||||
 | 
			
		||||
        /* ------------------------------------------------------------ */
 | 
			
		||||
        default:              /* Other characters: check option setting */
 | 
			
		||||
        OTHER_CHAR_AFTER_QUERY:
 | 
			
		||||
        set = unset = 0;
 | 
			
		||||
        optset = &set;
 | 
			
		||||
 | 
			
		||||
@@ -4165,6 +4499,7 @@ for (;; ptr++)
 | 
			
		||||
         errorcodeptr,                 /* Where to put an error message */
 | 
			
		||||
         (bravalue == OP_ASSERTBACK ||
 | 
			
		||||
          bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */
 | 
			
		||||
         reset_bracount,               /* True if (?| group */
 | 
			
		||||
         skipbytes,                    /* Skip over bracket number */
 | 
			
		||||
         &subfirstbyte,                /* For possible first char */
 | 
			
		||||
         &subreqbyte,                  /* For possible last char */
 | 
			
		||||
@@ -4181,9 +4516,11 @@ for (;; ptr++)
 | 
			
		||||
    is on the bracket. */
 | 
			
		||||
 | 
			
		||||
    /* If this is a conditional bracket, check that there are no more than
 | 
			
		||||
    two branches in the group, or just one if it's a DEFINE group. */
 | 
			
		||||
    two branches in the group, or just one if it's a DEFINE group. We do this
 | 
			
		||||
    in the real compile phase, not in the pre-pass, where the whole group may
 | 
			
		||||
    not be available. */
 | 
			
		||||
 | 
			
		||||
    if (bravalue == OP_COND)
 | 
			
		||||
    if (bravalue == OP_COND && lengthptr == NULL)
 | 
			
		||||
      {
 | 
			
		||||
      uschar *tc = code;
 | 
			
		||||
      int condcount = 0;
 | 
			
		||||
@@ -4343,12 +4680,13 @@ for (;; ptr++)
 | 
			
		||||
      zerofirstbyte = firstbyte;
 | 
			
		||||
      zeroreqbyte = reqbyte;
 | 
			
		||||
 | 
			
		||||
      /* \k<name> or \k'name' is a back reference by name (Perl syntax) */
 | 
			
		||||
      /* \k<name> or \k'name' is a back reference by name (Perl syntax).
 | 
			
		||||
      We also support \k{name} (.NET syntax) */
 | 
			
		||||
 | 
			
		||||
      if (-c == ESC_k && (ptr[1] == '<' || ptr[1] == '\''))
 | 
			
		||||
      if (-c == ESC_k && (ptr[1] == '<' || ptr[1] == '\'' || ptr[1] == '{'))
 | 
			
		||||
        {
 | 
			
		||||
        is_recurse = FALSE;
 | 
			
		||||
        terminator = (*(++ptr) == '<')? '>' : '\'';
 | 
			
		||||
        terminator = (*(++ptr) == '<')? '>' : (*ptr == '\'')? '\'' : '}';
 | 
			
		||||
        goto NAMED_REF_OR_RECURSE;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
@@ -4514,13 +4852,14 @@ This function is used during the pre-compile phase when we are trying to find
 | 
			
		||||
out the amount of memory needed, as well as during the real compile phase. The
 | 
			
		||||
value of lengthptr distinguishes the two phases.
 | 
			
		||||
 | 
			
		||||
Argument:
 | 
			
		||||
Arguments:
 | 
			
		||||
  options        option bits, including any changes for this subpattern
 | 
			
		||||
  oldims         previous settings of ims option bits
 | 
			
		||||
  codeptr        -> the address of the current code pointer
 | 
			
		||||
  ptrptr         -> the address of the current pattern pointer
 | 
			
		||||
  errorcodeptr   -> pointer to error code variable
 | 
			
		||||
  lookbehind     TRUE if this is a lookbehind assertion
 | 
			
		||||
  reset_bracount TRUE to reset the count for each branch
 | 
			
		||||
  skipbytes      skip this many bytes at start (for brackets and OP_COND)
 | 
			
		||||
  firstbyteptr   place to put the first required character, or a negative number
 | 
			
		||||
  reqbyteptr     place to put the last required character, or a negative number
 | 
			
		||||
@@ -4534,8 +4873,9 @@ Returns:         TRUE on success
 | 
			
		||||
 | 
			
		||||
static BOOL
 | 
			
		||||
compile_regex(int options, int oldims, uschar **codeptr, const uschar **ptrptr,
 | 
			
		||||
  int *errorcodeptr, BOOL lookbehind, int skipbytes, int *firstbyteptr,
 | 
			
		||||
  int *reqbyteptr, branch_chain *bcptr, compile_data *cd, int *lengthptr)
 | 
			
		||||
  int *errorcodeptr, BOOL lookbehind, BOOL reset_bracount, int skipbytes,
 | 
			
		||||
  int *firstbyteptr, int *reqbyteptr, branch_chain *bcptr, compile_data *cd,
 | 
			
		||||
  int *lengthptr)
 | 
			
		||||
{
 | 
			
		||||
const uschar *ptr = *ptrptr;
 | 
			
		||||
uschar *code = *codeptr;
 | 
			
		||||
@@ -4545,6 +4885,8 @@ uschar *reverse_count = NULL;
 | 
			
		||||
int firstbyte, reqbyte;
 | 
			
		||||
int branchfirstbyte, branchreqbyte;
 | 
			
		||||
int length;
 | 
			
		||||
int orig_bracount;
 | 
			
		||||
int max_bracount;
 | 
			
		||||
branch_chain bc;
 | 
			
		||||
 | 
			
		||||
bc.outer = bcptr;
 | 
			
		||||
@@ -4573,8 +4915,14 @@ code += 1 + LINK_SIZE + skipbytes;
 | 
			
		||||
 | 
			
		||||
/* Loop for each alternative branch */
 | 
			
		||||
 | 
			
		||||
orig_bracount = max_bracount = cd->bracount;
 | 
			
		||||
for (;;)
 | 
			
		||||
  {
 | 
			
		||||
  /* For a (?| group, reset the capturing bracket count so that each branch
 | 
			
		||||
  uses the same numbers. */
 | 
			
		||||
 | 
			
		||||
  if (reset_bracount) cd->bracount = orig_bracount;
 | 
			
		||||
 | 
			
		||||
  /* Handle a change of ims options at the start of the branch */
 | 
			
		||||
 | 
			
		||||
  if ((options & PCRE_IMS) != oldims)
 | 
			
		||||
@@ -4604,6 +4952,11 @@ for (;;)
 | 
			
		||||
    return FALSE;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  /* Keep the highest bracket count in case (?| was used and some branch
 | 
			
		||||
  has fewer than the rest. */
 | 
			
		||||
 | 
			
		||||
  if (cd->bracount > max_bracount) max_bracount = cd->bracount;
 | 
			
		||||
 | 
			
		||||
  /* In the real compile phase, there is some post-processing to be done. */
 | 
			
		||||
 | 
			
		||||
  if (lengthptr == NULL)
 | 
			
		||||
@@ -4667,16 +5020,18 @@ for (;;)
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  /* Reached end of expression, either ')' or end of pattern. Go back through
 | 
			
		||||
  the alternative branches and reverse the chain of offsets, with the field in
 | 
			
		||||
  the BRA item now becoming an offset to the first alternative. If there are
 | 
			
		||||
  no alternatives, it points to the end of the group. The length in the
 | 
			
		||||
  terminating ket is always the length of the whole bracketed item. If any of
 | 
			
		||||
  the ims options were changed inside the group, compile a resetting op-code
 | 
			
		||||
  following, except at the very end of the pattern. Return leaving the pointer
 | 
			
		||||
  at the terminating char. */
 | 
			
		||||
  /* Reached end of expression, either ')' or end of pattern. In the real
 | 
			
		||||
  compile phase, go back through the alternative branches and reverse the chain
 | 
			
		||||
  of offsets, with the field in the BRA item now becoming an offset to the
 | 
			
		||||
  first alternative. If there are no alternatives, it points to the end of the
 | 
			
		||||
  group. The length in the terminating ket is always the length of the whole
 | 
			
		||||
  bracketed item. If any of the ims options were changed inside the group,
 | 
			
		||||
  compile a resetting op-code following, except at the very end of the pattern.
 | 
			
		||||
  Return leaving the pointer at the terminating char. */
 | 
			
		||||
 | 
			
		||||
  if (*ptr != '|')
 | 
			
		||||
    {
 | 
			
		||||
    if (lengthptr == NULL)
 | 
			
		||||
      {
 | 
			
		||||
      int branch_length = code - last_branch;
 | 
			
		||||
      do
 | 
			
		||||
@@ -4687,6 +5042,7 @@ for (;;)
 | 
			
		||||
        last_branch -= branch_length;
 | 
			
		||||
        }
 | 
			
		||||
      while (branch_length > 0);
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
    /* Fill in the ket */
 | 
			
		||||
 | 
			
		||||
@@ -4703,6 +5059,10 @@ for (;;)
 | 
			
		||||
      length += 2;
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
    /* Retain the highest bracket number, in case resetting was used. */
 | 
			
		||||
 | 
			
		||||
    cd->bracount = max_bracount;
 | 
			
		||||
 | 
			
		||||
    /* Set values to pass back */
 | 
			
		||||
 | 
			
		||||
    *codeptr = code;
 | 
			
		||||
@@ -4713,17 +5073,29 @@ for (;;)
 | 
			
		||||
    return TRUE;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  /* Another branch follows; insert an "or" node. Its length field points back
 | 
			
		||||
  /* Another branch follows. In the pre-compile phase, we can move the code
 | 
			
		||||
  pointer back to where it was for the start of the first branch. (That is,
 | 
			
		||||
  pretend that each branch is the only one.)
 | 
			
		||||
 | 
			
		||||
  In the real compile phase, insert an ALT node. Its length field points back
 | 
			
		||||
  to the previous branch while the bracket remains open. At the end the chain
 | 
			
		||||
  is reversed. It's done like this so that the start of the bracket has a
 | 
			
		||||
  zero offset until it is closed, making it possible to detect recursion. */
 | 
			
		||||
 | 
			
		||||
  if (lengthptr != NULL)
 | 
			
		||||
    {
 | 
			
		||||
    code = *codeptr + 1 + LINK_SIZE + skipbytes;
 | 
			
		||||
    length += 1 + LINK_SIZE;
 | 
			
		||||
    }
 | 
			
		||||
  else
 | 
			
		||||
    {
 | 
			
		||||
    *code = OP_ALT;
 | 
			
		||||
    PUT(code, 1, code - last_branch);
 | 
			
		||||
    bc.current = last_branch = code;
 | 
			
		||||
    code += 1 + LINK_SIZE;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  ptr++;
 | 
			
		||||
  length += 1 + LINK_SIZE;
 | 
			
		||||
  }
 | 
			
		||||
/* Control never reaches here */
 | 
			
		||||
}
 | 
			
		||||
@@ -4990,7 +5362,7 @@ Returns:        pointer to compiled data block, or NULL on error,
 | 
			
		||||
                with errorptr and erroroffset set
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
PCRE_DATA_SCOPE pcre *
 | 
			
		||||
PCRE_EXP_DEFN pcre *
 | 
			
		||||
pcre_compile(const char *pattern, int options, const char **errorptr,
 | 
			
		||||
  int *erroroffset, const unsigned char *tables)
 | 
			
		||||
{
 | 
			
		||||
@@ -4998,7 +5370,7 @@ return pcre_compile2(pattern, options, NULL, errorptr, erroroffset, tables);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
PCRE_DATA_SCOPE pcre *
 | 
			
		||||
PCRE_EXP_DEFN pcre *
 | 
			
		||||
pcre_compile2(const char *pattern, int options, int *errorcodeptr,
 | 
			
		||||
  const char **errorptr, int *erroroffset, const unsigned char *tables)
 | 
			
		||||
{
 | 
			
		||||
@@ -5047,7 +5419,7 @@ if (errorcodeptr != NULL) *errorcodeptr = ERR0;
 | 
			
		||||
if (erroroffset == NULL)
 | 
			
		||||
  {
 | 
			
		||||
  errorcode = ERR16;
 | 
			
		||||
  goto PCRE_EARLY_ERROR_RETURN;
 | 
			
		||||
  goto PCRE_EARLY_ERROR_RETURN2;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
*erroroffset = 0;
 | 
			
		||||
@@ -5060,7 +5432,7 @@ if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 &&
 | 
			
		||||
     (*erroroffset = _pcre_valid_utf8((uschar *)pattern, -1)) >= 0)
 | 
			
		||||
  {
 | 
			
		||||
  errorcode = ERR44;
 | 
			
		||||
  goto PCRE_UTF8_ERROR_RETURN;
 | 
			
		||||
  goto PCRE_EARLY_ERROR_RETURN2;
 | 
			
		||||
  }
 | 
			
		||||
#else
 | 
			
		||||
if ((options & PCRE_UTF8) != 0)
 | 
			
		||||
@@ -5085,7 +5457,8 @@ cd->cbits = tables + cbits_offset;
 | 
			
		||||
cd->ctypes = tables + ctypes_offset;
 | 
			
		||||
 | 
			
		||||
/* Handle different types of newline. The three bits give seven cases. The
 | 
			
		||||
current code allows for fixed one- or two-byte sequences, plus "any". */
 | 
			
		||||
current code allows for fixed one- or two-byte sequences, plus "any" and
 | 
			
		||||
"anycrlf". */
 | 
			
		||||
 | 
			
		||||
switch (options & (PCRE_NEWLINE_CRLF | PCRE_NEWLINE_ANY))
 | 
			
		||||
  {
 | 
			
		||||
@@ -5095,10 +5468,15 @@ switch (options & (PCRE_NEWLINE_CRLF | PCRE_NEWLINE_ANY))
 | 
			
		||||
  case PCRE_NEWLINE_CR+
 | 
			
		||||
       PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
 | 
			
		||||
  case PCRE_NEWLINE_ANY: newline = -1; break;
 | 
			
		||||
  case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
 | 
			
		||||
  default: errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
if (newline < 0)
 | 
			
		||||
if (newline == -2)
 | 
			
		||||
  {
 | 
			
		||||
  cd->nltype = NLTYPE_ANYCRLF;
 | 
			
		||||
  }
 | 
			
		||||
else if (newline < 0)
 | 
			
		||||
  {
 | 
			
		||||
  cd->nltype = NLTYPE_ANY;
 | 
			
		||||
  }
 | 
			
		||||
@@ -5159,7 +5537,8 @@ outside can help speed up starting point checks. */
 | 
			
		||||
code = cworkspace;
 | 
			
		||||
*code = OP_BRA;
 | 
			
		||||
(void)compile_regex(cd->external_options, cd->external_options & PCRE_IMS,
 | 
			
		||||
  &code, &ptr, &errorcode, FALSE, 0, &firstbyte, &reqbyte, NULL, cd, &length);
 | 
			
		||||
  &code, &ptr, &errorcode, FALSE, FALSE, 0, &firstbyte, &reqbyte, NULL, cd,
 | 
			
		||||
  &length);
 | 
			
		||||
if (errorcode != 0) goto PCRE_EARLY_ERROR_RETURN;
 | 
			
		||||
 | 
			
		||||
DPRINTF(("end pre-compile: length=%d workspace=%d\n", length,
 | 
			
		||||
@@ -5227,7 +5606,7 @@ ptr = (const uschar *)pattern;
 | 
			
		||||
code = (uschar *)codestart;
 | 
			
		||||
*code = OP_BRA;
 | 
			
		||||
(void)compile_regex(re->options, re->options & PCRE_IMS, &code, &ptr,
 | 
			
		||||
  &errorcode, FALSE, 0, &firstbyte, &reqbyte, NULL, cd, NULL);
 | 
			
		||||
  &errorcode, FALSE, FALSE, 0, &firstbyte, &reqbyte, NULL, cd, NULL);
 | 
			
		||||
re->top_bracket = cd->bracount;
 | 
			
		||||
re->top_backref = cd->top_backref;
 | 
			
		||||
 | 
			
		||||
@@ -5272,9 +5651,7 @@ if (errorcode != 0)
 | 
			
		||||
  (pcre_free)(re);
 | 
			
		||||
  PCRE_EARLY_ERROR_RETURN:
 | 
			
		||||
  *erroroffset = ptr - (const uschar *)pattern;
 | 
			
		||||
#ifdef SUPPORT_UTF8
 | 
			
		||||
  PCRE_UTF8_ERROR_RETURN:
 | 
			
		||||
#endif
 | 
			
		||||
  PCRE_EARLY_ERROR_RETURN2:
 | 
			
		||||
  *errorptr = error_texts + error_texts_offsets[errorcode];
 | 
			
		||||
  if (errorcodeptr != NULL) *errorcodeptr = errorcode;
 | 
			
		||||
  return NULL;
 | 
			
		||||
@@ -5364,7 +5741,7 @@ if ((re->options & PCRE_REQCHSET) != 0)
 | 
			
		||||
    else printf("Req char = \\x%02x%s\n", ch, caseless);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
pcre_printint(re, stdout);
 | 
			
		||||
pcre_printint(re, stdout, TRUE);
 | 
			
		||||
 | 
			
		||||
/* This check is done here in the debugging case so that the code that
 | 
			
		||||
was compiled can be seen. */
 | 
			
		||||
 
 | 
			
		||||
@@ -6,7 +6,7 @@
 | 
			
		||||
and semantics are as close as possible to those of the Perl 5 language.
 | 
			
		||||
 | 
			
		||||
                       Written by Philip Hazel
 | 
			
		||||
           Copyright (c) 1997-2006 University of Cambridge
 | 
			
		||||
           Copyright (c) 1997-2007 University of Cambridge
 | 
			
		||||
 | 
			
		||||
-----------------------------------------------------------------------------
 | 
			
		||||
Redistribution and use in source and binary forms, with or without
 | 
			
		||||
@@ -58,7 +58,7 @@ Arguments:
 | 
			
		||||
Returns:           0 if data returned, negative on error
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
PCRE_DATA_SCOPE int
 | 
			
		||||
PCRE_EXP_DEFN int
 | 
			
		||||
pcre_config(int what, void *where)
 | 
			
		||||
{
 | 
			
		||||
switch (what)
 | 
			
		||||
 
 | 
			
		||||
@@ -6,7 +6,7 @@
 | 
			
		||||
and semantics are as close as possible to those of the Perl 5 language.
 | 
			
		||||
 | 
			
		||||
                       Written by Philip Hazel
 | 
			
		||||
           Copyright (c) 1997-2006 University of Cambridge
 | 
			
		||||
           Copyright (c) 1997-2007 University of Cambridge
 | 
			
		||||
 | 
			
		||||
-----------------------------------------------------------------------------
 | 
			
		||||
Redistribution and use in source and binary forms, with or without
 | 
			
		||||
@@ -63,24 +63,30 @@ applications. */
 | 
			
		||||
 | 
			
		||||
/* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes
 | 
			
		||||
into others, under special conditions. A gap of 20 between the blocks should be
 | 
			
		||||
enough. */
 | 
			
		||||
enough. The resulting opcodes don't have to be less than 256 because they are
 | 
			
		||||
never stored, so we push them well clear of the normal opcodes. */
 | 
			
		||||
 | 
			
		||||
#define OP_PROP_EXTRA 100
 | 
			
		||||
#define OP_EXTUNI_EXTRA 120
 | 
			
		||||
#define OP_ANYNL_EXTRA 140
 | 
			
		||||
#define OP_PROP_EXTRA       300
 | 
			
		||||
#define OP_EXTUNI_EXTRA     320
 | 
			
		||||
#define OP_ANYNL_EXTRA      340
 | 
			
		||||
#define OP_HSPACE_EXTRA     360
 | 
			
		||||
#define OP_VSPACE_EXTRA     380
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/* This table identifies those opcodes that are followed immediately by a
 | 
			
		||||
character that is to be tested in some way. This makes it possible to
 | 
			
		||||
centralize the loading of these characters. In the case of Type * etc, the
 | 
			
		||||
"character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
 | 
			
		||||
small value. */
 | 
			
		||||
small value. ***NOTE*** If the start of this table is modified, the two tables
 | 
			
		||||
that follow must also be modified. */
 | 
			
		||||
 | 
			
		||||
static uschar coptable[] = {
 | 
			
		||||
  0,                             /* End                                    */
 | 
			
		||||
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* \A, \G, \B, \b, \D, \d, \S, \s, \W, \w */
 | 
			
		||||
  0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */
 | 
			
		||||
  0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */
 | 
			
		||||
  0, 0,                          /* Any, Anybyte                           */
 | 
			
		||||
  0, 0, 0, 0,                    /* NOTPROP, PROP, EXTUNI, ANYNL           */
 | 
			
		||||
  0, 0, 0,                       /* NOTPROP, PROP, EXTUNI                  */
 | 
			
		||||
  0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */
 | 
			
		||||
  0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */
 | 
			
		||||
  1,                             /* Char                                   */
 | 
			
		||||
  1,                             /* Charnc                                 */
 | 
			
		||||
@@ -127,7 +133,7 @@ static uschar coptable[] = {
 | 
			
		||||
and \w */
 | 
			
		||||
 | 
			
		||||
static uschar toptable1[] = {
 | 
			
		||||
  0, 0, 0, 0, 0,
 | 
			
		||||
  0, 0, 0, 0, 0, 0,
 | 
			
		||||
  ctype_digit, ctype_digit,
 | 
			
		||||
  ctype_space, ctype_space,
 | 
			
		||||
  ctype_word,  ctype_word,
 | 
			
		||||
@@ -135,7 +141,7 @@ static uschar toptable1[] = {
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
static uschar toptable2[] = {
 | 
			
		||||
  0, 0, 0, 0, 0,
 | 
			
		||||
  0, 0, 0, 0, 0, 0,
 | 
			
		||||
  ctype_digit, 0,
 | 
			
		||||
  ctype_space, 0,
 | 
			
		||||
  ctype_word,  0,
 | 
			
		||||
@@ -500,7 +506,9 @@ for (;;)
 | 
			
		||||
    const uschar *code;
 | 
			
		||||
    int state_offset = current_state->offset;
 | 
			
		||||
    int count, codevalue;
 | 
			
		||||
#ifdef SUPPORT_UCP
 | 
			
		||||
    int chartype, script;
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifdef DEBUG
 | 
			
		||||
    printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
 | 
			
		||||
@@ -555,10 +563,10 @@ for (;;)
 | 
			
		||||
    permitted.
 | 
			
		||||
 | 
			
		||||
    We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
 | 
			
		||||
    argument that is not a data character - but is always one byte long.
 | 
			
		||||
    Unfortunately, we have to take special action to deal with  \P, \p, and
 | 
			
		||||
    \X in this case. To keep the other cases fast, convert these ones to new
 | 
			
		||||
    opcodes. */
 | 
			
		||||
    argument that is not a data character - but is always one byte long. We
 | 
			
		||||
    have to take special action to deal with  \P, \p, \H, \h, \V, \v and \X in
 | 
			
		||||
    this case. To keep the other cases fast, convert these ones to new opcodes.
 | 
			
		||||
    */
 | 
			
		||||
 | 
			
		||||
    if (coptable[codevalue] > 0)
 | 
			
		||||
      {
 | 
			
		||||
@@ -576,6 +584,10 @@ for (;;)
 | 
			
		||||
          case OP_PROP: codevalue += OP_PROP_EXTRA; break;
 | 
			
		||||
          case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;
 | 
			
		||||
          case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;
 | 
			
		||||
          case OP_NOT_HSPACE:
 | 
			
		||||
          case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break;
 | 
			
		||||
          case OP_NOT_VSPACE:
 | 
			
		||||
          case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break;
 | 
			
		||||
          default: break;
 | 
			
		||||
          }
 | 
			
		||||
        }
 | 
			
		||||
@@ -783,13 +795,12 @@ for (;;)
 | 
			
		||||
      break;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#ifdef SUPPORT_UCP
 | 
			
		||||
 | 
			
		||||
      /*-----------------------------------------------------------------*/
 | 
			
		||||
      /* Check the next character by Unicode property. We will get here only
 | 
			
		||||
      if the support is in the binary; otherwise a compile-time error occurs.
 | 
			
		||||
      */
 | 
			
		||||
 | 
			
		||||
#ifdef SUPPORT_UCP
 | 
			
		||||
      case OP_PROP:
 | 
			
		||||
      case OP_NOTPROP:
 | 
			
		||||
      if (clen > 0)
 | 
			
		||||
@@ -970,6 +981,7 @@ for (;;)
 | 
			
		||||
      argument. It keeps the code above fast for the other cases. The argument
 | 
			
		||||
      is in the d variable. */
 | 
			
		||||
 | 
			
		||||
#ifdef SUPPORT_UCP
 | 
			
		||||
      case OP_PROP_EXTRA + OP_TYPEPLUS:
 | 
			
		||||
      case OP_PROP_EXTRA + OP_TYPEMINPLUS:
 | 
			
		||||
      case OP_PROP_EXTRA + OP_TYPEPOSPLUS:
 | 
			
		||||
@@ -1049,6 +1061,7 @@ for (;;)
 | 
			
		||||
        ADD_NEW_DATA(-state_offset, count, ncount);
 | 
			
		||||
        }
 | 
			
		||||
      break;
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
      /*-----------------------------------------------------------------*/
 | 
			
		||||
      case OP_ANYNL_EXTRA + OP_TYPEPLUS:
 | 
			
		||||
@@ -1085,6 +1098,97 @@ for (;;)
 | 
			
		||||
      break;
 | 
			
		||||
 | 
			
		||||
      /*-----------------------------------------------------------------*/
 | 
			
		||||
      case OP_VSPACE_EXTRA + OP_TYPEPLUS:
 | 
			
		||||
      case OP_VSPACE_EXTRA + OP_TYPEMINPLUS:
 | 
			
		||||
      case OP_VSPACE_EXTRA + OP_TYPEPOSPLUS:
 | 
			
		||||
      count = current_state->count;  /* Already matched */
 | 
			
		||||
      if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
 | 
			
		||||
      if (clen > 0)
 | 
			
		||||
        {
 | 
			
		||||
        BOOL OK;
 | 
			
		||||
        switch (c)
 | 
			
		||||
          {
 | 
			
		||||
          case 0x000a:
 | 
			
		||||
          case 0x000b:
 | 
			
		||||
          case 0x000c:
 | 
			
		||||
          case 0x000d:
 | 
			
		||||
          case 0x0085:
 | 
			
		||||
          case 0x2028:
 | 
			
		||||
          case 0x2029:
 | 
			
		||||
          OK = TRUE;
 | 
			
		||||
          break;
 | 
			
		||||
 | 
			
		||||
          default:
 | 
			
		||||
          OK = FALSE;
 | 
			
		||||
          break;
 | 
			
		||||
          }
 | 
			
		||||
 | 
			
		||||
        if (OK == (d == OP_VSPACE))
 | 
			
		||||
          {
 | 
			
		||||
          if (count > 0 && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS)
 | 
			
		||||
            {
 | 
			
		||||
            active_count--;           /* Remove non-match possibility */
 | 
			
		||||
            next_active_state--;
 | 
			
		||||
            }
 | 
			
		||||
          count++;
 | 
			
		||||
          ADD_NEW_DATA(-state_offset, count, 0);
 | 
			
		||||
          }
 | 
			
		||||
        }
 | 
			
		||||
      break;
 | 
			
		||||
 | 
			
		||||
      /*-----------------------------------------------------------------*/
 | 
			
		||||
      case OP_HSPACE_EXTRA + OP_TYPEPLUS:
 | 
			
		||||
      case OP_HSPACE_EXTRA + OP_TYPEMINPLUS:
 | 
			
		||||
      case OP_HSPACE_EXTRA + OP_TYPEPOSPLUS:
 | 
			
		||||
      count = current_state->count;  /* Already matched */
 | 
			
		||||
      if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
 | 
			
		||||
      if (clen > 0)
 | 
			
		||||
        {
 | 
			
		||||
        BOOL OK;
 | 
			
		||||
        switch (c)
 | 
			
		||||
          {
 | 
			
		||||
          case 0x09:      /* HT */
 | 
			
		||||
          case 0x20:      /* SPACE */
 | 
			
		||||
          case 0xa0:      /* NBSP */
 | 
			
		||||
          case 0x1680:    /* OGHAM SPACE MARK */
 | 
			
		||||
          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
 | 
			
		||||
          case 0x2000:    /* EN QUAD */
 | 
			
		||||
          case 0x2001:    /* EM QUAD */
 | 
			
		||||
          case 0x2002:    /* EN SPACE */
 | 
			
		||||
          case 0x2003:    /* EM SPACE */
 | 
			
		||||
          case 0x2004:    /* THREE-PER-EM SPACE */
 | 
			
		||||
          case 0x2005:    /* FOUR-PER-EM SPACE */
 | 
			
		||||
          case 0x2006:    /* SIX-PER-EM SPACE */
 | 
			
		||||
          case 0x2007:    /* FIGURE SPACE */
 | 
			
		||||
          case 0x2008:    /* PUNCTUATION SPACE */
 | 
			
		||||
          case 0x2009:    /* THIN SPACE */
 | 
			
		||||
          case 0x200A:    /* HAIR SPACE */
 | 
			
		||||
          case 0x202f:    /* NARROW NO-BREAK SPACE */
 | 
			
		||||
          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
 | 
			
		||||
          case 0x3000:    /* IDEOGRAPHIC SPACE */
 | 
			
		||||
          OK = TRUE;
 | 
			
		||||
          break;
 | 
			
		||||
 | 
			
		||||
          default:
 | 
			
		||||
          OK = FALSE;
 | 
			
		||||
          break;
 | 
			
		||||
          }
 | 
			
		||||
 | 
			
		||||
        if (OK == (d == OP_HSPACE))
 | 
			
		||||
          {
 | 
			
		||||
          if (count > 0 && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS)
 | 
			
		||||
            {
 | 
			
		||||
            active_count--;           /* Remove non-match possibility */
 | 
			
		||||
            next_active_state--;
 | 
			
		||||
            }
 | 
			
		||||
          count++;
 | 
			
		||||
          ADD_NEW_DATA(-state_offset, count, 0);
 | 
			
		||||
          }
 | 
			
		||||
        }
 | 
			
		||||
      break;
 | 
			
		||||
 | 
			
		||||
      /*-----------------------------------------------------------------*/
 | 
			
		||||
#ifdef SUPPORT_UCP
 | 
			
		||||
      case OP_PROP_EXTRA + OP_TYPEQUERY:
 | 
			
		||||
      case OP_PROP_EXTRA + OP_TYPEMINQUERY:
 | 
			
		||||
      case OP_PROP_EXTRA + OP_TYPEPOSQUERY:
 | 
			
		||||
@@ -1182,6 +1286,7 @@ for (;;)
 | 
			
		||||
        ADD_NEW_DATA(-(state_offset + count), 0, ncount);
 | 
			
		||||
        }
 | 
			
		||||
      break;
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
      /*-----------------------------------------------------------------*/
 | 
			
		||||
      case OP_ANYNL_EXTRA + OP_TYPEQUERY:
 | 
			
		||||
@@ -1226,6 +1331,112 @@ for (;;)
 | 
			
		||||
      break;
 | 
			
		||||
 | 
			
		||||
      /*-----------------------------------------------------------------*/
 | 
			
		||||
      case OP_VSPACE_EXTRA + OP_TYPEQUERY:
 | 
			
		||||
      case OP_VSPACE_EXTRA + OP_TYPEMINQUERY:
 | 
			
		||||
      case OP_VSPACE_EXTRA + OP_TYPEPOSQUERY:
 | 
			
		||||
      count = 2;
 | 
			
		||||
      goto QS4;
 | 
			
		||||
 | 
			
		||||
      case OP_VSPACE_EXTRA + OP_TYPESTAR:
 | 
			
		||||
      case OP_VSPACE_EXTRA + OP_TYPEMINSTAR:
 | 
			
		||||
      case OP_VSPACE_EXTRA + OP_TYPEPOSSTAR:
 | 
			
		||||
      count = 0;
 | 
			
		||||
 | 
			
		||||
      QS4:
 | 
			
		||||
      ADD_ACTIVE(state_offset + 2, 0);
 | 
			
		||||
      if (clen > 0)
 | 
			
		||||
        {
 | 
			
		||||
        BOOL OK;
 | 
			
		||||
        switch (c)
 | 
			
		||||
          {
 | 
			
		||||
          case 0x000a:
 | 
			
		||||
          case 0x000b:
 | 
			
		||||
          case 0x000c:
 | 
			
		||||
          case 0x000d:
 | 
			
		||||
          case 0x0085:
 | 
			
		||||
          case 0x2028:
 | 
			
		||||
          case 0x2029:
 | 
			
		||||
          OK = TRUE;
 | 
			
		||||
          break;
 | 
			
		||||
 | 
			
		||||
          default:
 | 
			
		||||
          OK = FALSE;
 | 
			
		||||
          break;
 | 
			
		||||
          }
 | 
			
		||||
        if (OK == (d == OP_VSPACE))
 | 
			
		||||
          {
 | 
			
		||||
          if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR ||
 | 
			
		||||
              codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY)
 | 
			
		||||
            {
 | 
			
		||||
            active_count--;           /* Remove non-match possibility */
 | 
			
		||||
            next_active_state--;
 | 
			
		||||
            }
 | 
			
		||||
          ADD_NEW_DATA(-(state_offset + count), 0, 0);
 | 
			
		||||
          }
 | 
			
		||||
        }
 | 
			
		||||
      break;
 | 
			
		||||
 | 
			
		||||
      /*-----------------------------------------------------------------*/
 | 
			
		||||
      case OP_HSPACE_EXTRA + OP_TYPEQUERY:
 | 
			
		||||
      case OP_HSPACE_EXTRA + OP_TYPEMINQUERY:
 | 
			
		||||
      case OP_HSPACE_EXTRA + OP_TYPEPOSQUERY:
 | 
			
		||||
      count = 2;
 | 
			
		||||
      goto QS5;
 | 
			
		||||
 | 
			
		||||
      case OP_HSPACE_EXTRA + OP_TYPESTAR:
 | 
			
		||||
      case OP_HSPACE_EXTRA + OP_TYPEMINSTAR:
 | 
			
		||||
      case OP_HSPACE_EXTRA + OP_TYPEPOSSTAR:
 | 
			
		||||
      count = 0;
 | 
			
		||||
 | 
			
		||||
      QS5:
 | 
			
		||||
      ADD_ACTIVE(state_offset + 2, 0);
 | 
			
		||||
      if (clen > 0)
 | 
			
		||||
        {
 | 
			
		||||
        BOOL OK;
 | 
			
		||||
        switch (c)
 | 
			
		||||
          {
 | 
			
		||||
          case 0x09:      /* HT */
 | 
			
		||||
          case 0x20:      /* SPACE */
 | 
			
		||||
          case 0xa0:      /* NBSP */
 | 
			
		||||
          case 0x1680:    /* OGHAM SPACE MARK */
 | 
			
		||||
          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
 | 
			
		||||
          case 0x2000:    /* EN QUAD */
 | 
			
		||||
          case 0x2001:    /* EM QUAD */
 | 
			
		||||
          case 0x2002:    /* EN SPACE */
 | 
			
		||||
          case 0x2003:    /* EM SPACE */
 | 
			
		||||
          case 0x2004:    /* THREE-PER-EM SPACE */
 | 
			
		||||
          case 0x2005:    /* FOUR-PER-EM SPACE */
 | 
			
		||||
          case 0x2006:    /* SIX-PER-EM SPACE */
 | 
			
		||||
          case 0x2007:    /* FIGURE SPACE */
 | 
			
		||||
          case 0x2008:    /* PUNCTUATION SPACE */
 | 
			
		||||
          case 0x2009:    /* THIN SPACE */
 | 
			
		||||
          case 0x200A:    /* HAIR SPACE */
 | 
			
		||||
          case 0x202f:    /* NARROW NO-BREAK SPACE */
 | 
			
		||||
          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
 | 
			
		||||
          case 0x3000:    /* IDEOGRAPHIC SPACE */
 | 
			
		||||
          OK = TRUE;
 | 
			
		||||
          break;
 | 
			
		||||
 | 
			
		||||
          default:
 | 
			
		||||
          OK = FALSE;
 | 
			
		||||
          break;
 | 
			
		||||
          }
 | 
			
		||||
 | 
			
		||||
        if (OK == (d == OP_HSPACE))
 | 
			
		||||
          {
 | 
			
		||||
          if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR ||
 | 
			
		||||
              codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY)
 | 
			
		||||
            {
 | 
			
		||||
            active_count--;           /* Remove non-match possibility */
 | 
			
		||||
            next_active_state--;
 | 
			
		||||
            }
 | 
			
		||||
          ADD_NEW_DATA(-(state_offset + count), 0, 0);
 | 
			
		||||
          }
 | 
			
		||||
        }
 | 
			
		||||
      break;
 | 
			
		||||
 | 
			
		||||
      /*-----------------------------------------------------------------*/
 | 
			
		||||
#ifdef SUPPORT_UCP
 | 
			
		||||
      case OP_PROP_EXTRA + OP_TYPEEXACT:
 | 
			
		||||
      case OP_PROP_EXTRA + OP_TYPEUPTO:
 | 
			
		||||
      case OP_PROP_EXTRA + OP_TYPEMINUPTO:
 | 
			
		||||
@@ -1313,6 +1524,7 @@ for (;;)
 | 
			
		||||
          { ADD_NEW_DATA(-state_offset, count, ncount); }
 | 
			
		||||
        }
 | 
			
		||||
      break;
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
      /*-----------------------------------------------------------------*/
 | 
			
		||||
      case OP_ANYNL_EXTRA + OP_TYPEEXACT:
 | 
			
		||||
@@ -1352,6 +1564,103 @@ for (;;)
 | 
			
		||||
        }
 | 
			
		||||
      break;
 | 
			
		||||
 | 
			
		||||
      /*-----------------------------------------------------------------*/
 | 
			
		||||
      case OP_VSPACE_EXTRA + OP_TYPEEXACT:
 | 
			
		||||
      case OP_VSPACE_EXTRA + OP_TYPEUPTO:
 | 
			
		||||
      case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
 | 
			
		||||
      case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
 | 
			
		||||
      if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
 | 
			
		||||
        { ADD_ACTIVE(state_offset + 4, 0); }
 | 
			
		||||
      count = current_state->count;  /* Number already matched */
 | 
			
		||||
      if (clen > 0)
 | 
			
		||||
        {
 | 
			
		||||
        BOOL OK;
 | 
			
		||||
        switch (c)
 | 
			
		||||
          {
 | 
			
		||||
          case 0x000a:
 | 
			
		||||
          case 0x000b:
 | 
			
		||||
          case 0x000c:
 | 
			
		||||
          case 0x000d:
 | 
			
		||||
          case 0x0085:
 | 
			
		||||
          case 0x2028:
 | 
			
		||||
          case 0x2029:
 | 
			
		||||
          OK = TRUE;
 | 
			
		||||
          break;
 | 
			
		||||
 | 
			
		||||
          default:
 | 
			
		||||
          OK = FALSE;
 | 
			
		||||
          }
 | 
			
		||||
 | 
			
		||||
        if (OK == (d == OP_VSPACE))
 | 
			
		||||
          {
 | 
			
		||||
          if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO)
 | 
			
		||||
            {
 | 
			
		||||
            active_count--;           /* Remove non-match possibility */
 | 
			
		||||
            next_active_state--;
 | 
			
		||||
            }
 | 
			
		||||
          if (++count >= GET2(code, 1))
 | 
			
		||||
            { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }
 | 
			
		||||
          else
 | 
			
		||||
            { ADD_NEW_DATA(-state_offset, count, 0); }
 | 
			
		||||
          }
 | 
			
		||||
        }
 | 
			
		||||
      break;
 | 
			
		||||
 | 
			
		||||
      /*-----------------------------------------------------------------*/
 | 
			
		||||
      case OP_HSPACE_EXTRA + OP_TYPEEXACT:
 | 
			
		||||
      case OP_HSPACE_EXTRA + OP_TYPEUPTO:
 | 
			
		||||
      case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
 | 
			
		||||
      case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
 | 
			
		||||
      if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
 | 
			
		||||
        { ADD_ACTIVE(state_offset + 4, 0); }
 | 
			
		||||
      count = current_state->count;  /* Number already matched */
 | 
			
		||||
      if (clen > 0)
 | 
			
		||||
        {
 | 
			
		||||
        BOOL OK;
 | 
			
		||||
        switch (c)
 | 
			
		||||
          {
 | 
			
		||||
          case 0x09:      /* HT */
 | 
			
		||||
          case 0x20:      /* SPACE */
 | 
			
		||||
          case 0xa0:      /* NBSP */
 | 
			
		||||
          case 0x1680:    /* OGHAM SPACE MARK */
 | 
			
		||||
          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
 | 
			
		||||
          case 0x2000:    /* EN QUAD */
 | 
			
		||||
          case 0x2001:    /* EM QUAD */
 | 
			
		||||
          case 0x2002:    /* EN SPACE */
 | 
			
		||||
          case 0x2003:    /* EM SPACE */
 | 
			
		||||
          case 0x2004:    /* THREE-PER-EM SPACE */
 | 
			
		||||
          case 0x2005:    /* FOUR-PER-EM SPACE */
 | 
			
		||||
          case 0x2006:    /* SIX-PER-EM SPACE */
 | 
			
		||||
          case 0x2007:    /* FIGURE SPACE */
 | 
			
		||||
          case 0x2008:    /* PUNCTUATION SPACE */
 | 
			
		||||
          case 0x2009:    /* THIN SPACE */
 | 
			
		||||
          case 0x200A:    /* HAIR SPACE */
 | 
			
		||||
          case 0x202f:    /* NARROW NO-BREAK SPACE */
 | 
			
		||||
          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
 | 
			
		||||
          case 0x3000:    /* IDEOGRAPHIC SPACE */
 | 
			
		||||
          OK = TRUE;
 | 
			
		||||
          break;
 | 
			
		||||
 | 
			
		||||
          default:
 | 
			
		||||
          OK = FALSE;
 | 
			
		||||
          break;
 | 
			
		||||
          }
 | 
			
		||||
 | 
			
		||||
        if (OK == (d == OP_HSPACE))
 | 
			
		||||
          {
 | 
			
		||||
          if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO)
 | 
			
		||||
            {
 | 
			
		||||
            active_count--;           /* Remove non-match possibility */
 | 
			
		||||
            next_active_state--;
 | 
			
		||||
            }
 | 
			
		||||
          if (++count >= GET2(code, 1))
 | 
			
		||||
            { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }
 | 
			
		||||
          else
 | 
			
		||||
            { ADD_NEW_DATA(-state_offset, count, 0); }
 | 
			
		||||
          }
 | 
			
		||||
        }
 | 
			
		||||
      break;
 | 
			
		||||
 | 
			
		||||
/* ========================================================================== */
 | 
			
		||||
      /* These opcodes are followed by a character that is usually compared
 | 
			
		||||
      to the current subject character; it is loaded into d. We still get
 | 
			
		||||
@@ -1450,6 +1759,102 @@ for (;;)
 | 
			
		||||
        }
 | 
			
		||||
      break;
 | 
			
		||||
 | 
			
		||||
      /*-----------------------------------------------------------------*/
 | 
			
		||||
      case OP_NOT_VSPACE:
 | 
			
		||||
      if (clen > 0) switch(c)
 | 
			
		||||
        {
 | 
			
		||||
        case 0x000a:
 | 
			
		||||
        case 0x000b:
 | 
			
		||||
        case 0x000c:
 | 
			
		||||
        case 0x000d:
 | 
			
		||||
        case 0x0085:
 | 
			
		||||
        case 0x2028:
 | 
			
		||||
        case 0x2029:
 | 
			
		||||
        break;
 | 
			
		||||
 | 
			
		||||
        default:
 | 
			
		||||
        ADD_NEW(state_offset + 1, 0);
 | 
			
		||||
        break;
 | 
			
		||||
        }
 | 
			
		||||
      break;
 | 
			
		||||
 | 
			
		||||
      /*-----------------------------------------------------------------*/
 | 
			
		||||
      case OP_VSPACE:
 | 
			
		||||
      if (clen > 0) switch(c)
 | 
			
		||||
        {
 | 
			
		||||
        case 0x000a:
 | 
			
		||||
        case 0x000b:
 | 
			
		||||
        case 0x000c:
 | 
			
		||||
        case 0x000d:
 | 
			
		||||
        case 0x0085:
 | 
			
		||||
        case 0x2028:
 | 
			
		||||
        case 0x2029:
 | 
			
		||||
        ADD_NEW(state_offset + 1, 0);
 | 
			
		||||
        break;
 | 
			
		||||
 | 
			
		||||
        default: break;
 | 
			
		||||
        }
 | 
			
		||||
      break;
 | 
			
		||||
 | 
			
		||||
      /*-----------------------------------------------------------------*/
 | 
			
		||||
      case OP_NOT_HSPACE:
 | 
			
		||||
      if (clen > 0) switch(c)
 | 
			
		||||
        {
 | 
			
		||||
        case 0x09:      /* HT */
 | 
			
		||||
        case 0x20:      /* SPACE */
 | 
			
		||||
        case 0xa0:      /* NBSP */
 | 
			
		||||
        case 0x1680:    /* OGHAM SPACE MARK */
 | 
			
		||||
        case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
 | 
			
		||||
        case 0x2000:    /* EN QUAD */
 | 
			
		||||
        case 0x2001:    /* EM QUAD */
 | 
			
		||||
        case 0x2002:    /* EN SPACE */
 | 
			
		||||
        case 0x2003:    /* EM SPACE */
 | 
			
		||||
        case 0x2004:    /* THREE-PER-EM SPACE */
 | 
			
		||||
        case 0x2005:    /* FOUR-PER-EM SPACE */
 | 
			
		||||
        case 0x2006:    /* SIX-PER-EM SPACE */
 | 
			
		||||
        case 0x2007:    /* FIGURE SPACE */
 | 
			
		||||
        case 0x2008:    /* PUNCTUATION SPACE */
 | 
			
		||||
        case 0x2009:    /* THIN SPACE */
 | 
			
		||||
        case 0x200A:    /* HAIR SPACE */
 | 
			
		||||
        case 0x202f:    /* NARROW NO-BREAK SPACE */
 | 
			
		||||
        case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
 | 
			
		||||
        case 0x3000:    /* IDEOGRAPHIC SPACE */
 | 
			
		||||
        break;
 | 
			
		||||
 | 
			
		||||
        default:
 | 
			
		||||
        ADD_NEW(state_offset + 1, 0);
 | 
			
		||||
        break;
 | 
			
		||||
        }
 | 
			
		||||
      break;
 | 
			
		||||
 | 
			
		||||
      /*-----------------------------------------------------------------*/
 | 
			
		||||
      case OP_HSPACE:
 | 
			
		||||
      if (clen > 0) switch(c)
 | 
			
		||||
        {
 | 
			
		||||
        case 0x09:      /* HT */
 | 
			
		||||
        case 0x20:      /* SPACE */
 | 
			
		||||
        case 0xa0:      /* NBSP */
 | 
			
		||||
        case 0x1680:    /* OGHAM SPACE MARK */
 | 
			
		||||
        case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
 | 
			
		||||
        case 0x2000:    /* EN QUAD */
 | 
			
		||||
        case 0x2001:    /* EM QUAD */
 | 
			
		||||
        case 0x2002:    /* EN SPACE */
 | 
			
		||||
        case 0x2003:    /* EM SPACE */
 | 
			
		||||
        case 0x2004:    /* THREE-PER-EM SPACE */
 | 
			
		||||
        case 0x2005:    /* FOUR-PER-EM SPACE */
 | 
			
		||||
        case 0x2006:    /* SIX-PER-EM SPACE */
 | 
			
		||||
        case 0x2007:    /* FIGURE SPACE */
 | 
			
		||||
        case 0x2008:    /* PUNCTUATION SPACE */
 | 
			
		||||
        case 0x2009:    /* THIN SPACE */
 | 
			
		||||
        case 0x200A:    /* HAIR SPACE */
 | 
			
		||||
        case 0x202f:    /* NARROW NO-BREAK SPACE */
 | 
			
		||||
        case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
 | 
			
		||||
        case 0x3000:    /* IDEOGRAPHIC SPACE */
 | 
			
		||||
        ADD_NEW(state_offset + 1, 0);
 | 
			
		||||
        break;
 | 
			
		||||
        }
 | 
			
		||||
      break;
 | 
			
		||||
 | 
			
		||||
      /*-----------------------------------------------------------------*/
 | 
			
		||||
      /* Match a negated single character. This is only used for one-byte
 | 
			
		||||
      characters, that is, we know that d < 256. The character we are
 | 
			
		||||
@@ -2057,7 +2462,7 @@ is not anchored.
 | 
			
		||||
 | 
			
		||||
Arguments:
 | 
			
		||||
  argument_re     points to the compiled expression
 | 
			
		||||
  extra_data      points to extra data or is NULL (not currently used)
 | 
			
		||||
  extra_data      points to extra data or is NULL
 | 
			
		||||
  subject         points to the subject string
 | 
			
		||||
  length          length of subject string (may contain binary zeros)
 | 
			
		||||
  start_offset    where to start in the subject string
 | 
			
		||||
@@ -2073,7 +2478,7 @@ Returns:          > 0 => number of match offset pairs placed in offsets
 | 
			
		||||
                 < -1 => some kind of unexpected problem
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
PCRE_DATA_SCOPE int
 | 
			
		||||
PCRE_EXP_DEFN int
 | 
			
		||||
pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
 | 
			
		||||
  const char *subject, int length, int start_offset, int options, int *offsets,
 | 
			
		||||
  int offsetcount, int *workspace, int wscount)
 | 
			
		||||
@@ -2163,10 +2568,10 @@ md->end_subject = end_subject;
 | 
			
		||||
md->moptions = options;
 | 
			
		||||
md->poptions = re->options;
 | 
			
		||||
 | 
			
		||||
/* Handle different types of newline. The two bits give four cases. If nothing
 | 
			
		||||
is set at run time, whatever was used at compile time applies. */
 | 
			
		||||
/* Handle different types of newline. The three bits give eight cases. If
 | 
			
		||||
nothing is set at run time, whatever was used at compile time applies. */
 | 
			
		||||
 | 
			
		||||
switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : options) &
 | 
			
		||||
switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
 | 
			
		||||
         PCRE_NEWLINE_BITS)
 | 
			
		||||
  {
 | 
			
		||||
  case 0: newline = NEWLINE; break;   /* Compile-time default */
 | 
			
		||||
@@ -2175,10 +2580,15 @@ switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : options) &
 | 
			
		||||
  case PCRE_NEWLINE_CR+
 | 
			
		||||
       PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
 | 
			
		||||
  case PCRE_NEWLINE_ANY: newline = -1; break;
 | 
			
		||||
  case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
 | 
			
		||||
  default: return PCRE_ERROR_BADNEWLINE;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
if (newline < 0)
 | 
			
		||||
if (newline == -2)
 | 
			
		||||
  {
 | 
			
		||||
  md->nltype = NLTYPE_ANYCRLF;
 | 
			
		||||
  }
 | 
			
		||||
else if (newline < 0)
 | 
			
		||||
  {
 | 
			
		||||
  md->nltype = NLTYPE_ANY;
 | 
			
		||||
  }
 | 
			
		||||
@@ -2308,6 +2718,16 @@ for (;;)
 | 
			
		||||
        {
 | 
			
		||||
        while (current_subject <= end_subject && !WAS_NEWLINE(current_subject))
 | 
			
		||||
          current_subject++;
 | 
			
		||||
 | 
			
		||||
        /* If we have just passed a CR and the newline option is ANY or
 | 
			
		||||
        ANYCRLF, and we are now at a LF, advance the match position by one more
 | 
			
		||||
        character. */
 | 
			
		||||
 | 
			
		||||
        if (current_subject[-1] == '\r' &&
 | 
			
		||||
             (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
 | 
			
		||||
             current_subject < end_subject &&
 | 
			
		||||
             *current_subject == '\n')
 | 
			
		||||
          current_subject++;
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
@@ -2416,11 +2836,14 @@ for (;;)
 | 
			
		||||
    }
 | 
			
		||||
  if (current_subject > end_subject) break;
 | 
			
		||||
 | 
			
		||||
  /* If we have just passed a CR and the newline option is CRLF or ANY, and we
 | 
			
		||||
  are now at a LF, advance the match position by one more character. */
 | 
			
		||||
  /* If we have just passed a CR and the newline option is CRLF or ANY or
 | 
			
		||||
  ANYCRLF, and we are now at a LF, advance the match position by one more
 | 
			
		||||
  character. */
 | 
			
		||||
 | 
			
		||||
  if (current_subject[-1] == '\r' &&
 | 
			
		||||
       (md->nltype == NLTYPE_ANY || md->nllen == 2) &&
 | 
			
		||||
       (md->nltype == NLTYPE_ANY ||
 | 
			
		||||
        md->nltype == NLTYPE_ANYCRLF ||
 | 
			
		||||
        md->nllen == 2) &&
 | 
			
		||||
       current_subject < end_subject &&
 | 
			
		||||
       *current_subject == '\n')
 | 
			
		||||
    current_subject++;
 | 
			
		||||
 
 | 
			
		||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							@@ -6,7 +6,7 @@
 | 
			
		||||
and semantics are as close as possible to those of the Perl 5 language.
 | 
			
		||||
 | 
			
		||||
                       Written by Philip Hazel
 | 
			
		||||
           Copyright (c) 1997-2006 University of Cambridge
 | 
			
		||||
           Copyright (c) 1997-2007 University of Cambridge
 | 
			
		||||
 | 
			
		||||
-----------------------------------------------------------------------------
 | 
			
		||||
Redistribution and use in source and binary forms, with or without
 | 
			
		||||
@@ -61,7 +61,7 @@ Arguments:
 | 
			
		||||
Returns:           0 if data returned, negative on error
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
PCRE_DATA_SCOPE int
 | 
			
		||||
PCRE_EXP_DEFN int
 | 
			
		||||
pcre_fullinfo(const pcre *argument_re, const pcre_extra *extra_data, int what,
 | 
			
		||||
  void *where)
 | 
			
		||||
{
 | 
			
		||||
@@ -140,6 +140,14 @@ switch (what)
 | 
			
		||||
  *((const uschar **)where) = (const uschar *)(_pcre_default_tables);
 | 
			
		||||
  break;
 | 
			
		||||
 | 
			
		||||
  case PCRE_INFO_OKPARTIAL:
 | 
			
		||||
  *((int *)where) = (re->options & PCRE_NOPARTIAL) == 0;
 | 
			
		||||
  break;
 | 
			
		||||
 | 
			
		||||
  case PCRE_INFO_JCHANGED:
 | 
			
		||||
  *((int *)where) = (re->options & PCRE_JCHANGED) != 0;
 | 
			
		||||
  break;
 | 
			
		||||
 | 
			
		||||
  default: return PCRE_ERROR_BADOPTION;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -6,7 +6,7 @@
 | 
			
		||||
and semantics are as close as possible to those of the Perl 5 language.
 | 
			
		||||
 | 
			
		||||
                       Written by Philip Hazel
 | 
			
		||||
           Copyright (c) 1997-2006 University of Cambridge
 | 
			
		||||
           Copyright (c) 1997-2007 University of Cambridge
 | 
			
		||||
 | 
			
		||||
-----------------------------------------------------------------------------
 | 
			
		||||
Redistribution and use in source and binary forms, with or without
 | 
			
		||||
 
 | 
			
		||||
@@ -6,7 +6,7 @@
 | 
			
		||||
and semantics are as close as possible to those of the Perl 5 language.
 | 
			
		||||
 | 
			
		||||
                       Written by Philip Hazel
 | 
			
		||||
           Copyright (c) 1997-2006 University of Cambridge
 | 
			
		||||
           Copyright (c) 1997-2007 University of Cambridge
 | 
			
		||||
 | 
			
		||||
-----------------------------------------------------------------------------
 | 
			
		||||
Redistribution and use in source and binary forms, with or without
 | 
			
		||||
@@ -46,14 +46,8 @@ indirection. These values can be changed by the caller, but are shared between
 | 
			
		||||
all threads. However, when compiling for Virtual Pascal, things are done
 | 
			
		||||
differently, and global variables are not used (see pcre.in). */
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#include "pcre_internal.h"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#ifdef __cplusplus
 | 
			
		||||
extern "C" int   (*pcre_callout)(pcre_callout_block *) = NULL;
 | 
			
		||||
#else
 | 
			
		||||
int   (*pcre_callout)(pcre_callout_block *) = NULL;
 | 
			
		||||
#endif
 | 
			
		||||
PCRE_EXP_DATA_DEFN int   (*pcre_callout)(pcre_callout_block *) = NULL;
 | 
			
		||||
 | 
			
		||||
/* End of pcre_globals.c */
 | 
			
		||||
 
 | 
			
		||||
@@ -6,7 +6,7 @@
 | 
			
		||||
and semantics are as close as possible to those of the Perl 5 language.
 | 
			
		||||
 | 
			
		||||
                       Written by Philip Hazel
 | 
			
		||||
           Copyright (c) 1997-2006 University of Cambridge
 | 
			
		||||
           Copyright (c) 1997-2007 University of Cambridge
 | 
			
		||||
 | 
			
		||||
-----------------------------------------------------------------------------
 | 
			
		||||
Redistribution and use in source and binary forms, with or without
 | 
			
		||||
@@ -68,7 +68,7 @@ Returns:        number of capturing subpatterns
 | 
			
		||||
                or negative values on error
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
PCRE_DATA_SCOPE int
 | 
			
		||||
PCRE_EXP_DEFN int
 | 
			
		||||
pcre_info(const pcre *argument_re, int *optptr, int *first_byte)
 | 
			
		||||
{
 | 
			
		||||
real_pcre internal_re;
 | 
			
		||||
 
 | 
			
		||||
@@ -7,7 +7,7 @@
 | 
			
		||||
and semantics are as close as possible to those of the Perl 5 language.
 | 
			
		||||
 | 
			
		||||
                       Written by Philip Hazel
 | 
			
		||||
           Copyright (c) 1997-2006 University of Cambridge
 | 
			
		||||
           Copyright (c) 1997-2007 University of Cambridge
 | 
			
		||||
 | 
			
		||||
-----------------------------------------------------------------------------
 | 
			
		||||
Redistribution and use in source and binary forms, with or without
 | 
			
		||||
@@ -83,8 +83,58 @@ setjmp and stdarg are used is when NO_RECURSE is set. */
 | 
			
		||||
#include <stdlib.h>
 | 
			
		||||
#include <string.h>
 | 
			
		||||
 | 
			
		||||
#ifndef PCRE_SPY
 | 
			
		||||
#define PCRE_DEFINITION       /* Win32 __declspec(export) trigger for .dll */
 | 
			
		||||
/* When compiling a DLL for Windows, the exported symbols have to be declared
 | 
			
		||||
using some MS magic. I found some useful information on this web page:
 | 
			
		||||
http://msdn2.microsoft.com/en-us/library/y4h7bcy6(VS.80).aspx. According to the
 | 
			
		||||
information there, using __declspec(dllexport) without "extern" we have a
 | 
			
		||||
definition; with "extern" we have a declaration. The settings here override the
 | 
			
		||||
setting in pcre.h (which is included below); it defines only PCRE_EXP_DECL,
 | 
			
		||||
which is all that is needed for applications (they just import the symbols). We
 | 
			
		||||
use:
 | 
			
		||||
 | 
			
		||||
  PCRE_EXP_DECL       for declarations
 | 
			
		||||
  PCRE_EXP_DEFN       for definitions of exported functions
 | 
			
		||||
  PCRE_EXP_DATA_DEFN  for definitions of exported variables
 | 
			
		||||
 | 
			
		||||
The reason for the two DEFN macros is that in non-Windows environments, one
 | 
			
		||||
does not want to have "extern" before variable definitions because it leads to
 | 
			
		||||
compiler warnings. So we distinguish between functions and variables. In
 | 
			
		||||
Windows, the two should always be the same.
 | 
			
		||||
 | 
			
		||||
The reason for wrapping this in #ifndef PCRE_EXP_DECL is so that pcretest,
 | 
			
		||||
which is an application, but needs to import this file in order to "peek" at
 | 
			
		||||
internals, can #include pcre.h first to get an application's-eye view.
 | 
			
		||||
 | 
			
		||||
In principle, people compiling for non-Windows, non-Unix-like (i.e. uncommon,
 | 
			
		||||
special-purpose environments) might want to stick other stuff in front of
 | 
			
		||||
exported symbols. That's why, in the non-Windows case, we set PCRE_EXP_DEFN and
 | 
			
		||||
PCRE_EXP_DATA_DEFN only if they are not already set. */
 | 
			
		||||
 | 
			
		||||
#ifndef PCRE_EXP_DECL
 | 
			
		||||
#  ifdef _WIN32
 | 
			
		||||
#    ifdef DLL_EXPORT
 | 
			
		||||
#      define PCRE_EXP_DECL       extern __declspec(dllexport)
 | 
			
		||||
#      define PCRE_EXP_DEFN       __declspec(dllexport)
 | 
			
		||||
#      define PCRE_EXP_DATA_DEFN  __declspec(dllexport)
 | 
			
		||||
#    else
 | 
			
		||||
#      define PCRE_EXP_DECL       extern
 | 
			
		||||
#      define PCRE_EXP_DEFN
 | 
			
		||||
#      define PCRE_EXP_DATA_DEFN
 | 
			
		||||
#    endif
 | 
			
		||||
#
 | 
			
		||||
#  else
 | 
			
		||||
#    ifdef __cplusplus
 | 
			
		||||
#      define PCRE_EXP_DECL       extern "C"
 | 
			
		||||
#    else
 | 
			
		||||
#      define PCRE_EXP_DECL       extern
 | 
			
		||||
#    endif
 | 
			
		||||
#    ifndef PCRE_EXP_DEFN
 | 
			
		||||
#      define PCRE_EXP_DEFN       PCRE_EXP_DECL
 | 
			
		||||
#    endif
 | 
			
		||||
#    ifndef PCRE_EXP_DATA_DEFN
 | 
			
		||||
#      define PCRE_EXP_DATA_DEFN
 | 
			
		||||
#    endif
 | 
			
		||||
#  endif
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
/* We need to have types that specify unsigned 16-bit and 32-bit integers. We
 | 
			
		||||
@@ -125,21 +175,22 @@ characters only go up to 0x7fffffff (though Unicode doesn't go beyond
 | 
			
		||||
#define NOTACHAR 0xffffffff
 | 
			
		||||
 | 
			
		||||
/* PCRE is able to support several different kinds of newline (CR, LF, CRLF,
 | 
			
		||||
and "all" at present). The following macros are used to package up testing for
 | 
			
		||||
newlines. NLBLOCK, PSSTART, and PSEND are defined in the various modules to
 | 
			
		||||
indicate in which datablock the parameters exist, and what the start/end of
 | 
			
		||||
string field names are. */
 | 
			
		||||
"any" and "anycrlf" at present). The following macros are used to package up
 | 
			
		||||
testing for newlines. NLBLOCK, PSSTART, and PSEND are defined in the various
 | 
			
		||||
modules to indicate in which datablock the parameters exist, and what the
 | 
			
		||||
start/end of string field names are. */
 | 
			
		||||
 | 
			
		||||
#define NLTYPE_FIXED    0     /* Newline is a fixed length string */
 | 
			
		||||
#define NLTYPE_ANY      1     /* Newline is any Unicode line ending */
 | 
			
		||||
#define NLTYPE_ANYCRLF  2     /* Newline is CR, LF, or CRLF */
 | 
			
		||||
 | 
			
		||||
/* This macro checks for a newline at the given position */
 | 
			
		||||
 | 
			
		||||
#define IS_NEWLINE(p) \
 | 
			
		||||
  ((NLBLOCK->nltype != NLTYPE_FIXED)? \
 | 
			
		||||
    ((p) < NLBLOCK->PSEND && \
 | 
			
		||||
     _pcre_is_newline((p), NLBLOCK->PSEND, &(NLBLOCK->nllen), utf8) \
 | 
			
		||||
    ) \
 | 
			
		||||
     _pcre_is_newline((p), NLBLOCK->nltype, NLBLOCK->PSEND, &(NLBLOCK->nllen),\
 | 
			
		||||
       utf8)) \
 | 
			
		||||
    : \
 | 
			
		||||
    ((p) <= NLBLOCK->PSEND - NLBLOCK->nllen && \
 | 
			
		||||
     (p)[0] == NLBLOCK->nl[0] && \
 | 
			
		||||
@@ -152,8 +203,8 @@ string field names are. */
 | 
			
		||||
#define WAS_NEWLINE(p) \
 | 
			
		||||
  ((NLBLOCK->nltype != NLTYPE_FIXED)? \
 | 
			
		||||
    ((p) > NLBLOCK->PSSTART && \
 | 
			
		||||
     _pcre_was_newline((p), NLBLOCK->PSSTART, &(NLBLOCK->nllen), utf8) \
 | 
			
		||||
    ) \
 | 
			
		||||
     _pcre_was_newline((p), NLBLOCK->nltype, NLBLOCK->PSSTART, \
 | 
			
		||||
       &(NLBLOCK->nllen), utf8)) \
 | 
			
		||||
    : \
 | 
			
		||||
    ((p) >= NLBLOCK->PSSTART + NLBLOCK->nllen && \
 | 
			
		||||
     (p)[-NLBLOCK->nllen] == NLBLOCK->nl[0] && \
 | 
			
		||||
@@ -178,10 +229,12 @@ must begin with PCRE_. */
 | 
			
		||||
#define USPTR const unsigned char *
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/* Include the public PCRE header and the definitions of UCP character property
 | 
			
		||||
values. */
 | 
			
		||||
 | 
			
		||||
#include "pcre.h"
 | 
			
		||||
#include <pcre.h>
 | 
			
		||||
#include "ucp.h"
 | 
			
		||||
 | 
			
		||||
/* When compiling for use with the Virtual Pascal compiler, these functions
 | 
			
		||||
@@ -189,7 +242,9 @@ need to have their names changed. PCRE must be compiled with the -DVPCOMPAT
 | 
			
		||||
option on the command line. */
 | 
			
		||||
 | 
			
		||||
#ifdef VPCOMPAT
 | 
			
		||||
#define strlen(s)        _strlen(s)
 | 
			
		||||
#define strncmp(s1,s2,m) _strncmp(s1,s2,m)
 | 
			
		||||
#define memcmp(s,c,n)    _memcmp(s,c,n)
 | 
			
		||||
#define memcpy(d,s,n)    _memcpy(d,s,n)
 | 
			
		||||
#define memmove(d,s,n)   _memmove(d,s,n)
 | 
			
		||||
#define memset(s,c,n)    _memset(s,c,n)
 | 
			
		||||
@@ -198,23 +253,31 @@ option on the command line. */
 | 
			
		||||
/* To cope with SunOS4 and other systems that lack memmove() but have bcopy(),
 | 
			
		||||
define a macro for memmove() if HAVE_MEMMOVE is false, provided that HAVE_BCOPY
 | 
			
		||||
is set. Otherwise, include an emulating function for those systems that have
 | 
			
		||||
neither (there some non-Unix environments where this is the case). This assumes
 | 
			
		||||
that all calls to memmove are moving strings upwards in store, which is the
 | 
			
		||||
case in PCRE. */
 | 
			
		||||
neither (there are some non-Unix environments where this is the case). */
 | 
			
		||||
 | 
			
		||||
#if ! HAVE_MEMMOVE
 | 
			
		||||
#ifndef HAVE_MEMMOVE
 | 
			
		||||
#undef  memmove        /* some systems may have a macro */
 | 
			
		||||
#if HAVE_BCOPY
 | 
			
		||||
#ifdef HAVE_BCOPY
 | 
			
		||||
#define memmove(a, b, c) bcopy(b, a, c)
 | 
			
		||||
#else  /* HAVE_BCOPY */
 | 
			
		||||
static void *
 | 
			
		||||
pcre_memmove(unsigned char *dest, const unsigned char *src, size_t n)
 | 
			
		||||
pcre_memmove(void *d, const void *s, size_t n)
 | 
			
		||||
{
 | 
			
		||||
size_t i;
 | 
			
		||||
unsigned char *dest = (unsigned char *)d;
 | 
			
		||||
const unsigned char *src = (const unsigned char *)s;
 | 
			
		||||
if (dest > src)
 | 
			
		||||
  {
 | 
			
		||||
  dest += n;
 | 
			
		||||
  src += n;
 | 
			
		||||
  for (i = 0; i < n; ++i) *(--dest) = *(--src);
 | 
			
		||||
return dest;
 | 
			
		||||
  return (void *)dest;
 | 
			
		||||
  }
 | 
			
		||||
else
 | 
			
		||||
  {
 | 
			
		||||
  for (i = 0; i < n; ++i) *dest++ = *src++;
 | 
			
		||||
  return (void *)(dest - n);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
#define memmove(a, b, c) pcre_memmove(a, b, c)
 | 
			
		||||
#endif   /* not HAVE_BCOPY */
 | 
			
		||||
@@ -439,7 +502,8 @@ bits. */
 | 
			
		||||
/* Masks for identifying the public options that are permitted at compile
 | 
			
		||||
time, run time, or study time, respectively. */
 | 
			
		||||
 | 
			
		||||
#define PCRE_NEWLINE_BITS (PCRE_NEWLINE_CR|PCRE_NEWLINE_LF|PCRE_NEWLINE_ANY)
 | 
			
		||||
#define PCRE_NEWLINE_BITS (PCRE_NEWLINE_CR|PCRE_NEWLINE_LF|PCRE_NEWLINE_ANY| \
 | 
			
		||||
                           PCRE_NEWLINE_ANYCRLF)
 | 
			
		||||
 | 
			
		||||
#define PUBLIC_OPTIONS \
 | 
			
		||||
  (PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \
 | 
			
		||||
@@ -538,9 +602,9 @@ ESC_Z to detect the types that may be repeated. These are the types that
 | 
			
		||||
consume characters. If any new escapes are put in between that don't consume a
 | 
			
		||||
character, that code will have to change. */
 | 
			
		||||
 | 
			
		||||
enum { ESC_A = 1, ESC_G, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s, ESC_W,
 | 
			
		||||
       ESC_w, ESC_dum1, ESC_C, ESC_P, ESC_p, ESC_R, ESC_X, ESC_Z, ESC_z,
 | 
			
		||||
       ESC_E, ESC_Q, ESC_k, ESC_REF };
 | 
			
		||||
enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s,
 | 
			
		||||
       ESC_W, ESC_w, ESC_dum1, ESC_C, ESC_P, ESC_p, ESC_R, ESC_H, ESC_h,
 | 
			
		||||
       ESC_V, ESC_v, ESC_X, ESC_Z, ESC_z, ESC_E, ESC_Q, ESC_k, ESC_REF };
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/* Opcode table: OP_BRA must be last, as all values >= it are used for brackets
 | 
			
		||||
@@ -562,133 +626,138 @@ enum {
 | 
			
		||||
 | 
			
		||||
  OP_SOD,            /* 1 Start of data: \A */
 | 
			
		||||
  OP_SOM,            /* 2 Start of match (subject + offset): \G */
 | 
			
		||||
  OP_NOT_WORD_BOUNDARY,  /*  3 \B */
 | 
			
		||||
  OP_WORD_BOUNDARY,      /*  4 \b */
 | 
			
		||||
  OP_NOT_DIGIT,          /*  5 \D */
 | 
			
		||||
  OP_DIGIT,              /*  6 \d */
 | 
			
		||||
  OP_NOT_WHITESPACE,     /*  7 \S */
 | 
			
		||||
  OP_WHITESPACE,         /*  8 \s */
 | 
			
		||||
  OP_NOT_WORDCHAR,       /*  9 \W */
 | 
			
		||||
  OP_WORDCHAR,           /* 10 \w */
 | 
			
		||||
  OP_ANY,            /* 11 Match any character */
 | 
			
		||||
  OP_ANYBYTE,        /* 12 Match any byte (\C); different to OP_ANY for UTF-8 */
 | 
			
		||||
  OP_NOTPROP,        /* 13 \P (not Unicode property) */
 | 
			
		||||
  OP_PROP,           /* 14 \p (Unicode property) */
 | 
			
		||||
  OP_ANYNL,          /* 15 \R (any newline sequence) */
 | 
			
		||||
  OP_EXTUNI,         /* 16 \X (extended Unicode sequence */
 | 
			
		||||
  OP_EODN,           /* 17 End of data or \n at end of data: \Z. */
 | 
			
		||||
  OP_EOD,            /* 18 End of data: \z */
 | 
			
		||||
  OP_SET_SOM,        /* 3 Set start of match (\K) */
 | 
			
		||||
  OP_NOT_WORD_BOUNDARY,  /*  4 \B */
 | 
			
		||||
  OP_WORD_BOUNDARY,      /*  5 \b */
 | 
			
		||||
  OP_NOT_DIGIT,          /*  6 \D */
 | 
			
		||||
  OP_DIGIT,              /*  7 \d */
 | 
			
		||||
  OP_NOT_WHITESPACE,     /*  8 \S */
 | 
			
		||||
  OP_WHITESPACE,         /*  9 \s */
 | 
			
		||||
  OP_NOT_WORDCHAR,       /* 10 \W */
 | 
			
		||||
  OP_WORDCHAR,           /* 11 \w */
 | 
			
		||||
  OP_ANY,            /* 12 Match any character */
 | 
			
		||||
  OP_ANYBYTE,        /* 13 Match any byte (\C); different to OP_ANY for UTF-8 */
 | 
			
		||||
  OP_NOTPROP,        /* 14 \P (not Unicode property) */
 | 
			
		||||
  OP_PROP,           /* 15 \p (Unicode property) */
 | 
			
		||||
  OP_ANYNL,          /* 16 \R (any newline sequence) */
 | 
			
		||||
  OP_NOT_HSPACE,     /* 17 \H (not horizontal whitespace) */
 | 
			
		||||
  OP_HSPACE,         /* 18 \h (horizontal whitespace) */
 | 
			
		||||
  OP_NOT_VSPACE,     /* 19 \V (not vertical whitespace) */
 | 
			
		||||
  OP_VSPACE,         /* 20 \v (vertical whitespace) */
 | 
			
		||||
  OP_EXTUNI,         /* 21 \X (extended Unicode sequence) */
 | 
			
		||||
  OP_EODN,           /* 22 End of data or \n at end of data: \Z. */
 | 
			
		||||
  OP_EOD,            /* 23 End of data: \z */
 | 
			
		||||
 | 
			
		||||
  OP_OPT,            /* 19 Set runtime options */
 | 
			
		||||
  OP_CIRC,           /* 20 Start of line - varies with multiline switch */
 | 
			
		||||
  OP_DOLL,           /* 21 End of line - varies with multiline switch */
 | 
			
		||||
  OP_CHAR,           /* 22 Match one character, casefully */
 | 
			
		||||
  OP_CHARNC,         /* 23 Match one character, caselessly */
 | 
			
		||||
  OP_NOT,            /* 24 Match one character, not the following one */
 | 
			
		||||
  OP_OPT,            /* 24 Set runtime options */
 | 
			
		||||
  OP_CIRC,           /* 25 Start of line - varies with multiline switch */
 | 
			
		||||
  OP_DOLL,           /* 26 End of line - varies with multiline switch */
 | 
			
		||||
  OP_CHAR,           /* 27 Match one character, casefully */
 | 
			
		||||
  OP_CHARNC,         /* 28 Match one character, caselessly */
 | 
			
		||||
  OP_NOT,            /* 29 Match one character, not the following one */
 | 
			
		||||
 | 
			
		||||
  OP_STAR,           /* 25 The maximizing and minimizing versions of */
 | 
			
		||||
  OP_MINSTAR,        /* 26 these six opcodes must come in pairs, with */
 | 
			
		||||
  OP_PLUS,           /* 27 the minimizing one second. */
 | 
			
		||||
  OP_MINPLUS,        /* 28 This first set applies to single characters.*/
 | 
			
		||||
  OP_QUERY,          /* 29 */
 | 
			
		||||
  OP_MINQUERY,       /* 30 */
 | 
			
		||||
  OP_STAR,           /* 30 The maximizing and minimizing versions of */
 | 
			
		||||
  OP_MINSTAR,        /* 31 these six opcodes must come in pairs, with */
 | 
			
		||||
  OP_PLUS,           /* 32 the minimizing one second. */
 | 
			
		||||
  OP_MINPLUS,        /* 33 This first set applies to single characters.*/
 | 
			
		||||
  OP_QUERY,          /* 34 */
 | 
			
		||||
  OP_MINQUERY,       /* 35 */
 | 
			
		||||
 | 
			
		||||
  OP_UPTO,           /* 31 From 0 to n matches */
 | 
			
		||||
  OP_MINUPTO,        /* 32 */
 | 
			
		||||
  OP_EXACT,          /* 33 Exactly n matches */
 | 
			
		||||
  OP_UPTO,           /* 36 From 0 to n matches */
 | 
			
		||||
  OP_MINUPTO,        /* 37 */
 | 
			
		||||
  OP_EXACT,          /* 38 Exactly n matches */
 | 
			
		||||
 | 
			
		||||
  OP_POSSTAR,        /* 34 Possessified star */
 | 
			
		||||
  OP_POSPLUS,        /* 35 Possessified plus */
 | 
			
		||||
  OP_POSQUERY,       /* 36 Posesssified query */
 | 
			
		||||
  OP_POSUPTO,        /* 37 Possessified upto */
 | 
			
		||||
  OP_POSSTAR,        /* 39 Possessified star */
 | 
			
		||||
  OP_POSPLUS,        /* 40 Possessified plus */
 | 
			
		||||
  OP_POSQUERY,       /* 41 Possessified query */
 | 
			
		||||
  OP_POSUPTO,        /* 42 Possessified upto */
 | 
			
		||||
 | 
			
		||||
  OP_NOTSTAR,        /* 38 The maximizing and minimizing versions of */
 | 
			
		||||
  OP_NOTMINSTAR,     /* 39 these six opcodes must come in pairs, with */
 | 
			
		||||
  OP_NOTPLUS,        /* 40 the minimizing one second. They must be in */
 | 
			
		||||
  OP_NOTMINPLUS,     /* 41 exactly the same order as those above. */
 | 
			
		||||
  OP_NOTQUERY,       /* 42 This set applies to "not" single characters. */
 | 
			
		||||
  OP_NOTMINQUERY,    /* 43 */
 | 
			
		||||
  OP_NOTSTAR,        /* 43 The maximizing and minimizing versions of */
 | 
			
		||||
  OP_NOTMINSTAR,     /* 44 these six opcodes must come in pairs, with */
 | 
			
		||||
  OP_NOTPLUS,        /* 45 the minimizing one second. They must be in */
 | 
			
		||||
  OP_NOTMINPLUS,     /* 46 exactly the same order as those above. */
 | 
			
		||||
  OP_NOTQUERY,       /* 47 This set applies to "not" single characters. */
 | 
			
		||||
  OP_NOTMINQUERY,    /* 48 */
 | 
			
		||||
 | 
			
		||||
  OP_NOTUPTO,        /* 44 From 0 to n matches */
 | 
			
		||||
  OP_NOTMINUPTO,     /* 45 */
 | 
			
		||||
  OP_NOTEXACT,       /* 46 Exactly n matches */
 | 
			
		||||
  OP_NOTUPTO,        /* 49 From 0 to n matches */
 | 
			
		||||
  OP_NOTMINUPTO,     /* 50 */
 | 
			
		||||
  OP_NOTEXACT,       /* 51 Exactly n matches */
 | 
			
		||||
 | 
			
		||||
  OP_NOTPOSSTAR,     /* 47 Possessified versions */
 | 
			
		||||
  OP_NOTPOSPLUS,     /* 48 */
 | 
			
		||||
  OP_NOTPOSQUERY,    /* 49 */
 | 
			
		||||
  OP_NOTPOSUPTO,     /* 50 */
 | 
			
		||||
  OP_NOTPOSSTAR,     /* 52 Possessified versions */
 | 
			
		||||
  OP_NOTPOSPLUS,     /* 53 */
 | 
			
		||||
  OP_NOTPOSQUERY,    /* 54 */
 | 
			
		||||
  OP_NOTPOSUPTO,     /* 55 */
 | 
			
		||||
 | 
			
		||||
  OP_TYPESTAR,       /* 51 The maximizing and minimizing versions of */
 | 
			
		||||
  OP_TYPEMINSTAR,    /* 52 these six opcodes must come in pairs, with */
 | 
			
		||||
  OP_TYPEPLUS,       /* 53 the minimizing one second. These codes must */
 | 
			
		||||
  OP_TYPEMINPLUS,    /* 54 be in exactly the same order as those above. */
 | 
			
		||||
  OP_TYPEQUERY,      /* 55 This set applies to character types such as \d */
 | 
			
		||||
  OP_TYPEMINQUERY,   /* 56 */
 | 
			
		||||
  OP_TYPESTAR,       /* 56 The maximizing and minimizing versions of */
 | 
			
		||||
  OP_TYPEMINSTAR,    /* 57 these six opcodes must come in pairs, with */
 | 
			
		||||
  OP_TYPEPLUS,       /* 58 the minimizing one second. These codes must */
 | 
			
		||||
  OP_TYPEMINPLUS,    /* 59 be in exactly the same order as those above. */
 | 
			
		||||
  OP_TYPEQUERY,      /* 60 This set applies to character types such as \d */
 | 
			
		||||
  OP_TYPEMINQUERY,   /* 61 */
 | 
			
		||||
 | 
			
		||||
  OP_TYPEUPTO,       /* 57 From 0 to n matches */
 | 
			
		||||
  OP_TYPEMINUPTO,    /* 58 */
 | 
			
		||||
  OP_TYPEEXACT,      /* 59 Exactly n matches */
 | 
			
		||||
  OP_TYPEUPTO,       /* 62 From 0 to n matches */
 | 
			
		||||
  OP_TYPEMINUPTO,    /* 63 */
 | 
			
		||||
  OP_TYPEEXACT,      /* 64 Exactly n matches */
 | 
			
		||||
 | 
			
		||||
  OP_TYPEPOSSTAR,    /* 60 Possessified versions */
 | 
			
		||||
  OP_TYPEPOSPLUS,    /* 61 */
 | 
			
		||||
  OP_TYPEPOSQUERY,   /* 62 */
 | 
			
		||||
  OP_TYPEPOSUPTO,    /* 63 */
 | 
			
		||||
  OP_TYPEPOSSTAR,    /* 65 Possessified versions */
 | 
			
		||||
  OP_TYPEPOSPLUS,    /* 66 */
 | 
			
		||||
  OP_TYPEPOSQUERY,   /* 67 */
 | 
			
		||||
  OP_TYPEPOSUPTO,    /* 68 */
 | 
			
		||||
 | 
			
		||||
  OP_CRSTAR,         /* 64 The maximizing and minimizing versions of */
 | 
			
		||||
  OP_CRMINSTAR,      /* 65 all these opcodes must come in pairs, with */
 | 
			
		||||
  OP_CRPLUS,         /* 66 the minimizing one second. These codes must */
 | 
			
		||||
  OP_CRMINPLUS,      /* 67 be in exactly the same order as those above. */
 | 
			
		||||
  OP_CRQUERY,        /* 68 These are for character classes and back refs */
 | 
			
		||||
  OP_CRMINQUERY,     /* 69 */
 | 
			
		||||
  OP_CRRANGE,        /* 70 These are different to the three sets above. */
 | 
			
		||||
  OP_CRMINRANGE,     /* 71 */
 | 
			
		||||
  OP_CRSTAR,         /* 69 The maximizing and minimizing versions of */
 | 
			
		||||
  OP_CRMINSTAR,      /* 70 all these opcodes must come in pairs, with */
 | 
			
		||||
  OP_CRPLUS,         /* 71 the minimizing one second. These codes must */
 | 
			
		||||
  OP_CRMINPLUS,      /* 72 be in exactly the same order as those above. */
 | 
			
		||||
  OP_CRQUERY,        /* 73 These are for character classes and back refs */
 | 
			
		||||
  OP_CRMINQUERY,     /* 74 */
 | 
			
		||||
  OP_CRRANGE,        /* 75 These are different to the three sets above. */
 | 
			
		||||
  OP_CRMINRANGE,     /* 76 */
 | 
			
		||||
 | 
			
		||||
  OP_CLASS,          /* 72 Match a character class, chars < 256 only */
 | 
			
		||||
  OP_NCLASS,         /* 73 Same, but the bitmap was created from a negative
 | 
			
		||||
  OP_CLASS,          /* 77 Match a character class, chars < 256 only */
 | 
			
		||||
  OP_NCLASS,         /* 78 Same, but the bitmap was created from a negative
 | 
			
		||||
                           class - the difference is relevant only when a UTF-8
 | 
			
		||||
                           character > 255 is encountered. */
 | 
			
		||||
 | 
			
		||||
  OP_XCLASS,         /* 74 Extended class for handling UTF-8 chars within the
 | 
			
		||||
  OP_XCLASS,         /* 79 Extended class for handling UTF-8 chars within the
 | 
			
		||||
                           class. This does both positive and negative. */
 | 
			
		||||
 | 
			
		||||
  OP_REF,            /* 75 Match a back reference */
 | 
			
		||||
  OP_RECURSE,        /* 76 Match a numbered subpattern (possibly recursive) */
 | 
			
		||||
  OP_CALLOUT,        /* 77 Call out to external function if provided */
 | 
			
		||||
  OP_REF,            /* 80 Match a back reference */
 | 
			
		||||
  OP_RECURSE,        /* 81 Match a numbered subpattern (possibly recursive) */
 | 
			
		||||
  OP_CALLOUT,        /* 82 Call out to external function if provided */
 | 
			
		||||
 | 
			
		||||
  OP_ALT,            /* 78 Start of alternation */
 | 
			
		||||
  OP_KET,            /* 79 End of group that doesn't have an unbounded repeat */
 | 
			
		||||
  OP_KETRMAX,        /* 80 These two must remain together and in this */
 | 
			
		||||
  OP_KETRMIN,        /* 81 order. They are for groups the repeat for ever. */
 | 
			
		||||
  OP_ALT,            /* 83 Start of alternation */
 | 
			
		||||
  OP_KET,            /* 84 End of group that doesn't have an unbounded repeat */
 | 
			
		||||
  OP_KETRMAX,        /* 85 These two must remain together and in this */
 | 
			
		||||
  OP_KETRMIN,        /* 86 order. They are for groups that repeat for ever. */
 | 
			
		||||
 | 
			
		||||
  /* The assertions must come before BRA, CBRA, ONCE, and COND.*/
 | 
			
		||||
 | 
			
		||||
  OP_ASSERT,         /* 82 Positive lookahead */
 | 
			
		||||
  OP_ASSERT_NOT,     /* 83 Negative lookahead */
 | 
			
		||||
  OP_ASSERTBACK,     /* 84 Positive lookbehind */
 | 
			
		||||
  OP_ASSERTBACK_NOT, /* 85 Negative lookbehind */
 | 
			
		||||
  OP_REVERSE,        /* 86 Move pointer back - used in lookbehind assertions */
 | 
			
		||||
  OP_ASSERT,         /* 87 Positive lookahead */
 | 
			
		||||
  OP_ASSERT_NOT,     /* 88 Negative lookahead */
 | 
			
		||||
  OP_ASSERTBACK,     /* 89 Positive lookbehind */
 | 
			
		||||
  OP_ASSERTBACK_NOT, /* 90 Negative lookbehind */
 | 
			
		||||
  OP_REVERSE,        /* 91 Move pointer back - used in lookbehind assertions */
 | 
			
		||||
 | 
			
		||||
  /* ONCE, BRA, CBRA, and COND must come after the assertions, with ONCE first,
 | 
			
		||||
  as there's a test for >= ONCE for a subpattern that isn't an assertion. */
 | 
			
		||||
 | 
			
		||||
  OP_ONCE,           /* 87 Atomic group */
 | 
			
		||||
  OP_BRA,            /* 88 Start of non-capturing bracket */
 | 
			
		||||
  OP_CBRA,           /* 89 Start of capturing bracket */
 | 
			
		||||
  OP_COND,           /* 90 Conditional group */
 | 
			
		||||
  OP_ONCE,           /* 92 Atomic group */
 | 
			
		||||
  OP_BRA,            /* 93 Start of non-capturing bracket */
 | 
			
		||||
  OP_CBRA,           /* 94 Start of capturing bracket */
 | 
			
		||||
  OP_COND,           /* 95 Conditional group */
 | 
			
		||||
 | 
			
		||||
  /* These three must follow the previous three, in the same order. There's a
 | 
			
		||||
  check for >= SBRA to distinguish the two sets. */
 | 
			
		||||
 | 
			
		||||
  OP_SBRA,           /* 91 Start of non-capturing bracket, check empty  */
 | 
			
		||||
  OP_SCBRA,          /* 92 Start of capturing bracket, check empty */
 | 
			
		||||
  OP_SCOND,          /* 93 Conditional group, check empty */
 | 
			
		||||
  OP_SBRA,           /* 96 Start of non-capturing bracket, check empty  */
 | 
			
		||||
  OP_SCBRA,          /* 97 Start of capturing bracket, check empty */
 | 
			
		||||
  OP_SCOND,          /* 98 Conditional group, check empty */
 | 
			
		||||
 | 
			
		||||
  OP_CREF,           /* 94 Used to hold a capture number as condition */
 | 
			
		||||
  OP_RREF,           /* 95 Used to hold a recursion number as condition */
 | 
			
		||||
  OP_DEF,            /* 96 The DEFINE condition */
 | 
			
		||||
  OP_CREF,           /* 99 Used to hold a capture number as condition */
 | 
			
		||||
  OP_RREF,           /* 100 Used to hold a recursion number as condition */
 | 
			
		||||
  OP_DEF,            /* 101 The DEFINE condition */
 | 
			
		||||
 | 
			
		||||
  OP_BRAZERO,        /* 97 These two must remain together and in this */
 | 
			
		||||
  OP_BRAMINZERO      /* 98 order. */
 | 
			
		||||
  OP_BRAZERO,        /* 102 These two must remain together and in this */
 | 
			
		||||
  OP_BRAMINZERO      /* 103 order. */
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -696,10 +765,10 @@ enum {
 | 
			
		||||
for debugging. The macro is referenced only in pcre_printint.c. */
 | 
			
		||||
 | 
			
		||||
#define OP_NAME_LIST \
 | 
			
		||||
  "End", "\\A", "\\G", "\\B", "\\b", "\\D", "\\d",                \
 | 
			
		||||
  "End", "\\A", "\\G", "\\K", "\\B", "\\b", "\\D", "\\d",         \
 | 
			
		||||
  "\\S", "\\s", "\\W", "\\w", "Any", "Anybyte",                   \
 | 
			
		||||
  "notprop", "prop", "anynl", "extuni",                           \
 | 
			
		||||
  "\\Z", "\\z",                                                   \
 | 
			
		||||
  "notprop", "prop", "\\R", "\\H", "\\h", "\\V", "\\v",           \
 | 
			
		||||
  "extuni",  "\\Z", "\\z",                                        \
 | 
			
		||||
  "Opt", "^", "$", "char", "charnc", "not",                       \
 | 
			
		||||
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",                 \
 | 
			
		||||
  "*+","++", "?+", "{",                                           \
 | 
			
		||||
@@ -726,9 +795,11 @@ in UTF-8 mode. The code that uses this table must know about such things. */
 | 
			
		||||
 | 
			
		||||
#define OP_LENGTHS \
 | 
			
		||||
  1,                             /* End                                    */ \
 | 
			
		||||
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* \A, \G, \B, \B, \D, \d, \S, \s, \W, \w */ \
 | 
			
		||||
  1, 1, 1, 1, 1,                 /* \A, \G, \K, \B, \b                     */ \
 | 
			
		||||
  1, 1, 1, 1, 1, 1,              /* \D, \d, \S, \s, \W, \w                 */ \
 | 
			
		||||
  1, 1,                          /* Any, Anybyte                           */ \
 | 
			
		||||
  3, 3, 1, 1,                    /* NOTPROP, PROP, EXTUNI, ANYNL           */ \
 | 
			
		||||
  3, 3, 1,                       /* NOTPROP, PROP, EXTUNI                  */ \
 | 
			
		||||
  1, 1, 1, 1, 1,                 /* \R, \H, \h, \V, \v                     */ \
 | 
			
		||||
  1, 1, 2, 1, 1,                 /* \Z, \z, Opt, ^, $                      */ \
 | 
			
		||||
  2,                             /* Char  - the minimum length             */ \
 | 
			
		||||
  2,                             /* Charnc  - the minimum length           */ \
 | 
			
		||||
@@ -788,7 +859,7 @@ enum { ERR0,  ERR1,  ERR2,  ERR3,  ERR4,  ERR5,  ERR6,  ERR7,  ERR8,  ERR9,
 | 
			
		||||
       ERR20, ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR28, ERR29,
 | 
			
		||||
       ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39,
 | 
			
		||||
       ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
 | 
			
		||||
       ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57 };
 | 
			
		||||
       ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58 };
 | 
			
		||||
 | 
			
		||||
/* The real format of the start of the pcre block; the index of names and the
 | 
			
		||||
code vector run on as long as necessary after the end. We store an explicit
 | 
			
		||||
@@ -877,21 +948,11 @@ typedef struct recursion_info {
 | 
			
		||||
  struct recursion_info *prevrec; /* Previous recursion record (or NULL) */
 | 
			
		||||
  int group_num;                /* Number of group that was called */
 | 
			
		||||
  const uschar *after_call;     /* "Return value": points after the call in the expr */
 | 
			
		||||
  USPTR save_start;             /* Old value of md->start_match */
 | 
			
		||||
  USPTR save_start;             /* Old value of mstart */
 | 
			
		||||
  int *offset_save;             /* Pointer to start of saved offsets */
 | 
			
		||||
  int saved_max;                /* Number of saved offsets */
 | 
			
		||||
} recursion_info;
 | 
			
		||||
 | 
			
		||||
/* When compiling in a mode that doesn't use recursive calls to match(),
 | 
			
		||||
a structure is used to remember local variables on the heap. It is defined in
 | 
			
		||||
pcre_exec.c, close to the match() function, so that it is easy to keep it in
 | 
			
		||||
step with any changes of local variable. However, the pointer to the current
 | 
			
		||||
frame must be saved in some "static" place over a longjmp(). We declare the
 | 
			
		||||
structure here so that we can put a pointer in the match_data structure. NOTE:
 | 
			
		||||
This isn't used for a "normal" compilation of pcre. */
 | 
			
		||||
 | 
			
		||||
struct heapframe;
 | 
			
		||||
 | 
			
		||||
/* Structure for building a chain of data for holding the values of the subject
 | 
			
		||||
pointer at the start of each subpattern, so as to detect when an empty string
 | 
			
		||||
has been matched by a subpattern - to break infinite loops. */
 | 
			
		||||
@@ -928,7 +989,7 @@ typedef struct match_data {
 | 
			
		||||
  const uschar *start_code;     /* For use when recursing */
 | 
			
		||||
  USPTR  start_subject;         /* Start of the subject string */
 | 
			
		||||
  USPTR  end_subject;           /* End of the subject string */
 | 
			
		||||
  USPTR  start_match;           /* Start of this match attempt */
 | 
			
		||||
  USPTR  start_match_ptr;       /* Start of matched string */
 | 
			
		||||
  USPTR  end_match_ptr;         /* Subject position at end match */
 | 
			
		||||
  int    end_offset_top;        /* Highwater mark at end of match */
 | 
			
		||||
  int    capture_last;          /* Most recent capture number */
 | 
			
		||||
@@ -937,7 +998,6 @@ typedef struct match_data {
 | 
			
		||||
  int    eptrn;                 /* Next free eptrblock */
 | 
			
		||||
  recursion_info *recursive;    /* Linked list of recursion data */
 | 
			
		||||
  void  *callout_data;          /* To pass back to callouts */
 | 
			
		||||
  struct heapframe *thisframe;  /* Used only when compiling for no recursion */
 | 
			
		||||
} match_data;
 | 
			
		||||
 | 
			
		||||
/* A similar structure is used for the same purpose by the DFA matching
 | 
			
		||||
@@ -1024,16 +1084,16 @@ extern const uschar _pcre_OP_lengths[];
 | 
			
		||||
one of the exported public functions. They have to be "external" in the C
 | 
			
		||||
sense, but are not part of the PCRE public API. */
 | 
			
		||||
 | 
			
		||||
extern BOOL         _pcre_is_newline(const uschar *, const uschar *, int *,
 | 
			
		||||
                      BOOL);
 | 
			
		||||
extern BOOL         _pcre_is_newline(const uschar *, int, const uschar *,
 | 
			
		||||
                      int *, BOOL);
 | 
			
		||||
extern int          _pcre_ord2utf8(int, uschar *);
 | 
			
		||||
extern real_pcre   *_pcre_try_flipped(const real_pcre *, real_pcre *,
 | 
			
		||||
                      const pcre_study_data *, pcre_study_data *);
 | 
			
		||||
extern int          _pcre_ucp_findprop(const unsigned int, int *, int *);
 | 
			
		||||
extern unsigned int _pcre_ucp_othercase(const unsigned int);
 | 
			
		||||
extern int          _pcre_valid_utf8(const uschar *, int);
 | 
			
		||||
extern BOOL         _pcre_was_newline(const uschar *, const uschar *, int *,
 | 
			
		||||
                      BOOL);
 | 
			
		||||
extern BOOL         _pcre_was_newline(const uschar *, int, const uschar *,
 | 
			
		||||
                      int *, BOOL);
 | 
			
		||||
extern BOOL         _pcre_xclass(int, const uschar *);
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
@@ -6,7 +6,7 @@
 | 
			
		||||
and semantics are as close as possible to those of the Perl 5 language.
 | 
			
		||||
 | 
			
		||||
                       Written by Philip Hazel
 | 
			
		||||
           Copyright (c) 1997-2006 University of Cambridge
 | 
			
		||||
           Copyright (c) 1997-2007 University of Cambridge
 | 
			
		||||
 | 
			
		||||
-----------------------------------------------------------------------------
 | 
			
		||||
Redistribution and use in source and binary forms, with or without
 | 
			
		||||
 
 | 
			
		||||
@@ -6,7 +6,7 @@
 | 
			
		||||
and semantics are as close as possible to those of the Perl 5 language.
 | 
			
		||||
 | 
			
		||||
                       Written by Philip Hazel
 | 
			
		||||
           Copyright (c) 1997-2006 University of Cambridge
 | 
			
		||||
           Copyright (c) 1997-2007 University of Cambridge
 | 
			
		||||
 | 
			
		||||
-----------------------------------------------------------------------------
 | 
			
		||||
Redistribution and use in source and binary forms, with or without
 | 
			
		||||
@@ -42,9 +42,8 @@ POSSIBILITY OF SUCH DAMAGE.
 | 
			
		||||
one kind of newline is to be recognized. When a newline is found, its length is
 | 
			
		||||
returned. In principle, we could implement several newline "types", each
 | 
			
		||||
referring to a different set of newline characters. At present, PCRE supports
 | 
			
		||||
only NLTYPE_FIXED, which gets handled without these functions, and NLTYPE_ALL,
 | 
			
		||||
so for now the type isn't passed into the functions. It can easily be added
 | 
			
		||||
later if required. The full list of Unicode newline characters is taken from
 | 
			
		||||
only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF,
 | 
			
		||||
and NLTYPE_ANY. The full list of Unicode newline characters is taken from
 | 
			
		||||
http://unicode.org/unicode/reports/tr18/. */
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -61,6 +60,7 @@ string that is being processed.
 | 
			
		||||
 | 
			
		||||
Arguments:
 | 
			
		||||
  ptr          pointer to possible newline
 | 
			
		||||
  type         the newline type
 | 
			
		||||
  endptr       pointer to the end of the string
 | 
			
		||||
  lenptr       where to return the length
 | 
			
		||||
  utf8         TRUE if in utf8 mode
 | 
			
		||||
@@ -69,12 +69,23 @@ Returns:       TRUE or FALSE
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
BOOL
 | 
			
		||||
_pcre_is_newline(const uschar *ptr, const uschar *endptr, int *lenptr,
 | 
			
		||||
  BOOL utf8)
 | 
			
		||||
_pcre_is_newline(const uschar *ptr, int type, const uschar *endptr,
 | 
			
		||||
  int *lenptr, BOOL utf8)
 | 
			
		||||
{
 | 
			
		||||
int c;
 | 
			
		||||
if (utf8) { GETCHAR(c, ptr); } else c = *ptr;
 | 
			
		||||
switch(c)
 | 
			
		||||
 | 
			
		||||
if (type == NLTYPE_ANYCRLF) switch(c)
 | 
			
		||||
  {
 | 
			
		||||
  case 0x000a: *lenptr = 1; return TRUE;             /* LF */
 | 
			
		||||
  case 0x000d: *lenptr = (ptr < endptr - 1 && ptr[1] == 0x0a)? 2 : 1;
 | 
			
		||||
               return TRUE;                          /* CR */
 | 
			
		||||
  default: return FALSE;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
/* NLTYPE_ANY */
 | 
			
		||||
 | 
			
		||||
else switch(c)
 | 
			
		||||
  {
 | 
			
		||||
  case 0x000a:                                       /* LF */
 | 
			
		||||
  case 0x000b:                                       /* VT */
 | 
			
		||||
@@ -99,6 +110,7 @@ the string that is being processed.
 | 
			
		||||
 | 
			
		||||
Arguments:
 | 
			
		||||
  ptr          pointer to possible newline
 | 
			
		||||
  type         the newline type
 | 
			
		||||
  startptr     pointer to the start of the string
 | 
			
		||||
  lenptr       where to return the length
 | 
			
		||||
  utf8         TRUE if in utf8 mode
 | 
			
		||||
@@ -107,8 +119,8 @@ Returns:       TRUE or FALSE
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
BOOL
 | 
			
		||||
_pcre_was_newline(const uschar *ptr, const uschar *startptr, int *lenptr,
 | 
			
		||||
  BOOL utf8)
 | 
			
		||||
_pcre_was_newline(const uschar *ptr, int type, const uschar *startptr,
 | 
			
		||||
  int *lenptr, BOOL utf8)
 | 
			
		||||
{
 | 
			
		||||
int c;
 | 
			
		||||
ptr--;
 | 
			
		||||
@@ -118,7 +130,16 @@ if (utf8)
 | 
			
		||||
  GETCHAR(c, ptr);
 | 
			
		||||
  }
 | 
			
		||||
else c = *ptr;
 | 
			
		||||
switch(c)
 | 
			
		||||
 | 
			
		||||
if (type == NLTYPE_ANYCRLF) switch(c)
 | 
			
		||||
  {
 | 
			
		||||
  case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1;
 | 
			
		||||
               return TRUE;                         /* LF */
 | 
			
		||||
  case 0x000d: *lenptr = 1; return TRUE;            /* CR */
 | 
			
		||||
  default: return FALSE;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
else switch(c)
 | 
			
		||||
  {
 | 
			
		||||
  case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1;
 | 
			
		||||
               return TRUE;                         /* LF */
 | 
			
		||||
 
 | 
			
		||||
@@ -6,7 +6,7 @@
 | 
			
		||||
and semantics are as close as possible to those of the Perl 5 language.
 | 
			
		||||
 | 
			
		||||
                       Written by Philip Hazel
 | 
			
		||||
           Copyright (c) 1997-2006 University of Cambridge
 | 
			
		||||
           Copyright (c) 1997-2007 University of Cambridge
 | 
			
		||||
 | 
			
		||||
-----------------------------------------------------------------------------
 | 
			
		||||
Redistribution and use in source and binary forms, with or without
 | 
			
		||||
@@ -62,6 +62,7 @@ Returns:     number of characters placed in the buffer
 | 
			
		||||
int
 | 
			
		||||
_pcre_ord2utf8(int cvalue, uschar *buffer)
 | 
			
		||||
{
 | 
			
		||||
#ifdef SUPPORT_UTF8
 | 
			
		||||
register int i, j;
 | 
			
		||||
for (i = 0; i < _pcre_utf8_table1_size; i++)
 | 
			
		||||
  if (cvalue <= _pcre_utf8_table1[i]) break;
 | 
			
		||||
@@ -73,6 +74,9 @@ for (j = i; j > 0; j--)
 | 
			
		||||
 }
 | 
			
		||||
*buffer = _pcre_utf8_table2[i] | cvalue;
 | 
			
		||||
return i + 1;
 | 
			
		||||
#else
 | 
			
		||||
return 0;   /* Keep compiler happy; this function won't ever be */
 | 
			
		||||
#endif      /* called when SUPPORT_UTF8 is not defined. */
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* End of pcre_ord2utf8.c */
 | 
			
		||||
 
 | 
			
		||||
@@ -6,7 +6,7 @@
 | 
			
		||||
and semantics are as close as possible to those of the Perl 5 language.
 | 
			
		||||
 | 
			
		||||
                       Written by Philip Hazel
 | 
			
		||||
           Copyright (c) 1997-2006 University of Cambridge
 | 
			
		||||
           Copyright (c) 1997-2007 University of Cambridge
 | 
			
		||||
 | 
			
		||||
-----------------------------------------------------------------------------
 | 
			
		||||
Redistribution and use in source and binary forms, with or without
 | 
			
		||||
@@ -63,7 +63,7 @@ Returns:        the (possibly updated) count value (a non-negative number), or
 | 
			
		||||
                a negative error number
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
PCRE_DATA_SCOPE int
 | 
			
		||||
PCRE_EXP_DEFN int
 | 
			
		||||
pcre_refcount(pcre *argument_re, int adjust)
 | 
			
		||||
{
 | 
			
		||||
real_pcre *re = (real_pcre *)argument_re;
 | 
			
		||||
 
 | 
			
		||||
@@ -6,7 +6,7 @@
 | 
			
		||||
and semantics are as close as possible to those of the Perl 5 language.
 | 
			
		||||
 | 
			
		||||
                       Written by Philip Hazel
 | 
			
		||||
           Copyright (c) 1997-2006 University of Cambridge
 | 
			
		||||
           Copyright (c) 1997-2007 University of Cambridge
 | 
			
		||||
 | 
			
		||||
-----------------------------------------------------------------------------
 | 
			
		||||
Redistribution and use in source and binary forms, with or without
 | 
			
		||||
@@ -394,11 +394,13 @@ do
 | 
			
		||||
      character with a value > 255. */
 | 
			
		||||
 | 
			
		||||
      case OP_NCLASS:
 | 
			
		||||
#ifdef SUPPORT_UTF8
 | 
			
		||||
      if (utf8)
 | 
			
		||||
        {
 | 
			
		||||
        start_bits[24] |= 0xf0;              /* Bits for 0xc4 - 0xc7 */
 | 
			
		||||
        memset(start_bits+25, 0xff, 7);      /* Bits for 0xc8 - 0xff */
 | 
			
		||||
        }
 | 
			
		||||
#endif
 | 
			
		||||
      /* Fall through */
 | 
			
		||||
 | 
			
		||||
      case OP_CLASS:
 | 
			
		||||
@@ -411,6 +413,7 @@ do
 | 
			
		||||
        value is > 127. In fact, there are only two possible starting bytes for
 | 
			
		||||
        characters in the range 128 - 255. */
 | 
			
		||||
 | 
			
		||||
#ifdef SUPPORT_UTF8
 | 
			
		||||
        if (utf8)
 | 
			
		||||
          {
 | 
			
		||||
          for (c = 0; c < 16; c++) start_bits[c] |= tcode[c];
 | 
			
		||||
@@ -428,6 +431,7 @@ do
 | 
			
		||||
        /* In non-UTF-8 mode, the two bit maps are completely compatible. */
 | 
			
		||||
 | 
			
		||||
        else
 | 
			
		||||
#endif
 | 
			
		||||
          {
 | 
			
		||||
          for (c = 0; c < 32; c++) start_bits[c] |= tcode[c];
 | 
			
		||||
          }
 | 
			
		||||
@@ -487,7 +491,7 @@ Returns:    pointer to a pcre_extra block, with study_data filled in and the
 | 
			
		||||
            NULL on error or if no optimization possible
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
PCRE_DATA_SCOPE pcre_extra *
 | 
			
		||||
PCRE_EXP_DEFN pcre_extra *
 | 
			
		||||
pcre_study(const pcre *external_re, int options, const char **errorptr)
 | 
			
		||||
{
 | 
			
		||||
uschar start_bits[32];
 | 
			
		||||
 
 | 
			
		||||
@@ -6,7 +6,7 @@
 | 
			
		||||
and semantics are as close as possible to those of the Perl 5 language.
 | 
			
		||||
 | 
			
		||||
                       Written by Philip Hazel
 | 
			
		||||
           Copyright (c) 1997-2006 University of Cambridge
 | 
			
		||||
           Copyright (c) 1997-2007 University of Cambridge
 | 
			
		||||
 | 
			
		||||
-----------------------------------------------------------------------------
 | 
			
		||||
Redistribution and use in source and binary forms, with or without
 | 
			
		||||
@@ -61,6 +61,8 @@ const uschar _pcre_OP_lengths[] = { OP_LENGTHS };
 | 
			
		||||
/* These are the breakpoints for different numbers of bytes in a UTF-8
 | 
			
		||||
character. */
 | 
			
		||||
 | 
			
		||||
#ifdef SUPPORT_UTF8
 | 
			
		||||
 | 
			
		||||
const int _pcre_utf8_table1[] =
 | 
			
		||||
  { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};
 | 
			
		||||
 | 
			
		||||
@@ -301,4 +303,6 @@ const ucp_type_table _pcre_utt[] = {
 | 
			
		||||
 | 
			
		||||
const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table);
 | 
			
		||||
 | 
			
		||||
#endif  /* SUPPORT_UTF8 */
 | 
			
		||||
 | 
			
		||||
/* End of pcre_tables.c */
 | 
			
		||||
 
 | 
			
		||||
@@ -6,7 +6,7 @@
 | 
			
		||||
and semantics are as close as possible to those of the Perl 5 language.
 | 
			
		||||
 | 
			
		||||
                       Written by Philip Hazel
 | 
			
		||||
           Copyright (c) 1997-2006 University of Cambridge
 | 
			
		||||
           Copyright (c) 1997-2007 University of Cambridge
 | 
			
		||||
 | 
			
		||||
-----------------------------------------------------------------------------
 | 
			
		||||
Redistribution and use in source and binary forms, with or without
 | 
			
		||||
 
 | 
			
		||||
@@ -6,7 +6,7 @@
 | 
			
		||||
and semantics are as close as possible to those of the Perl 5 language.
 | 
			
		||||
 | 
			
		||||
                       Written by Philip Hazel
 | 
			
		||||
           Copyright (c) 1997-2006 University of Cambridge
 | 
			
		||||
           Copyright (c) 1997-2007 University of Cambridge
 | 
			
		||||
 | 
			
		||||
-----------------------------------------------------------------------------
 | 
			
		||||
Redistribution and use in source and binary forms, with or without
 | 
			
		||||
@@ -75,7 +75,7 @@ I could find no way of detecting that a macro is defined as an empty string at
 | 
			
		||||
pre-processor time. This hack uses a standard trick for avoiding calling
 | 
			
		||||
the STRING macro with an empty argument when doing the test. */
 | 
			
		||||
 | 
			
		||||
PCRE_DATA_SCOPE const char *
 | 
			
		||||
PCRE_EXP_DEFN const char *
 | 
			
		||||
pcre_version(void)
 | 
			
		||||
{
 | 
			
		||||
return (XSTRING(Z PCRE_PRERELEASE)[1] == 0)?
 | 
			
		||||
 
 | 
			
		||||
@@ -6,7 +6,7 @@
 | 
			
		||||
and semantics are as close as possible to those of the Perl 5 language.
 | 
			
		||||
 | 
			
		||||
                       Written by Philip Hazel
 | 
			
		||||
           Copyright (c) 1997-2006 University of Cambridge
 | 
			
		||||
           Copyright (c) 1997-2007 University of Cambridge
 | 
			
		||||
 | 
			
		||||
-----------------------------------------------------------------------------
 | 
			
		||||
Redistribution and use in source and binary forms, with or without
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user