mirror of
				https://gitlab.gnome.org/GNOME/glib.git
				synced 2025-10-31 08:22:16 +01:00 
			
		
		
		
	Update to PCRE 7.2
svn path=/trunk/; revision=5659
This commit is contained in:
		| @@ -1,3 +1,7 @@ | ||||
| 2007-07-31  Matthias Clasen  <mclasen@redhat.com> | ||||
|  | ||||
| 	* glib/pcre/*: Update the internal PCRE to 7.2 | ||||
|  | ||||
| 2007-07-31  Matthias Clasen  <mclasen@redhat.com> | ||||
|  | ||||
| 	* glib/pltcheck.sh: Fix some glitches | ||||
|   | ||||
| @@ -1,68 +1,5 @@ | ||||
| PCRE LICENCE | ||||
| ------------ | ||||
|  | ||||
| PCRE is a library of functions to support regular expressions whose syntax | ||||
| and semantics are as close as possible to those of the Perl 5 language. | ||||
|  | ||||
| Release 7 of PCRE is distributed under the terms of the "BSD" licence, as | ||||
| specified below. The documentation for PCRE, supplied in the "doc" | ||||
| directory, is distributed under the same terms as the software itself. | ||||
|  | ||||
| The basic library functions are written in C and are freestanding. Also | ||||
| included in the distribution is a set of C++ wrapper functions. | ||||
|  | ||||
|  | ||||
| THE BASIC LIBRARY FUNCTIONS | ||||
| --------------------------- | ||||
|  | ||||
| Written by:       Philip Hazel | ||||
| Email local part: ph10 | ||||
| Email domain:     cam.ac.uk | ||||
|  | ||||
| University of Cambridge Computing Service, | ||||
| Cambridge, England. Phone: +44 1223 334714. | ||||
|  | ||||
| Copyright (c) 1997-2006 University of Cambridge | ||||
| All rights reserved. | ||||
|  | ||||
|  | ||||
| THE C++ WRAPPER FUNCTIONS | ||||
| ------------------------- | ||||
|  | ||||
| Contributed by:   Google Inc. | ||||
|  | ||||
| Copyright (c) 2006, Google Inc. | ||||
| All rights reserved. | ||||
|  | ||||
|  | ||||
| THE "BSD" LICENCE | ||||
| ----------------- | ||||
|  | ||||
| Redistribution and use in source and binary forms, with or without | ||||
| modification, are permitted provided that the following conditions are met: | ||||
|  | ||||
|     * Redistributions of source code must retain the above copyright notice, | ||||
|       this list of conditions and the following disclaimer. | ||||
|  | ||||
|     * Redistributions in binary form must reproduce the above copyright | ||||
|       notice, this list of conditions and the following disclaimer in the | ||||
|       documentation and/or other materials provided with the distribution. | ||||
|  | ||||
|     * Neither the name of the University of Cambridge nor the name of Google | ||||
|       Inc. nor the names of their contributors may be used to endorse or | ||||
|       promote products derived from this software without specific prior | ||||
|       written permission. | ||||
|  | ||||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||||
| ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | ||||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||||
| CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | ||||
| SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | ||||
| INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | ||||
| CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | ||||
| ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | ||||
| POSSIBILITY OF SUCH DAMAGE. | ||||
| Please see the file LICENCE in the PCRE distribution for licensing details. | ||||
|  | ||||
| End | ||||
|   | ||||
| @@ -9,7 +9,7 @@ INCLUDES = \ | ||||
| 	-DMAX_NAME_COUNT=10000 \ | ||||
| 	-DMAX_DUPLENGTH=30000 \ | ||||
| 	-DLINK_SIZE=2 \ | ||||
| 	-DEBCDIC=0 \ | ||||
| 	-UEBCDIC \ | ||||
| 	-DPOSIX_MALLOC_THRESHOLD=10 \ | ||||
| 	-I$(top_srcdir) \ | ||||
| 	-I$(srcdir) \ | ||||
|   | ||||
| @@ -5,7 +5,7 @@ | ||||
| /* This is the public header file for the PCRE library, to be #included by | ||||
| applications that call the PCRE functions. | ||||
|  | ||||
|            Copyright (c) 1997-2006 University of Cambridge | ||||
|            Copyright (c) 1997-2007 University of Cambridge | ||||
|  | ||||
| ----------------------------------------------------------------------------- | ||||
| Redistribution and use in source and binary forms, with or without | ||||
| @@ -41,47 +41,31 @@ POSSIBILITY OF SUCH DAMAGE. | ||||
|  | ||||
| /* The current PCRE version information. */ | ||||
|  | ||||
| /* NOTES FOR FUTURE MAINTAINERS: Do not use numbers with leading zeros, because | ||||
| they may be treated as octal constants. The PCRE_PRERELEASE feature is for | ||||
| identifying release candidates. It might be defined as -RC2, for example. In | ||||
| real releases, it should be defined empty. Do not change the alignment of these | ||||
| statements. The code in ./configure greps out the version numbers by using "cut" | ||||
| to get values from column 29 onwards. These are substituted into pcre-config | ||||
| and libpcre.pc. The values are not put into configure.ac and substituted here | ||||
| (which would simplify this issue) because that makes life harder for those who | ||||
| cannot run ./configure. As it now stands, this file need not be edited in that | ||||
| circumstance. */ | ||||
|  | ||||
| #define PCRE_MAJOR          7 | ||||
| #define PCRE_MINOR          0 | ||||
| #define PCRE_PRERELEASE | ||||
| #define PCRE_DATE           18-Dec-2006 | ||||
| #define PCRE_MINOR          2 | ||||
| #define PCRE_PRERELEASE      | ||||
| #define PCRE_DATE           2007-06-19 | ||||
|  | ||||
| /* Win32 uses DLL by default; it needs special stuff for exported functions | ||||
| when building PCRE. */ | ||||
| /* When an application links to a PCRE DLL in Windows, the symbols that are | ||||
| imported have to be identified as such. When building PCRE, the appropriate | ||||
| export setting is defined in pcre_internal.h, which includes this file. So we | ||||
| don't change an existing definition of PCRE_EXP_DECL. */ | ||||
|  | ||||
| /* But don't do that when building as part of GLib */ | ||||
| #if 0 | ||||
| #ifdef _WIN32 | ||||
| #  ifdef PCRE_DEFINITION | ||||
| #    ifdef DLL_EXPORT | ||||
| #      define PCRE_DATA_SCOPE __declspec(dllexport) | ||||
| #    endif | ||||
| #  else | ||||
| #ifndef PCRE_EXP_DECL | ||||
| #  ifdef _WIN32 | ||||
| #    ifndef PCRE_STATIC | ||||
| #      define PCRE_DATA_SCOPE extern __declspec(dllimport) | ||||
| #      define PCRE_EXP_DECL extern __declspec(dllimport) | ||||
| #    endif | ||||
| #  endif | ||||
| #endif | ||||
| #endif | ||||
|  | ||||
| /* Otherwise, we use the standard "extern". */ | ||||
| /* By default, we use the standard "extern" declarations. */ | ||||
|  | ||||
| #ifndef PCRE_DATA_SCOPE | ||||
| #ifndef PCRE_EXP_DECL | ||||
| #  ifdef __cplusplus | ||||
| #    define PCRE_DATA_SCOPE     extern "C" | ||||
| #    define PCRE_EXP_DECL       extern "C" | ||||
| #  else | ||||
| #    define PCRE_DATA_SCOPE     extern | ||||
| #    define PCRE_EXP_DECL       extern | ||||
| #  endif | ||||
| #endif | ||||
|  | ||||
| @@ -122,6 +106,7 @@ extern "C" { | ||||
| #define PCRE_NEWLINE_LF         0x00200000 | ||||
| #define PCRE_NEWLINE_CRLF       0x00300000 | ||||
| #define PCRE_NEWLINE_ANY        0x00400000 | ||||
| #define PCRE_NEWLINE_ANYCRLF    0x00500000 | ||||
|  | ||||
| /* Exec-time and get/set-time error codes */ | ||||
|  | ||||
| @@ -165,6 +150,8 @@ extern "C" { | ||||
| #define PCRE_INFO_NAMETABLE          9 | ||||
| #define PCRE_INFO_STUDYSIZE         10 | ||||
| #define PCRE_INFO_DEFAULT_TABLES    11 | ||||
| #define PCRE_INFO_OKPARTIAL         12 | ||||
| #define PCRE_INFO_JCHANGED          13 | ||||
|  | ||||
| /* Request types for pcre_config(). Do not re-arrange, in order to remain | ||||
| compatible. */ | ||||
| @@ -243,41 +230,41 @@ typedef struct pcre_callout_block { | ||||
| #define pcre_free g_free | ||||
| #define pcre_stack_malloc g_try_malloc | ||||
|  | ||||
| PCRE_DATA_SCOPE int   (*pcre_callout)(pcre_callout_block *); | ||||
| PCRE_EXP_DECL int   (*pcre_callout)(pcre_callout_block *); | ||||
|  | ||||
| /* Exported PCRE functions */ | ||||
|  | ||||
| PCRE_DATA_SCOPE pcre *pcre_compile(const char *, int, const char **, int *, | ||||
| PCRE_EXP_DECL pcre *pcre_compile(const char *, int, const char **, int *, | ||||
|                   const unsigned char *); | ||||
| PCRE_DATA_SCOPE pcre *pcre_compile2(const char *, int, int *, const char **, | ||||
| PCRE_EXP_DECL pcre *pcre_compile2(const char *, int, int *, const char **, | ||||
|                   int *, const unsigned char *); | ||||
| PCRE_DATA_SCOPE int  pcre_config(int, void *); | ||||
| PCRE_DATA_SCOPE int  pcre_copy_named_substring(const pcre *, const char *, | ||||
| PCRE_EXP_DECL int  pcre_config(int, void *); | ||||
| PCRE_EXP_DECL int  pcre_copy_named_substring(const pcre *, const char *, | ||||
|                   int *, int, const char *, char *, int); | ||||
| PCRE_DATA_SCOPE int  pcre_copy_substring(const char *, int *, int, int, char *, | ||||
| PCRE_EXP_DECL int  pcre_copy_substring(const char *, int *, int, int, char *, | ||||
|                   int); | ||||
| PCRE_DATA_SCOPE int  pcre_dfa_exec(const pcre *, const pcre_extra *, | ||||
| PCRE_EXP_DECL int  pcre_dfa_exec(const pcre *, const pcre_extra *, | ||||
|                   const char *, int, int, int, int *, int , int *, int); | ||||
| PCRE_DATA_SCOPE int  pcre_exec(const pcre *, const pcre_extra *, PCRE_SPTR, | ||||
| PCRE_EXP_DECL int  pcre_exec(const pcre *, const pcre_extra *, PCRE_SPTR, | ||||
|                    int, int, int, int *, int); | ||||
| PCRE_DATA_SCOPE void pcre_free_substring(const char *); | ||||
| PCRE_DATA_SCOPE void pcre_free_substring_list(const char **); | ||||
| PCRE_DATA_SCOPE int  pcre_fullinfo(const pcre *, const pcre_extra *, int, | ||||
| PCRE_EXP_DECL void pcre_free_substring(const char *); | ||||
| PCRE_EXP_DECL void pcre_free_substring_list(const char **); | ||||
| PCRE_EXP_DECL int  pcre_fullinfo(const pcre *, const pcre_extra *, int, | ||||
|                   void *); | ||||
| PCRE_DATA_SCOPE int  pcre_get_named_substring(const pcre *, const char *, | ||||
| PCRE_EXP_DECL int  pcre_get_named_substring(const pcre *, const char *, | ||||
|                   int *, int, const char *, const char **); | ||||
| PCRE_DATA_SCOPE int  pcre_get_stringnumber(const pcre *, const char *); | ||||
| PCRE_DATA_SCOPE int  pcre_get_stringtable_entries(const pcre *, const char *, | ||||
| PCRE_EXP_DECL int  pcre_get_stringnumber(const pcre *, const char *); | ||||
| PCRE_EXP_DECL int  pcre_get_stringtable_entries(const pcre *, const char *, | ||||
|                   char **, char **); | ||||
| PCRE_DATA_SCOPE int  pcre_get_substring(const char *, int *, int, int, | ||||
| PCRE_EXP_DECL int  pcre_get_substring(const char *, int *, int, int, | ||||
|                   const char **); | ||||
| PCRE_DATA_SCOPE int  pcre_get_substring_list(const char *, int *, int, | ||||
| PCRE_EXP_DECL int  pcre_get_substring_list(const char *, int *, int, | ||||
|                   const char ***); | ||||
| PCRE_DATA_SCOPE int  pcre_info(const pcre *, int *, int *); | ||||
| PCRE_DATA_SCOPE const unsigned char *pcre_maketables(void); | ||||
| PCRE_DATA_SCOPE int  pcre_refcount(pcre *, int); | ||||
| PCRE_DATA_SCOPE pcre_extra *pcre_study(const pcre *, int, const char **); | ||||
| PCRE_DATA_SCOPE const char *pcre_version(void); | ||||
| PCRE_EXP_DECL int  pcre_info(const pcre *, int *, int *); | ||||
| PCRE_EXP_DECL const unsigned char *pcre_maketables(void); | ||||
| PCRE_EXP_DECL int  pcre_refcount(pcre *, int); | ||||
| PCRE_EXP_DECL pcre_extra *pcre_study(const pcre *, int, const char **); | ||||
| PCRE_EXP_DECL const char *pcre_version(void); | ||||
|  | ||||
| #ifdef __cplusplus | ||||
| }  /* extern "C" */ | ||||
|   | ||||
| @@ -1,24 +1,24 @@ | ||||
| /* This file is autogenerated by ../update-pcre/update.sh during | ||||
|  * the update of the local copy of PCRE. | ||||
|  */ | ||||
| /************************************************* | ||||
| *      Perl-Compatible Regular Expressions       * | ||||
| *************************************************/ | ||||
|  | ||||
| /* This file is automatically written by the dftables auxiliary  | ||||
| program. If you edit it by hand, you might like to edit the Makefile to  | ||||
| prevent its ever being regenerated. | ||||
| /* This file contains character tables that are used when no external tables | ||||
| are passed to PCRE by the application that calls it. The tables are used only | ||||
| for characters whose code values are less than 256. | ||||
|  | ||||
| This file contains the default tables for characters with codes less than | ||||
| 128 (ASCII characters). These tables are used when no external tables are | ||||
| passed to PCRE. | ||||
| This is a default version of the tables that assumes ASCII encoding. A program | ||||
| called dftables (which is distributed with PCRE) can be used to build | ||||
| alternative versions of this file. This is necessary if you are running in an | ||||
| EBCDIC environment, or if you want to default to a different encoding, for | ||||
| example ISO-8859-1. When dftables is run, it creates these tables in the | ||||
| current locale. If PCRE is configured with --enable-rebuild-chartables, this | ||||
| happens automatically. | ||||
|  | ||||
| The following #include is present because without it gcc 4.x may remove | ||||
| the array definition from the final binary if PCRE is built into a static | ||||
| library and dead code stripping is activated. This leads to link errors. | ||||
| Pulling in the header ensures that the array gets flagged as "someone | ||||
| outside this compilation unit might reference this" and so it will always | ||||
| be supplied to the linker. */ | ||||
| The following #include is present because without it gcc 4.x may remove the | ||||
| array definition from the final binary if PCRE is built into a static library | ||||
| and dead code stripping is activated. This leads to link errors. Pulling in the | ||||
| header ensures that the array gets flagged as "someone outside this compilation | ||||
| unit might reference this" and so it will always be supplied to the linker. */ | ||||
|  | ||||
| #include "pcre_internal.h" | ||||
|  | ||||
| @@ -94,11 +94,10 @@ const unsigned char _pcre_default_tables[] = { | ||||
|   240,241,242,243,244,245,246,247, | ||||
|   248,249,250,251,252,253,254,255, | ||||
|  | ||||
| /* This table contains bit maps for various character classes. | ||||
| Each map is 32 bytes long and the bits run from the least | ||||
| significant end of each byte. The classes that have their own | ||||
| maps are: space, xdigit, digit, upper, lower, word, graph | ||||
| print, punct, and cntrl. Other classes are built from combinations. */ | ||||
| /* This table contains bit maps for various character classes. Each map is 32 | ||||
| bytes long and the bits run from the least significant end of each byte. The | ||||
| classes that have their own maps are: space, xdigit, digit, upper, lower, word, | ||||
| graph, print, punct, and cntrl. Other classes are built from combinations. */ | ||||
|  | ||||
|   0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00, | ||||
|   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, | ||||
| @@ -192,4 +191,4 @@ print, punct, and cntrl. Other classes are built from combinations. */ | ||||
|   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */ | ||||
|   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */ | ||||
|  | ||||
| /* End of chartables.c */ | ||||
| /* End of pcre_chartables.c */ | ||||
|   | ||||
| @@ -6,7 +6,7 @@ | ||||
| and semantics are as close as possible to those of the Perl 5 language. | ||||
|  | ||||
|                        Written by Philip Hazel | ||||
|            Copyright (c) 1997-2006 University of Cambridge | ||||
|            Copyright (c) 1997-2007 University of Cambridge | ||||
|  | ||||
| ----------------------------------------------------------------------------- | ||||
| Redistribution and use in source and binary forms, with or without | ||||
| @@ -58,6 +58,11 @@ used by pcretest. DEBUG is not defined when building a production library. */ | ||||
| #endif | ||||
|  | ||||
|  | ||||
| /* Macro for setting individual bits in class bitmaps. */ | ||||
|  | ||||
| #define SETBIT(a,b) a[b/8] |= (1 << (b%8)) | ||||
|  | ||||
|  | ||||
| /************************************************* | ||||
| *      Code parameters and static tables         * | ||||
| *************************************************/ | ||||
| @@ -82,21 +87,21 @@ are simple data values; negative values are for special things like \d and so | ||||
| on. Zero means further processing is needed (for things like \x), or the escape | ||||
| is invalid. */ | ||||
|  | ||||
| #if !EBCDIC   /* This is the "normal" table for ASCII systems */ | ||||
| #ifndef EBCDIC  /* This is the "normal" table for ASCII systems */ | ||||
| static const short int escapes[] = { | ||||
|      0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */ | ||||
|      0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */ | ||||
|    '@', -ESC_A, -ESC_B, -ESC_C, -ESC_D, -ESC_E,      0, -ESC_G,   /* @ - G */ | ||||
|      0,      0,      0,      0,      0,      0,      0,      0,   /* H - O */ | ||||
| -ESC_P, -ESC_Q, -ESC_R, -ESC_S,      0,      0,      0, -ESC_W,   /* P - W */ | ||||
| -ESC_H,      0,      0, -ESC_K,      0,      0,      0,      0,   /* H - O */ | ||||
| -ESC_P, -ESC_Q, -ESC_R, -ESC_S,      0,      0, -ESC_V, -ESC_W,   /* P - W */ | ||||
| -ESC_X,      0, -ESC_Z,    '[',   '\\',    ']',    '^',    '_',   /* X - _ */ | ||||
|    '`',      7, -ESC_b,      0, -ESC_d,  ESC_e,  ESC_f,      0,   /* ` - g */ | ||||
|      0,      0,      0, -ESC_k,      0,      0,  ESC_n,      0,   /* h - o */ | ||||
| -ESC_p,      0,  ESC_r, -ESC_s,  ESC_tee,    0,      0, -ESC_w,   /* p - w */ | ||||
| -ESC_h,      0,      0, -ESC_k,      0,      0,  ESC_n,      0,   /* h - o */ | ||||
| -ESC_p,      0,  ESC_r, -ESC_s,  ESC_tee,    0, -ESC_v, -ESC_w,   /* p - w */ | ||||
|      0,      0, -ESC_z                                            /* x - z */ | ||||
| }; | ||||
|  | ||||
| #else         /* This is the "abnormal" table for EBCDIC systems */ | ||||
| #else           /* This is the "abnormal" table for EBCDIC systems */ | ||||
| static const short int escapes[] = { | ||||
| /*  48 */     0,     0,      0,     '.',    '<',   '(',    '+',    '|', | ||||
| /*  50 */   '&',     0,      0,       0,      0,     0,      0,      0, | ||||
| @@ -106,18 +111,18 @@ static const short int escapes[] = { | ||||
| /*  70 */     0,     0,      0,       0,      0,     0,      0,      0, | ||||
| /*  78 */     0,   '`',    ':',     '#',    '@',  '\'',    '=',    '"', | ||||
| /*  80 */     0,     7, -ESC_b,       0, -ESC_d, ESC_e,  ESC_f,      0, | ||||
| /*  88 */     0,     0,      0,     '{',      0,     0,      0,      0, | ||||
| /*  88 */-ESC_h,     0,      0,     '{',      0,     0,      0,      0, | ||||
| /*  90 */     0,     0, -ESC_k,     'l',      0, ESC_n,      0, -ESC_p, | ||||
| /*  98 */     0, ESC_r,      0,     '}',      0,     0,      0,      0, | ||||
| /*  A0 */     0,   '~', -ESC_s, ESC_tee,      0,     0, -ESC_w,      0, | ||||
| /*  A0 */     0,   '~', -ESC_s, ESC_tee,      0,-ESC_v, -ESC_w,      0, | ||||
| /*  A8 */     0,-ESC_z,      0,       0,      0,   '[',      0,      0, | ||||
| /*  B0 */     0,     0,      0,       0,      0,     0,      0,      0, | ||||
| /*  B8 */     0,     0,      0,       0,      0,   ']',    '=',    '-', | ||||
| /*  C0 */   '{',-ESC_A, -ESC_B,  -ESC_C, -ESC_D,-ESC_E,      0, -ESC_G, | ||||
| /*  C8 */     0,     0,      0,       0,      0,     0,      0,      0, | ||||
| /*  C8 */-ESC_H,     0,      0,       0,      0,     0,      0,      0, | ||||
| /*  D0 */   '}',     0,      0,       0,      0,     0,      0, -ESC_P, | ||||
| /*  D8 */-ESC_Q,-ESC_R,      0,       0,      0,     0,      0,      0, | ||||
| /*  E0 */  '\\',     0, -ESC_S,       0,      0,     0, -ESC_W, -ESC_X, | ||||
| /*  E0 */  '\\',     0, -ESC_S,       0,      0,-ESC_V, -ESC_W, -ESC_X, | ||||
| /*  E8 */     0,-ESC_Z,      0,       0,      0,     0,      0,      0, | ||||
| /*  F0 */     0,     0,      0,       0,      0,     0,      0,      0, | ||||
| /*  F8 */     0,     0,      0,       0,      0,     0,      0,      0 | ||||
| @@ -186,7 +191,7 @@ are no longer used. */ | ||||
|  | ||||
| #define DEAD(s) "\0" | ||||
|  | ||||
| static const char error_texts[] = | ||||
| static const char error_texts[] =  | ||||
|   "no error\0" | ||||
|   "\\ at end of pattern\0" | ||||
|   "\\c at end of pattern\0" | ||||
| @@ -221,7 +226,7 @@ static const char error_texts[] = | ||||
|   "malformed number or name after (?(\0" | ||||
|   "conditional group contains more than two branches\0" | ||||
|   "assertion expected after (?(\0" | ||||
|   "(?R or (?digits must be followed by )\0" | ||||
|   "(?R or (?[+-]digits must be followed by )\0" | ||||
|   /* 30 */ | ||||
|   "unknown POSIX class name\0" | ||||
|   "POSIX collating elements are not supported\0" | ||||
| @@ -255,7 +260,8 @@ static const char error_texts[] = | ||||
|   /* 55 */ | ||||
|   "repeating a DEFINE group is not allowed\0" | ||||
|   "inconsistent NEWLINE options\0" | ||||
|   "\\g is not followed by an (optionally braced) non-zero number"; | ||||
|   "\\g is not followed by a braced name or an optionally braced non-zero number\0" | ||||
|   "(?+ or (?- or (?(+ or (?(- must be followed by a non-zero number"; | ||||
|  | ||||
| static const int error_texts_offsets[] = { | ||||
|   0, | ||||
| @@ -315,15 +321,14 @@ static const int error_texts_offsets[] = { | ||||
|   1796, | ||||
|   1839, | ||||
|   1879, | ||||
|   1908 | ||||
|   1908, | ||||
|   1984 | ||||
| }; | ||||
|  | ||||
|  | ||||
| /* Definition to allow mutual recursion */ | ||||
|  | ||||
| static BOOL | ||||
|   compile_regex(int, int, uschar **, const uschar **, int *, BOOL, int, int *, | ||||
|     int *, branch_chain *, compile_data *, int *); | ||||
|   compile_regex(int, int, uschar **, const uschar **, int *, BOOL, BOOL, int, | ||||
|     int *, int *, branch_chain *, compile_data *, int *); | ||||
|  | ||||
|  | ||||
|  | ||||
| @@ -370,11 +375,11 @@ if (c == 0) *errorcodeptr = ERR1; | ||||
| a table. A non-zero result is something that can be returned immediately. | ||||
| Otherwise further processing may be required. */ | ||||
|  | ||||
| #if !EBCDIC    /* ASCII coding */ | ||||
| #ifndef EBCDIC  /* ASCII coding */ | ||||
| else if (c < '0' || c > 'z') {}                           /* Not alphameric */ | ||||
| else if ((i = escapes[c - '0']) != 0) c = i; | ||||
|  | ||||
| #else          /* EBCDIC coding */ | ||||
| #else           /* EBCDIC coding */ | ||||
| else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {}   /* Not alphameric */ | ||||
| else if ((i = escapes[c - 0x48]) != 0)  c = i; | ||||
| #endif | ||||
| @@ -401,11 +406,22 @@ else | ||||
|  | ||||
|     /* \g must be followed by a number, either plain or braced. If positive, it | ||||
|     is an absolute backreference. If negative, it is a relative backreference. | ||||
|     This is a Perl 5.10 feature. */ | ||||
|     This is a Perl 5.10 feature. Perl 5.10 also supports \g{name} as a | ||||
|     reference to a named group. This is part of Perl's movement towards a | ||||
|     unified syntax for back references. As this is synonymous with \k{name}, we | ||||
|     fudge it up by pretending it really was \k. */ | ||||
|  | ||||
|     case 'g': | ||||
|     if (ptr[1] == '{') | ||||
|       { | ||||
|       const uschar *p; | ||||
|       for (p = ptr+2; *p != 0 && *p != '}'; p++) | ||||
|         if (*p != '-' && g_ascii_isdigit(*p) == 0) break; | ||||
|       if (*p != 0 && *p != '}') | ||||
|         { | ||||
|         c = -ESC_k; | ||||
|         break; | ||||
|         } | ||||
|       braced = TRUE; | ||||
|       ptr++; | ||||
|       } | ||||
| @@ -511,10 +527,10 @@ else | ||||
|         if (c == 0 && cc == '0') continue;     /* Leading zeroes */ | ||||
|         count++; | ||||
|  | ||||
| #if !EBCDIC    /* ASCII coding */ | ||||
| #ifndef EBCDIC  /* ASCII coding */ | ||||
|         if (cc >= 'a') cc -= 32;               /* Convert to upper case */ | ||||
|         c = (c << 4) + cc - ((cc < 'A')? '0' : ('A' - 10)); | ||||
| #else          /* EBCDIC coding */ | ||||
| #else           /* EBCDIC coding */ | ||||
|         if (cc >= 'a' && cc <= 'z') cc += 64;  /* Convert to upper case */ | ||||
|         c = (c << 4) + cc - ((cc >= '0')? '0' : ('A' - 10)); | ||||
| #endif | ||||
| @@ -538,10 +554,10 @@ else | ||||
|       { | ||||
|       int cc;                               /* Some compilers don't like ++ */ | ||||
|       cc = *(++ptr);                        /* in initializers */ | ||||
| #if !EBCDIC    /* ASCII coding */ | ||||
| #ifndef EBCDIC  /* ASCII coding */ | ||||
|       if (cc >= 'a') cc -= 32;              /* Convert to upper case */ | ||||
|       c = c * 16 + cc - ((cc < 'A')? '0' : ('A' - 10)); | ||||
| #else          /* EBCDIC coding */ | ||||
| #else           /* EBCDIC coding */ | ||||
|       if (cc <= 'z') cc += 64;              /* Convert to upper case */ | ||||
|       c = c * 16 + cc - ((cc >= '0')? '0' : ('A' - 10)); | ||||
| #endif | ||||
| @@ -560,10 +576,10 @@ else | ||||
|       return 0; | ||||
|       } | ||||
|  | ||||
| #if !EBCDIC    /* ASCII coding */ | ||||
| #ifndef EBCDIC  /* ASCII coding */ | ||||
|     if (c >= 'a' && c <= 'z') c -= 32; | ||||
|     c ^= 0x40; | ||||
| #else          /* EBCDIC coding */ | ||||
| #else           /* EBCDIC coding */ | ||||
|     if (c >= 'a' && c <= 'z') c += 64; | ||||
|     c ^= 0xC0; | ||||
| #endif | ||||
| @@ -1195,6 +1211,7 @@ for (;;) | ||||
|   else | ||||
|     { | ||||
|     code += _pcre_OP_lengths[c]; | ||||
| #ifdef SUPPORT_UTF8 | ||||
|     if (utf8) switch(c) | ||||
|       { | ||||
|       case OP_CHAR: | ||||
| @@ -1215,6 +1232,7 @@ for (;;) | ||||
|       if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f]; | ||||
|       break; | ||||
|       } | ||||
| #endif | ||||
|     } | ||||
|   } | ||||
| } | ||||
| @@ -1258,6 +1276,7 @@ for (;;) | ||||
|   else | ||||
|     { | ||||
|     code += _pcre_OP_lengths[c]; | ||||
| #ifdef SUPPORT_UTF8 | ||||
|     if (utf8) switch(c) | ||||
|       { | ||||
|       case OP_CHAR: | ||||
| @@ -1278,6 +1297,7 @@ for (;;) | ||||
|       if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f]; | ||||
|       break; | ||||
|       } | ||||
| #endif | ||||
|     } | ||||
|   } | ||||
| } | ||||
| @@ -1315,6 +1335,18 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE | ||||
|  | ||||
|   c = *code; | ||||
|  | ||||
|   /* Groups with zero repeats can of course be empty; skip them. */ | ||||
|  | ||||
|   if (c == OP_BRAZERO || c == OP_BRAMINZERO) | ||||
|     { | ||||
|     code += _pcre_OP_lengths[c]; | ||||
|     do code += GET(code, 1); while (*code == OP_ALT); | ||||
|     c = *code; | ||||
|     continue; | ||||
|     } | ||||
|  | ||||
|   /* For other groups, scan the branches. */ | ||||
|  | ||||
|   if (c == OP_BRA || c == OP_CBRA || c == OP_ONCE) | ||||
|     { | ||||
|     BOOL empty_branch; | ||||
| @@ -1331,12 +1363,7 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE | ||||
|       } | ||||
|     while (*code == OP_ALT); | ||||
|     if (!empty_branch) return FALSE;   /* All branches are non-empty */ | ||||
|  | ||||
|     /* Move past the KET and fudge things so that the increment in the "for" | ||||
|     above has no effect. */ | ||||
|  | ||||
|     c = OP_END; | ||||
|     code += 1 + LINK_SIZE - _pcre_OP_lengths[c]; | ||||
|     c = *code; | ||||
|     continue; | ||||
|     } | ||||
|  | ||||
| @@ -1530,8 +1557,8 @@ check_posix_name(const uschar *ptr, int len) | ||||
|   int yield = 0; | ||||
|   while (posix_name_lengths[yield] != 0) | ||||
|     { | ||||
|       if (len == posix_name_lengths[yield] && | ||||
| 	  strcmp((const char *)ptr, posix_names + offset) == 0) return yield; | ||||
|      if (len == posix_name_lengths[yield] && | ||||
|          strcmp((const char *)ptr, posix_names + offset) == 0) return yield; | ||||
|       offset += posix_name_lengths[yield] + 1; | ||||
|       yield++; | ||||
|     } | ||||
| @@ -1872,6 +1899,50 @@ if (next >= 0) switch(op_code) | ||||
|   case OP_NOT_WORDCHAR: | ||||
|   return next <= 127 && (cd->ctypes[next] & ctype_word) != 0; | ||||
|  | ||||
|   case OP_HSPACE: | ||||
|   case OP_NOT_HSPACE: | ||||
|   switch(next) | ||||
|     { | ||||
|     case 0x09: | ||||
|     case 0x20: | ||||
|     case 0xa0: | ||||
|     case 0x1680: | ||||
|     case 0x180e: | ||||
|     case 0x2000: | ||||
|     case 0x2001: | ||||
|     case 0x2002: | ||||
|     case 0x2003: | ||||
|     case 0x2004: | ||||
|     case 0x2005: | ||||
|     case 0x2006: | ||||
|     case 0x2007: | ||||
|     case 0x2008: | ||||
|     case 0x2009: | ||||
|     case 0x200A: | ||||
|     case 0x202f: | ||||
|     case 0x205f: | ||||
|     case 0x3000: | ||||
|     return op_code != OP_HSPACE; | ||||
|     default: | ||||
|     return op_code == OP_HSPACE; | ||||
|     } | ||||
|  | ||||
|   case OP_VSPACE: | ||||
|   case OP_NOT_VSPACE: | ||||
|   switch(next) | ||||
|     { | ||||
|     case 0x0a: | ||||
|     case 0x0b: | ||||
|     case 0x0c: | ||||
|     case 0x0d: | ||||
|     case 0x85: | ||||
|     case 0x2028: | ||||
|     case 0x2029: | ||||
|     return op_code != OP_VSPACE; | ||||
|     default: | ||||
|     return op_code == OP_VSPACE; | ||||
|     } | ||||
|  | ||||
|   default: | ||||
|   return FALSE; | ||||
|   } | ||||
| @@ -1906,12 +1977,57 @@ switch(op_code) | ||||
|     case ESC_W: | ||||
|     return item <= 127 && (cd->ctypes[item] & ctype_word) != 0; | ||||
|  | ||||
|     case ESC_h: | ||||
|     case ESC_H: | ||||
|     switch(item) | ||||
|       { | ||||
|       case 0x09: | ||||
|       case 0x20: | ||||
|       case 0xa0: | ||||
|       case 0x1680: | ||||
|       case 0x180e: | ||||
|       case 0x2000: | ||||
|       case 0x2001: | ||||
|       case 0x2002: | ||||
|       case 0x2003: | ||||
|       case 0x2004: | ||||
|       case 0x2005: | ||||
|       case 0x2006: | ||||
|       case 0x2007: | ||||
|       case 0x2008: | ||||
|       case 0x2009: | ||||
|       case 0x200A: | ||||
|       case 0x202f: | ||||
|       case 0x205f: | ||||
|       case 0x3000: | ||||
|       return -next != ESC_h; | ||||
|       default: | ||||
|       return -next == ESC_h; | ||||
|       } | ||||
|  | ||||
|     case ESC_v: | ||||
|     case ESC_V: | ||||
|     switch(item) | ||||
|       { | ||||
|       case 0x0a: | ||||
|       case 0x0b: | ||||
|       case 0x0c: | ||||
|       case 0x0d: | ||||
|       case 0x85: | ||||
|       case 0x2028: | ||||
|       case 0x2029: | ||||
|       return -next != ESC_v; | ||||
|       default: | ||||
|       return -next == ESC_v; | ||||
|       } | ||||
|  | ||||
|     default: | ||||
|     return FALSE; | ||||
|     } | ||||
|  | ||||
|   case OP_DIGIT: | ||||
|   return next == -ESC_D || next == -ESC_s || next == -ESC_W; | ||||
|   return next == -ESC_D || next == -ESC_s || next == -ESC_W || | ||||
|          next == -ESC_h || next == -ESC_v; | ||||
|  | ||||
|   case OP_NOT_DIGIT: | ||||
|   return next == -ESC_d; | ||||
| @@ -1920,10 +2036,23 @@ switch(op_code) | ||||
|   return next == -ESC_S || next == -ESC_d || next == -ESC_w; | ||||
|  | ||||
|   case OP_NOT_WHITESPACE: | ||||
|   return next == -ESC_s; | ||||
|   return next == -ESC_s || next == -ESC_h || next == -ESC_v; | ||||
|  | ||||
|   case OP_HSPACE: | ||||
|   return next == -ESC_S || next == -ESC_H || next == -ESC_d || next == -ESC_w; | ||||
|  | ||||
|   case OP_NOT_HSPACE: | ||||
|   return next == -ESC_h; | ||||
|  | ||||
|   /* Can't have \S in here because VT matches \S (Perl anomaly) */ | ||||
|   case OP_VSPACE: | ||||
|   return next == -ESC_V || next == -ESC_d || next == -ESC_w; | ||||
|  | ||||
|   case OP_NOT_VSPACE: | ||||
|   return next == -ESC_v; | ||||
|  | ||||
|   case OP_WORDCHAR: | ||||
|   return next == -ESC_W || next == -ESC_s; | ||||
|   return next == -ESC_W || next == -ESC_s || next == -ESC_h || next == -ESC_v; | ||||
|  | ||||
|   case OP_NOT_WORDCHAR: | ||||
|   return next == -ESC_w || next == -ESC_d; | ||||
| @@ -2038,10 +2167,12 @@ for (;; ptr++) | ||||
|   BOOL possessive_quantifier; | ||||
|   BOOL is_quantifier; | ||||
|   BOOL is_recurse; | ||||
|   BOOL reset_bracount; | ||||
|   int class_charcount; | ||||
|   int class_lastchar; | ||||
|   int newoptions; | ||||
|   int recno; | ||||
|   int refsign; | ||||
|   int skipbytes; | ||||
|   int subreqbyte; | ||||
|   int subfirstbyte; | ||||
| @@ -2466,6 +2597,133 @@ for (;; ptr++) | ||||
|           else if (c == -ESC_d || c == -ESC_D || c == -ESC_w || | ||||
|                    c == -ESC_W || c == -ESC_s || c == -ESC_S) continue; | ||||
|  | ||||
|           /* We need to deal with \H, \h, \V, and \v in both phases because | ||||
|           they use extra memory. */ | ||||
|  | ||||
|           if (-c == ESC_h) | ||||
|             { | ||||
|             SETBIT(classbits, 0x09); /* HT */ | ||||
|             SETBIT(classbits, 0x20); /* SPACE */ | ||||
|             SETBIT(classbits, 0xa0); /* NBSP */ | ||||
| #ifdef SUPPORT_UTF8 | ||||
|             if (utf8) | ||||
|               { | ||||
|               class_utf8 = TRUE; | ||||
|               *class_utf8data++ = XCL_SINGLE; | ||||
|               class_utf8data += _pcre_ord2utf8(0x1680, class_utf8data); | ||||
|               *class_utf8data++ = XCL_SINGLE; | ||||
|               class_utf8data += _pcre_ord2utf8(0x180e, class_utf8data); | ||||
|               *class_utf8data++ = XCL_RANGE; | ||||
|               class_utf8data += _pcre_ord2utf8(0x2000, class_utf8data); | ||||
|               class_utf8data += _pcre_ord2utf8(0x200A, class_utf8data); | ||||
|               *class_utf8data++ = XCL_SINGLE; | ||||
|               class_utf8data += _pcre_ord2utf8(0x202f, class_utf8data); | ||||
|               *class_utf8data++ = XCL_SINGLE; | ||||
|               class_utf8data += _pcre_ord2utf8(0x205f, class_utf8data); | ||||
|               *class_utf8data++ = XCL_SINGLE; | ||||
|               class_utf8data += _pcre_ord2utf8(0x3000, class_utf8data); | ||||
|               } | ||||
| #endif | ||||
|             continue; | ||||
|             } | ||||
|  | ||||
|           if (-c == ESC_H) | ||||
|             { | ||||
|             for (c = 0; c < 32; c++) | ||||
|               { | ||||
|               int x = 0xff; | ||||
|               switch (c) | ||||
|                 { | ||||
|                 case 0x09/8: x ^= 1 << (0x09%8); break; | ||||
|                 case 0x20/8: x ^= 1 << (0x20%8); break; | ||||
|                 case 0xa0/8: x ^= 1 << (0xa0%8); break; | ||||
|                 default: break; | ||||
|                 } | ||||
|               classbits[c] |= x; | ||||
|               } | ||||
|  | ||||
| #ifdef SUPPORT_UTF8 | ||||
|             if (utf8) | ||||
|               { | ||||
|               class_utf8 = TRUE; | ||||
|               *class_utf8data++ = XCL_RANGE; | ||||
|               class_utf8data += _pcre_ord2utf8(0x0100, class_utf8data); | ||||
|               class_utf8data += _pcre_ord2utf8(0x167f, class_utf8data); | ||||
|               *class_utf8data++ = XCL_RANGE; | ||||
|               class_utf8data += _pcre_ord2utf8(0x1681, class_utf8data); | ||||
|               class_utf8data += _pcre_ord2utf8(0x180d, class_utf8data); | ||||
|               *class_utf8data++ = XCL_RANGE; | ||||
|               class_utf8data += _pcre_ord2utf8(0x180f, class_utf8data); | ||||
|               class_utf8data += _pcre_ord2utf8(0x1fff, class_utf8data); | ||||
|               *class_utf8data++ = XCL_RANGE; | ||||
|               class_utf8data += _pcre_ord2utf8(0x200B, class_utf8data); | ||||
|               class_utf8data += _pcre_ord2utf8(0x202e, class_utf8data); | ||||
|               *class_utf8data++ = XCL_RANGE; | ||||
|               class_utf8data += _pcre_ord2utf8(0x2030, class_utf8data); | ||||
|               class_utf8data += _pcre_ord2utf8(0x205e, class_utf8data); | ||||
|               *class_utf8data++ = XCL_RANGE; | ||||
|               class_utf8data += _pcre_ord2utf8(0x2060, class_utf8data); | ||||
|               class_utf8data += _pcre_ord2utf8(0x2fff, class_utf8data); | ||||
|               *class_utf8data++ = XCL_RANGE; | ||||
|               class_utf8data += _pcre_ord2utf8(0x3001, class_utf8data); | ||||
|               class_utf8data += _pcre_ord2utf8(0x7fffffff, class_utf8data); | ||||
|               } | ||||
| #endif | ||||
|             continue; | ||||
|             } | ||||
|  | ||||
|           if (-c == ESC_v) | ||||
|             { | ||||
|             SETBIT(classbits, 0x0a); /* LF */ | ||||
|             SETBIT(classbits, 0x0b); /* VT */ | ||||
|             SETBIT(classbits, 0x0c); /* FF */ | ||||
|             SETBIT(classbits, 0x0d); /* CR */ | ||||
|             SETBIT(classbits, 0x85); /* NEL */ | ||||
| #ifdef SUPPORT_UTF8 | ||||
|             if (utf8) | ||||
|               { | ||||
|               class_utf8 = TRUE; | ||||
|               *class_utf8data++ = XCL_RANGE; | ||||
|               class_utf8data += _pcre_ord2utf8(0x2028, class_utf8data); | ||||
|               class_utf8data += _pcre_ord2utf8(0x2029, class_utf8data); | ||||
|               } | ||||
| #endif | ||||
|             continue; | ||||
|             } | ||||
|  | ||||
|           if (-c == ESC_V) | ||||
|             { | ||||
|             for (c = 0; c < 32; c++) | ||||
|               { | ||||
|               int x = 0xff; | ||||
|               switch (c) | ||||
|                 { | ||||
|                 case 0x0a/8: x ^= 1 << (0x0a%8); | ||||
|                              x ^= 1 << (0x0b%8); | ||||
|                              x ^= 1 << (0x0c%8); | ||||
|                              x ^= 1 << (0x0d%8); | ||||
|                              break; | ||||
|                 case 0x85/8: x ^= 1 << (0x85%8); break; | ||||
|                 default: break; | ||||
|                 } | ||||
|               classbits[c] |= x; | ||||
|               } | ||||
|  | ||||
| #ifdef SUPPORT_UTF8 | ||||
|             if (utf8) | ||||
|               { | ||||
|               class_utf8 = TRUE; | ||||
|               *class_utf8data++ = XCL_RANGE; | ||||
|               class_utf8data += _pcre_ord2utf8(0x0100, class_utf8data); | ||||
|               class_utf8data += _pcre_ord2utf8(0x2027, class_utf8data); | ||||
|               *class_utf8data++ = XCL_RANGE; | ||||
|               class_utf8data += _pcre_ord2utf8(0x2029, class_utf8data); | ||||
|               class_utf8data += _pcre_ord2utf8(0x7fffffff, class_utf8data); | ||||
|               } | ||||
| #endif | ||||
|             continue; | ||||
|             } | ||||
|  | ||||
|           /* We need to deal with \P and \p in both phases. */ | ||||
|  | ||||
| #ifdef SUPPORT_UCP | ||||
| @@ -2606,14 +2864,18 @@ for (;; ptr++) | ||||
|             unsigned int origd = d; | ||||
|             while (get_othercase_range(&cc, origd, &occ, &ocd)) | ||||
|               { | ||||
|               if (occ >= c && ocd <= d) continue;  /* Skip embedded ranges */ | ||||
|               if (occ >= (unsigned int)c && | ||||
|                   ocd <= (unsigned int)d) | ||||
|                 continue;                          /* Skip embedded ranges */ | ||||
|  | ||||
|               if (occ < c  && ocd >= c - 1)        /* Extend the basic range */ | ||||
|               if (occ < (unsigned int)c  && | ||||
|                   ocd >= (unsigned int)c - 1)      /* Extend the basic range */ | ||||
|                 {                                  /* if there is overlap,   */ | ||||
|                 c = occ;                           /* noting that if occ < c */ | ||||
|                 continue;                          /* we can't have ocd > d  */ | ||||
|                 }                                  /* because a subrange is  */ | ||||
|               if (ocd > d && occ <= d + 1)         /* always shorter than    */ | ||||
|               if (ocd > (unsigned int)d && | ||||
|                   occ <= (unsigned int)d + 1)      /* always shorter than    */ | ||||
|                 {                                  /* the basic range.       */ | ||||
|                 d = ocd; | ||||
|                 continue; | ||||
| @@ -3511,6 +3773,7 @@ for (;; ptr++) | ||||
|     skipbytes = 0; | ||||
|     bravalue = OP_CBRA; | ||||
|     save_hwm = cd->hwm; | ||||
|     reset_bracount = FALSE; | ||||
|  | ||||
|     if (*(++ptr) == '?') | ||||
|       { | ||||
| @@ -3532,6 +3795,11 @@ for (;; ptr++) | ||||
|         continue; | ||||
|  | ||||
|  | ||||
|         /* ------------------------------------------------------------ */ | ||||
|         case '|':                 /* Reset capture count for each branch */ | ||||
|         reset_bracount = TRUE; | ||||
|         /* Fall through */ | ||||
|  | ||||
|         /* ------------------------------------------------------------ */ | ||||
|         case ':':                 /* Non-capturing bracket */ | ||||
|         bravalue = OP_BRA; | ||||
| @@ -3568,6 +3836,7 @@ for (;; ptr++) | ||||
|  | ||||
|         code[1+LINK_SIZE] = OP_CREF; | ||||
|         skipbytes = 3; | ||||
|         refsign = -1; | ||||
|  | ||||
|         /* Check for a test for recursion in a named group. */ | ||||
|  | ||||
| @@ -3591,7 +3860,11 @@ for (;; ptr++) | ||||
|           terminator = '\''; | ||||
|           ptr++; | ||||
|           } | ||||
|         else terminator = 0; | ||||
|         else | ||||
|           { | ||||
|           terminator = 0; | ||||
|           if (ptr[1] == '-' || ptr[1] == '+') refsign = *(++ptr); | ||||
|           } | ||||
|  | ||||
|         /* We now expect to read a name; anything else is an error */ | ||||
|  | ||||
| @@ -3627,7 +3900,32 @@ for (;; ptr++) | ||||
|         if (lengthptr != NULL) break; | ||||
|  | ||||
|         /* In the real compile we do the work of looking for the actual | ||||
|         reference. */ | ||||
|         reference. If the string started with "+" or "-" we require the rest to | ||||
|         be digits, in which case recno will be set. */ | ||||
|  | ||||
|         if (refsign > 0) | ||||
|           { | ||||
|           if (recno <= 0) | ||||
|             { | ||||
|             *errorcodeptr = ERR58; | ||||
|             goto FAILED; | ||||
|             } | ||||
|           if (refsign == '-') | ||||
|             { | ||||
|             recno = cd->bracount - recno + 1; | ||||
|             if (recno <= 0) | ||||
|               { | ||||
|               *errorcodeptr = ERR15; | ||||
|               goto FAILED; | ||||
|               } | ||||
|             } | ||||
|           else recno += cd->bracount; | ||||
|           PUT2(code, 2+LINK_SIZE, recno); | ||||
|           break; | ||||
|           } | ||||
|  | ||||
|         /* Otherwise (did not start with "+" or "-"), start by looking for the | ||||
|         name. */ | ||||
|  | ||||
|         slot = cd->name_table; | ||||
|         for (i = 0; i < cd->names_found; i++) | ||||
| @@ -3946,19 +4244,54 @@ for (;; ptr++) | ||||
|  | ||||
|  | ||||
|         /* ------------------------------------------------------------ */ | ||||
|         case '-': case '+': | ||||
|         case '0': case '1': case '2': case '3': case '4':   /* Recursion or */ | ||||
|         case '5': case '6': case '7': case '8': case '9':   /* subroutine */ | ||||
|           { | ||||
|           const uschar *called; | ||||
|  | ||||
|           if ((refsign = *ptr) == '+') ptr++; | ||||
|           else if (refsign == '-') | ||||
|             { | ||||
|             if (g_ascii_isdigit(ptr[1]) == 0) | ||||
|               goto OTHER_CHAR_AFTER_QUERY; | ||||
|             ptr++; | ||||
|             } | ||||
|  | ||||
|           recno = 0; | ||||
|           while(g_ascii_isdigit(*ptr) != 0) | ||||
|             recno = recno * 10 + *ptr++ - '0'; | ||||
|  | ||||
|           if (*ptr != ')') | ||||
|             { | ||||
|             *errorcodeptr = ERR29; | ||||
|             goto FAILED; | ||||
|             } | ||||
|  | ||||
|           if (refsign == '-') | ||||
|             { | ||||
|             if (recno == 0) | ||||
|               { | ||||
|               *errorcodeptr = ERR58; | ||||
|               goto FAILED; | ||||
|               } | ||||
|             recno = cd->bracount - recno + 1; | ||||
|             if (recno <= 0) | ||||
|               { | ||||
|               *errorcodeptr = ERR15; | ||||
|               goto FAILED; | ||||
|               } | ||||
|             } | ||||
|           else if (refsign == '+') | ||||
|             { | ||||
|             if (recno == 0) | ||||
|               { | ||||
|               *errorcodeptr = ERR58; | ||||
|               goto FAILED; | ||||
|               } | ||||
|             recno += cd->bracount; | ||||
|             } | ||||
|  | ||||
|           /* Come here from code above that handles a named recursion */ | ||||
|  | ||||
|           HANDLE_RECURSION: | ||||
| @@ -4031,6 +4364,7 @@ for (;; ptr++) | ||||
|  | ||||
|         /* ------------------------------------------------------------ */ | ||||
|         default:              /* Other characters: check option setting */ | ||||
|         OTHER_CHAR_AFTER_QUERY: | ||||
|         set = unset = 0; | ||||
|         optset = &set; | ||||
|  | ||||
| @@ -4165,6 +4499,7 @@ for (;; ptr++) | ||||
|          errorcodeptr,                 /* Where to put an error message */ | ||||
|          (bravalue == OP_ASSERTBACK || | ||||
|           bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */ | ||||
|          reset_bracount,               /* True if (?| group */ | ||||
|          skipbytes,                    /* Skip over bracket number */ | ||||
|          &subfirstbyte,                /* For possible first char */ | ||||
|          &subreqbyte,                  /* For possible last char */ | ||||
| @@ -4181,9 +4516,11 @@ for (;; ptr++) | ||||
|     is on the bracket. */ | ||||
|  | ||||
|     /* If this is a conditional bracket, check that there are no more than | ||||
|     two branches in the group, or just one if it's a DEFINE group. */ | ||||
|     two branches in the group, or just one if it's a DEFINE group. We do this | ||||
|     in the real compile phase, not in the pre-pass, where the whole group may | ||||
|     not be available. */ | ||||
|  | ||||
|     if (bravalue == OP_COND) | ||||
|     if (bravalue == OP_COND && lengthptr == NULL) | ||||
|       { | ||||
|       uschar *tc = code; | ||||
|       int condcount = 0; | ||||
| @@ -4343,12 +4680,13 @@ for (;; ptr++) | ||||
|       zerofirstbyte = firstbyte; | ||||
|       zeroreqbyte = reqbyte; | ||||
|  | ||||
|       /* \k<name> or \k'name' is a back reference by name (Perl syntax) */ | ||||
|       /* \k<name> or \k'name' is a back reference by name (Perl syntax). | ||||
|       We also support \k{name} (.NET syntax) */ | ||||
|  | ||||
|       if (-c == ESC_k && (ptr[1] == '<' || ptr[1] == '\'')) | ||||
|       if (-c == ESC_k && (ptr[1] == '<' || ptr[1] == '\'' || ptr[1] == '{')) | ||||
|         { | ||||
|         is_recurse = FALSE; | ||||
|         terminator = (*(++ptr) == '<')? '>' : '\''; | ||||
|         terminator = (*(++ptr) == '<')? '>' : (*ptr == '\'')? '\'' : '}'; | ||||
|         goto NAMED_REF_OR_RECURSE; | ||||
|         } | ||||
|  | ||||
| @@ -4514,13 +4852,14 @@ This function is used during the pre-compile phase when we are trying to find | ||||
| out the amount of memory needed, as well as during the real compile phase. The | ||||
| value of lengthptr distinguishes the two phases. | ||||
|  | ||||
| Argument: | ||||
| Arguments: | ||||
|   options        option bits, including any changes for this subpattern | ||||
|   oldims         previous settings of ims option bits | ||||
|   codeptr        -> the address of the current code pointer | ||||
|   ptrptr         -> the address of the current pattern pointer | ||||
|   errorcodeptr   -> pointer to error code variable | ||||
|   lookbehind     TRUE if this is a lookbehind assertion | ||||
|   reset_bracount TRUE to reset the count for each branch | ||||
|   skipbytes      skip this many bytes at start (for brackets and OP_COND) | ||||
|   firstbyteptr   place to put the first required character, or a negative number | ||||
|   reqbyteptr     place to put the last required character, or a negative number | ||||
| @@ -4534,8 +4873,9 @@ Returns:         TRUE on success | ||||
|  | ||||
| static BOOL | ||||
| compile_regex(int options, int oldims, uschar **codeptr, const uschar **ptrptr, | ||||
|   int *errorcodeptr, BOOL lookbehind, int skipbytes, int *firstbyteptr, | ||||
|   int *reqbyteptr, branch_chain *bcptr, compile_data *cd, int *lengthptr) | ||||
|   int *errorcodeptr, BOOL lookbehind, BOOL reset_bracount, int skipbytes, | ||||
|   int *firstbyteptr, int *reqbyteptr, branch_chain *bcptr, compile_data *cd, | ||||
|   int *lengthptr) | ||||
| { | ||||
| const uschar *ptr = *ptrptr; | ||||
| uschar *code = *codeptr; | ||||
| @@ -4545,6 +4885,8 @@ uschar *reverse_count = NULL; | ||||
| int firstbyte, reqbyte; | ||||
| int branchfirstbyte, branchreqbyte; | ||||
| int length; | ||||
| int orig_bracount; | ||||
| int max_bracount; | ||||
| branch_chain bc; | ||||
|  | ||||
| bc.outer = bcptr; | ||||
| @@ -4573,8 +4915,14 @@ code += 1 + LINK_SIZE + skipbytes; | ||||
|  | ||||
| /* Loop for each alternative branch */ | ||||
|  | ||||
| orig_bracount = max_bracount = cd->bracount; | ||||
| for (;;) | ||||
|   { | ||||
|   /* For a (?| group, reset the capturing bracket count so that each branch | ||||
|   uses the same numbers. */ | ||||
|  | ||||
|   if (reset_bracount) cd->bracount = orig_bracount; | ||||
|  | ||||
|   /* Handle a change of ims options at the start of the branch */ | ||||
|  | ||||
|   if ((options & PCRE_IMS) != oldims) | ||||
| @@ -4604,6 +4952,11 @@ for (;;) | ||||
|     return FALSE; | ||||
|     } | ||||
|  | ||||
|   /* Keep the highest bracket count in case (?| was used and some branch | ||||
|   has fewer than the rest. */ | ||||
|  | ||||
|   if (cd->bracount > max_bracount) max_bracount = cd->bracount; | ||||
|  | ||||
|   /* In the real compile phase, there is some post-processing to be done. */ | ||||
|  | ||||
|   if (lengthptr == NULL) | ||||
| @@ -4667,26 +5020,29 @@ for (;;) | ||||
|       } | ||||
|     } | ||||
|  | ||||
|   /* Reached end of expression, either ')' or end of pattern. Go back through | ||||
|   the alternative branches and reverse the chain of offsets, with the field in | ||||
|   the BRA item now becoming an offset to the first alternative. If there are | ||||
|   no alternatives, it points to the end of the group. The length in the | ||||
|   terminating ket is always the length of the whole bracketed item. If any of | ||||
|   the ims options were changed inside the group, compile a resetting op-code | ||||
|   following, except at the very end of the pattern. Return leaving the pointer | ||||
|   at the terminating char. */ | ||||
|   /* Reached end of expression, either ')' or end of pattern. In the real | ||||
|   compile phase, go back through the alternative branches and reverse the chain | ||||
|   of offsets, with the field in the BRA item now becoming an offset to the | ||||
|   first alternative. If there are no alternatives, it points to the end of the | ||||
|   group. The length in the terminating ket is always the length of the whole | ||||
|   bracketed item. If any of the ims options were changed inside the group, | ||||
|   compile a resetting op-code following, except at the very end of the pattern. | ||||
|   Return leaving the pointer at the terminating char. */ | ||||
|  | ||||
|   if (*ptr != '|') | ||||
|     { | ||||
|     int branch_length = code - last_branch; | ||||
|     do | ||||
|     if (lengthptr == NULL) | ||||
|       { | ||||
|       int prev_length = GET(last_branch, 1); | ||||
|       PUT(last_branch, 1, branch_length); | ||||
|       branch_length = prev_length; | ||||
|       last_branch -= branch_length; | ||||
|       int branch_length = code - last_branch; | ||||
|       do | ||||
|         { | ||||
|         int prev_length = GET(last_branch, 1); | ||||
|         PUT(last_branch, 1, branch_length); | ||||
|         branch_length = prev_length; | ||||
|         last_branch -= branch_length; | ||||
|         } | ||||
|       while (branch_length > 0); | ||||
|       } | ||||
|     while (branch_length > 0); | ||||
|  | ||||
|     /* Fill in the ket */ | ||||
|  | ||||
| @@ -4703,6 +5059,10 @@ for (;;) | ||||
|       length += 2; | ||||
|       } | ||||
|  | ||||
|     /* Retain the highest bracket number, in case resetting was used. */ | ||||
|  | ||||
|     cd->bracount = max_bracount; | ||||
|  | ||||
|     /* Set values to pass back */ | ||||
|  | ||||
|     *codeptr = code; | ||||
| @@ -4713,17 +5073,29 @@ for (;;) | ||||
|     return TRUE; | ||||
|     } | ||||
|  | ||||
|   /* Another branch follows; insert an "or" node. Its length field points back | ||||
|   /* Another branch follows. In the pre-compile phase, we can move the code | ||||
|   pointer back to where it was for the start of the first branch. (That is, | ||||
|   pretend that each branch is the only one.) | ||||
|  | ||||
|   In the real compile phase, insert an ALT node. Its length field points back | ||||
|   to the previous branch while the bracket remains open. At the end the chain | ||||
|   is reversed. It's done like this so that the start of the bracket has a | ||||
|   zero offset until it is closed, making it possible to detect recursion. */ | ||||
|  | ||||
|   *code = OP_ALT; | ||||
|   PUT(code, 1, code - last_branch); | ||||
|   bc.current = last_branch = code; | ||||
|   code += 1 + LINK_SIZE; | ||||
|   if (lengthptr != NULL) | ||||
|     { | ||||
|     code = *codeptr + 1 + LINK_SIZE + skipbytes; | ||||
|     length += 1 + LINK_SIZE; | ||||
|     } | ||||
|   else | ||||
|     { | ||||
|     *code = OP_ALT; | ||||
|     PUT(code, 1, code - last_branch); | ||||
|     bc.current = last_branch = code; | ||||
|     code += 1 + LINK_SIZE; | ||||
|     } | ||||
|  | ||||
|   ptr++; | ||||
|   length += 1 + LINK_SIZE; | ||||
|   } | ||||
| /* Control never reaches here */ | ||||
| } | ||||
| @@ -4990,7 +5362,7 @@ Returns:        pointer to compiled data block, or NULL on error, | ||||
|                 with errorptr and erroroffset set | ||||
| */ | ||||
|  | ||||
| PCRE_DATA_SCOPE pcre * | ||||
| PCRE_EXP_DEFN pcre * | ||||
| pcre_compile(const char *pattern, int options, const char **errorptr, | ||||
|   int *erroroffset, const unsigned char *tables) | ||||
| { | ||||
| @@ -4998,7 +5370,7 @@ return pcre_compile2(pattern, options, NULL, errorptr, erroroffset, tables); | ||||
| } | ||||
|  | ||||
|  | ||||
| PCRE_DATA_SCOPE pcre * | ||||
| PCRE_EXP_DEFN pcre * | ||||
| pcre_compile2(const char *pattern, int options, int *errorcodeptr, | ||||
|   const char **errorptr, int *erroroffset, const unsigned char *tables) | ||||
| { | ||||
| @@ -5047,7 +5419,7 @@ if (errorcodeptr != NULL) *errorcodeptr = ERR0; | ||||
| if (erroroffset == NULL) | ||||
|   { | ||||
|   errorcode = ERR16; | ||||
|   goto PCRE_EARLY_ERROR_RETURN; | ||||
|   goto PCRE_EARLY_ERROR_RETURN2; | ||||
|   } | ||||
|  | ||||
| *erroroffset = 0; | ||||
| @@ -5060,7 +5432,7 @@ if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 && | ||||
|      (*erroroffset = _pcre_valid_utf8((uschar *)pattern, -1)) >= 0) | ||||
|   { | ||||
|   errorcode = ERR44; | ||||
|   goto PCRE_UTF8_ERROR_RETURN; | ||||
|   goto PCRE_EARLY_ERROR_RETURN2; | ||||
|   } | ||||
| #else | ||||
| if ((options & PCRE_UTF8) != 0) | ||||
| @@ -5085,7 +5457,8 @@ cd->cbits = tables + cbits_offset; | ||||
| cd->ctypes = tables + ctypes_offset; | ||||
|  | ||||
| /* Handle different types of newline. The three bits give seven cases. The | ||||
| current code allows for fixed one- or two-byte sequences, plus "any". */ | ||||
| current code allows for fixed one- or two-byte sequences, plus "any" and | ||||
| "anycrlf". */ | ||||
|  | ||||
| switch (options & (PCRE_NEWLINE_CRLF | PCRE_NEWLINE_ANY)) | ||||
|   { | ||||
| @@ -5095,10 +5468,15 @@ switch (options & (PCRE_NEWLINE_CRLF | PCRE_NEWLINE_ANY)) | ||||
|   case PCRE_NEWLINE_CR+ | ||||
|        PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break; | ||||
|   case PCRE_NEWLINE_ANY: newline = -1; break; | ||||
|   case PCRE_NEWLINE_ANYCRLF: newline = -2; break; | ||||
|   default: errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN; | ||||
|   } | ||||
|  | ||||
| if (newline < 0) | ||||
| if (newline == -2) | ||||
|   { | ||||
|   cd->nltype = NLTYPE_ANYCRLF; | ||||
|   } | ||||
| else if (newline < 0) | ||||
|   { | ||||
|   cd->nltype = NLTYPE_ANY; | ||||
|   } | ||||
| @@ -5159,7 +5537,8 @@ outside can help speed up starting point checks. */ | ||||
| code = cworkspace; | ||||
| *code = OP_BRA; | ||||
| (void)compile_regex(cd->external_options, cd->external_options & PCRE_IMS, | ||||
|   &code, &ptr, &errorcode, FALSE, 0, &firstbyte, &reqbyte, NULL, cd, &length); | ||||
|   &code, &ptr, &errorcode, FALSE, FALSE, 0, &firstbyte, &reqbyte, NULL, cd, | ||||
|   &length); | ||||
| if (errorcode != 0) goto PCRE_EARLY_ERROR_RETURN; | ||||
|  | ||||
| DPRINTF(("end pre-compile: length=%d workspace=%d\n", length, | ||||
| @@ -5227,7 +5606,7 @@ ptr = (const uschar *)pattern; | ||||
| code = (uschar *)codestart; | ||||
| *code = OP_BRA; | ||||
| (void)compile_regex(re->options, re->options & PCRE_IMS, &code, &ptr, | ||||
|   &errorcode, FALSE, 0, &firstbyte, &reqbyte, NULL, cd, NULL); | ||||
|   &errorcode, FALSE, FALSE, 0, &firstbyte, &reqbyte, NULL, cd, NULL); | ||||
| re->top_bracket = cd->bracount; | ||||
| re->top_backref = cd->top_backref; | ||||
|  | ||||
| @@ -5272,9 +5651,7 @@ if (errorcode != 0) | ||||
|   (pcre_free)(re); | ||||
|   PCRE_EARLY_ERROR_RETURN: | ||||
|   *erroroffset = ptr - (const uschar *)pattern; | ||||
| #ifdef SUPPORT_UTF8 | ||||
|   PCRE_UTF8_ERROR_RETURN: | ||||
| #endif | ||||
|   PCRE_EARLY_ERROR_RETURN2: | ||||
|   *errorptr = error_texts + error_texts_offsets[errorcode]; | ||||
|   if (errorcodeptr != NULL) *errorcodeptr = errorcode; | ||||
|   return NULL; | ||||
| @@ -5364,7 +5741,7 @@ if ((re->options & PCRE_REQCHSET) != 0) | ||||
|     else printf("Req char = \\x%02x%s\n", ch, caseless); | ||||
|   } | ||||
|  | ||||
| pcre_printint(re, stdout); | ||||
| pcre_printint(re, stdout, TRUE); | ||||
|  | ||||
| /* This check is done here in the debugging case so that the code that | ||||
| was compiled can be seen. */ | ||||
|   | ||||
| @@ -6,7 +6,7 @@ | ||||
| and semantics are as close as possible to those of the Perl 5 language. | ||||
|  | ||||
|                        Written by Philip Hazel | ||||
|            Copyright (c) 1997-2006 University of Cambridge | ||||
|            Copyright (c) 1997-2007 University of Cambridge | ||||
|  | ||||
| ----------------------------------------------------------------------------- | ||||
| Redistribution and use in source and binary forms, with or without | ||||
| @@ -58,7 +58,7 @@ Arguments: | ||||
| Returns:           0 if data returned, negative on error | ||||
| */ | ||||
|  | ||||
| PCRE_DATA_SCOPE int | ||||
| PCRE_EXP_DEFN int | ||||
| pcre_config(int what, void *where) | ||||
| { | ||||
| switch (what) | ||||
|   | ||||
| @@ -6,7 +6,7 @@ | ||||
| and semantics are as close as possible to those of the Perl 5 language. | ||||
|  | ||||
|                        Written by Philip Hazel | ||||
|            Copyright (c) 1997-2006 University of Cambridge | ||||
|            Copyright (c) 1997-2007 University of Cambridge | ||||
|  | ||||
| ----------------------------------------------------------------------------- | ||||
| Redistribution and use in source and binary forms, with or without | ||||
| @@ -63,24 +63,30 @@ applications. */ | ||||
|  | ||||
| /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes | ||||
| into others, under special conditions. A gap of 20 between the blocks should be | ||||
| enough. */ | ||||
| enough. The resulting opcodes don't have to be less than 256 because they are | ||||
| never stored, so we push them well clear of the normal opcodes. */ | ||||
|  | ||||
| #define OP_PROP_EXTRA 100 | ||||
| #define OP_EXTUNI_EXTRA 120 | ||||
| #define OP_ANYNL_EXTRA 140 | ||||
| #define OP_PROP_EXTRA       300 | ||||
| #define OP_EXTUNI_EXTRA     320 | ||||
| #define OP_ANYNL_EXTRA      340 | ||||
| #define OP_HSPACE_EXTRA     360 | ||||
| #define OP_VSPACE_EXTRA     380 | ||||
|  | ||||
|  | ||||
| /* This table identifies those opcodes that are followed immediately by a | ||||
| character that is to be tested in some way. This makes it possible to | ||||
| centralize the loading of these characters. In the case of Type * etc, the | ||||
| "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a | ||||
| small value. */ | ||||
| small value. ***NOTE*** If the start of this table is modified, the two tables | ||||
| that follow must also be modified. */ | ||||
|  | ||||
| static uschar coptable[] = { | ||||
|   0,                             /* End                                    */ | ||||
|   0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* \A, \G, \B, \b, \D, \d, \S, \s, \W, \w */ | ||||
|   0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */ | ||||
|   0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */ | ||||
|   0, 0,                          /* Any, Anybyte                           */ | ||||
|   0, 0, 0, 0,                    /* NOTPROP, PROP, EXTUNI, ANYNL           */ | ||||
|   0, 0, 0,                       /* NOTPROP, PROP, EXTUNI                  */ | ||||
|   0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */ | ||||
|   0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */ | ||||
|   1,                             /* Char                                   */ | ||||
|   1,                             /* Charnc                                 */ | ||||
| @@ -127,7 +133,7 @@ static uschar coptable[] = { | ||||
| and \w */ | ||||
|  | ||||
| static uschar toptable1[] = { | ||||
|   0, 0, 0, 0, 0, | ||||
|   0, 0, 0, 0, 0, 0, | ||||
|   ctype_digit, ctype_digit, | ||||
|   ctype_space, ctype_space, | ||||
|   ctype_word,  ctype_word, | ||||
| @@ -135,7 +141,7 @@ static uschar toptable1[] = { | ||||
| }; | ||||
|  | ||||
| static uschar toptable2[] = { | ||||
|   0, 0, 0, 0, 0, | ||||
|   0, 0, 0, 0, 0, 0, | ||||
|   ctype_digit, 0, | ||||
|   ctype_space, 0, | ||||
|   ctype_word,  0, | ||||
| @@ -500,7 +506,9 @@ for (;;) | ||||
|     const uschar *code; | ||||
|     int state_offset = current_state->offset; | ||||
|     int count, codevalue; | ||||
| #ifdef SUPPORT_UCP | ||||
|     int chartype, script; | ||||
| #endif | ||||
|  | ||||
| #ifdef DEBUG | ||||
|     printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset); | ||||
| @@ -555,10 +563,10 @@ for (;;) | ||||
|     permitted. | ||||
|  | ||||
|     We also use this mechanism for opcodes such as OP_TYPEPLUS that take an | ||||
|     argument that is not a data character - but is always one byte long. | ||||
|     Unfortunately, we have to take special action to deal with  \P, \p, and | ||||
|     \X in this case. To keep the other cases fast, convert these ones to new | ||||
|     opcodes. */ | ||||
|     argument that is not a data character - but is always one byte long. We | ||||
|     have to take special action to deal with \P, \p, \R, \H, \h, \V, \v and \X | ||||
|     in this case. To keep the other cases fast, convert these ones to new | ||||
|     opcodes. */ | ||||
|  | ||||
|     if (coptable[codevalue] > 0) | ||||
|       { | ||||
| @@ -576,6 +584,10 @@ for (;;) | ||||
|           case OP_PROP: codevalue += OP_PROP_EXTRA; break; | ||||
|           case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break; | ||||
|           case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break; | ||||
|           case OP_NOT_HSPACE: | ||||
|           case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break; | ||||
|           case OP_NOT_VSPACE: | ||||
|           case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break; | ||||
|           default: break; | ||||
|           } | ||||
|         } | ||||
| @@ -783,13 +795,12 @@ for (;;) | ||||
|       break; | ||||
|  | ||||
|  | ||||
| #ifdef SUPPORT_UCP | ||||
|  | ||||
|       /*-----------------------------------------------------------------*/ | ||||
|       /* Check the next character by Unicode property. We will get here only | ||||
|       if the support is in the binary; otherwise a compile-time error occurs. | ||||
|       */ | ||||
|  | ||||
| #ifdef SUPPORT_UCP | ||||
|       case OP_PROP: | ||||
|       case OP_NOTPROP: | ||||
|       if (clen > 0) | ||||
| @@ -970,6 +981,7 @@ for (;;) | ||||
|       argument. It keeps the code above fast for the other cases. The argument | ||||
|       is in the d variable. */ | ||||
|  | ||||
| #ifdef SUPPORT_UCP | ||||
|       case OP_PROP_EXTRA + OP_TYPEPLUS: | ||||
|       case OP_PROP_EXTRA + OP_TYPEMINPLUS: | ||||
|       case OP_PROP_EXTRA + OP_TYPEPOSPLUS: | ||||
| @@ -1049,6 +1061,7 @@ for (;;) | ||||
|         ADD_NEW_DATA(-state_offset, count, ncount); | ||||
|         } | ||||
|       break; | ||||
| #endif | ||||
|  | ||||
|       /*-----------------------------------------------------------------*/ | ||||
|       case OP_ANYNL_EXTRA + OP_TYPEPLUS: | ||||
| @@ -1085,6 +1098,97 @@ for (;;) | ||||
|       break; | ||||
|  | ||||
|       /*-----------------------------------------------------------------*/ | ||||
|       case OP_VSPACE_EXTRA + OP_TYPEPLUS: | ||||
|       case OP_VSPACE_EXTRA + OP_TYPEMINPLUS: | ||||
|       case OP_VSPACE_EXTRA + OP_TYPEPOSPLUS: | ||||
|       count = current_state->count;  /* Already matched */ | ||||
|       if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); } | ||||
|       if (clen > 0) | ||||
|         { | ||||
|         BOOL OK; | ||||
|         switch (c) | ||||
|           { | ||||
|           case 0x000a: | ||||
|           case 0x000b: | ||||
|           case 0x000c: | ||||
|           case 0x000d: | ||||
|           case 0x0085: | ||||
|           case 0x2028: | ||||
|           case 0x2029: | ||||
|           OK = TRUE; | ||||
|           break; | ||||
|  | ||||
|           default: | ||||
|           OK = FALSE; | ||||
|           break; | ||||
|           } | ||||
|  | ||||
|         if (OK == (d == OP_VSPACE)) | ||||
|           { | ||||
|           if (count > 0 && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS) | ||||
|             { | ||||
|             active_count--;           /* Remove non-match possibility */ | ||||
|             next_active_state--; | ||||
|             } | ||||
|           count++; | ||||
|           ADD_NEW_DATA(-state_offset, count, 0); | ||||
|           } | ||||
|         } | ||||
|       break; | ||||
|  | ||||
|       /*-----------------------------------------------------------------*/ | ||||
|       case OP_HSPACE_EXTRA + OP_TYPEPLUS: | ||||
|       case OP_HSPACE_EXTRA + OP_TYPEMINPLUS: | ||||
|       case OP_HSPACE_EXTRA + OP_TYPEPOSPLUS: | ||||
|       count = current_state->count;  /* Already matched */ | ||||
|       if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); } | ||||
|       if (clen > 0) | ||||
|         { | ||||
|         BOOL OK; | ||||
|         switch (c) | ||||
|           { | ||||
|           case 0x09:      /* HT */ | ||||
|           case 0x20:      /* SPACE */ | ||||
|           case 0xa0:      /* NBSP */ | ||||
|           case 0x1680:    /* OGHAM SPACE MARK */ | ||||
|           case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */ | ||||
|           case 0x2000:    /* EN QUAD */ | ||||
|           case 0x2001:    /* EM QUAD */ | ||||
|           case 0x2002:    /* EN SPACE */ | ||||
|           case 0x2003:    /* EM SPACE */ | ||||
|           case 0x2004:    /* THREE-PER-EM SPACE */ | ||||
|           case 0x2005:    /* FOUR-PER-EM SPACE */ | ||||
|           case 0x2006:    /* SIX-PER-EM SPACE */ | ||||
|           case 0x2007:    /* FIGURE SPACE */ | ||||
|           case 0x2008:    /* PUNCTUATION SPACE */ | ||||
|           case 0x2009:    /* THIN SPACE */ | ||||
|           case 0x200A:    /* HAIR SPACE */ | ||||
|           case 0x202f:    /* NARROW NO-BREAK SPACE */ | ||||
|           case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */ | ||||
|           case 0x3000:    /* IDEOGRAPHIC SPACE */ | ||||
|           OK = TRUE; | ||||
|           break; | ||||
|  | ||||
|           default: | ||||
|           OK = FALSE; | ||||
|           break; | ||||
|           } | ||||
|  | ||||
|         if (OK == (d == OP_HSPACE)) | ||||
|           { | ||||
|           if (count > 0 && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS) | ||||
|             { | ||||
|             active_count--;           /* Remove non-match possibility */ | ||||
|             next_active_state--; | ||||
|             } | ||||
|           count++; | ||||
|           ADD_NEW_DATA(-state_offset, count, 0); | ||||
|           } | ||||
|         } | ||||
|       break; | ||||
|  | ||||
|       /*-----------------------------------------------------------------*/ | ||||
| #ifdef SUPPORT_UCP | ||||
|       case OP_PROP_EXTRA + OP_TYPEQUERY: | ||||
|       case OP_PROP_EXTRA + OP_TYPEMINQUERY: | ||||
|       case OP_PROP_EXTRA + OP_TYPEPOSQUERY: | ||||
| @@ -1182,6 +1286,7 @@ for (;;) | ||||
|         ADD_NEW_DATA(-(state_offset + count), 0, ncount); | ||||
|         } | ||||
|       break; | ||||
| #endif | ||||
|  | ||||
|       /*-----------------------------------------------------------------*/ | ||||
|       case OP_ANYNL_EXTRA + OP_TYPEQUERY: | ||||
| @@ -1226,6 +1331,112 @@ for (;;) | ||||
|       break; | ||||
|  | ||||
|       /*-----------------------------------------------------------------*/ | ||||
|       case OP_VSPACE_EXTRA + OP_TYPEQUERY: | ||||
|       case OP_VSPACE_EXTRA + OP_TYPEMINQUERY: | ||||
|       case OP_VSPACE_EXTRA + OP_TYPEPOSQUERY: | ||||
|       count = 2; | ||||
|       goto QS4; | ||||
|  | ||||
|       case OP_VSPACE_EXTRA + OP_TYPESTAR: | ||||
|       case OP_VSPACE_EXTRA + OP_TYPEMINSTAR: | ||||
|       case OP_VSPACE_EXTRA + OP_TYPEPOSSTAR: | ||||
|       count = 0; | ||||
|  | ||||
|       QS4: | ||||
|       ADD_ACTIVE(state_offset + 2, 0); | ||||
|       if (clen > 0) | ||||
|         { | ||||
|         BOOL OK; | ||||
|         switch (c) | ||||
|           { | ||||
|           case 0x000a: | ||||
|           case 0x000b: | ||||
|           case 0x000c: | ||||
|           case 0x000d: | ||||
|           case 0x0085: | ||||
|           case 0x2028: | ||||
|           case 0x2029: | ||||
|           OK = TRUE; | ||||
|           break; | ||||
|  | ||||
|           default: | ||||
|           OK = FALSE; | ||||
|           break; | ||||
|           } | ||||
|         if (OK == (d == OP_VSPACE)) | ||||
|           { | ||||
|           if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR || | ||||
|               codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY) | ||||
|             { | ||||
|             active_count--;           /* Remove non-match possibility */ | ||||
|             next_active_state--; | ||||
|             } | ||||
|           ADD_NEW_DATA(-(state_offset + count), 0, 0); | ||||
|           } | ||||
|         } | ||||
|       break; | ||||
|  | ||||
|       /*-----------------------------------------------------------------*/ | ||||
|       case OP_HSPACE_EXTRA + OP_TYPEQUERY: | ||||
|       case OP_HSPACE_EXTRA + OP_TYPEMINQUERY: | ||||
|       case OP_HSPACE_EXTRA + OP_TYPEPOSQUERY: | ||||
|       count = 2; | ||||
|       goto QS5; | ||||
|  | ||||
|       case OP_HSPACE_EXTRA + OP_TYPESTAR: | ||||
|       case OP_HSPACE_EXTRA + OP_TYPEMINSTAR: | ||||
|       case OP_HSPACE_EXTRA + OP_TYPEPOSSTAR: | ||||
|       count = 0; | ||||
|  | ||||
|       QS5: | ||||
|       ADD_ACTIVE(state_offset + 2, 0); | ||||
|       if (clen > 0) | ||||
|         { | ||||
|         BOOL OK; | ||||
|         switch (c) | ||||
|           { | ||||
|           case 0x09:      /* HT */ | ||||
|           case 0x20:      /* SPACE */ | ||||
|           case 0xa0:      /* NBSP */ | ||||
|           case 0x1680:    /* OGHAM SPACE MARK */ | ||||
|           case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */ | ||||
|           case 0x2000:    /* EN QUAD */ | ||||
|           case 0x2001:    /* EM QUAD */ | ||||
|           case 0x2002:    /* EN SPACE */ | ||||
|           case 0x2003:    /* EM SPACE */ | ||||
|           case 0x2004:    /* THREE-PER-EM SPACE */ | ||||
|           case 0x2005:    /* FOUR-PER-EM SPACE */ | ||||
|           case 0x2006:    /* SIX-PER-EM SPACE */ | ||||
|           case 0x2007:    /* FIGURE SPACE */ | ||||
|           case 0x2008:    /* PUNCTUATION SPACE */ | ||||
|           case 0x2009:    /* THIN SPACE */ | ||||
|           case 0x200A:    /* HAIR SPACE */ | ||||
|           case 0x202f:    /* NARROW NO-BREAK SPACE */ | ||||
|           case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */ | ||||
|           case 0x3000:    /* IDEOGRAPHIC SPACE */ | ||||
|           OK = TRUE; | ||||
|           break; | ||||
|  | ||||
|           default: | ||||
|           OK = FALSE; | ||||
|           break; | ||||
|           } | ||||
|  | ||||
|         if (OK == (d == OP_HSPACE)) | ||||
|           { | ||||
|           if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR || | ||||
|               codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY) | ||||
|             { | ||||
|             active_count--;           /* Remove non-match possibility */ | ||||
|             next_active_state--; | ||||
|             } | ||||
|           ADD_NEW_DATA(-(state_offset + count), 0, 0); | ||||
|           } | ||||
|         } | ||||
|       break; | ||||
|  | ||||
|       /*-----------------------------------------------------------------*/ | ||||
| #ifdef SUPPORT_UCP | ||||
|       case OP_PROP_EXTRA + OP_TYPEEXACT: | ||||
|       case OP_PROP_EXTRA + OP_TYPEUPTO: | ||||
|       case OP_PROP_EXTRA + OP_TYPEMINUPTO: | ||||
| @@ -1313,6 +1524,7 @@ for (;;) | ||||
|           { ADD_NEW_DATA(-state_offset, count, ncount); } | ||||
|         } | ||||
|       break; | ||||
| #endif | ||||
|  | ||||
|       /*-----------------------------------------------------------------*/ | ||||
|       case OP_ANYNL_EXTRA + OP_TYPEEXACT: | ||||
| @@ -1352,6 +1564,103 @@ for (;;) | ||||
|         } | ||||
|       break; | ||||
|  | ||||
|       /*-----------------------------------------------------------------*/ | ||||
|       case OP_VSPACE_EXTRA + OP_TYPEEXACT: | ||||
|       case OP_VSPACE_EXTRA + OP_TYPEUPTO: | ||||
|       case OP_VSPACE_EXTRA + OP_TYPEMINUPTO: | ||||
|       case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO: | ||||
|       if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT) | ||||
|         { ADD_ACTIVE(state_offset + 4, 0); } | ||||
|       count = current_state->count;  /* Number already matched */ | ||||
|       if (clen > 0) | ||||
|         { | ||||
|         BOOL OK; | ||||
|         switch (c) | ||||
|           { | ||||
|           case 0x000a: | ||||
|           case 0x000b: | ||||
|           case 0x000c: | ||||
|           case 0x000d: | ||||
|           case 0x0085: | ||||
|           case 0x2028: | ||||
|           case 0x2029: | ||||
|           OK = TRUE; | ||||
|           break; | ||||
|  | ||||
|           default: | ||||
|           OK = FALSE; | ||||
|           } | ||||
|  | ||||
|         if (OK == (d == OP_VSPACE)) | ||||
|           { | ||||
|           if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO) | ||||
|             { | ||||
|             active_count--;           /* Remove non-match possibility */ | ||||
|             next_active_state--; | ||||
|             } | ||||
|           if (++count >= GET2(code, 1)) | ||||
|             { ADD_NEW_DATA(-(state_offset + 4), 0, 0); } | ||||
|           else | ||||
|             { ADD_NEW_DATA(-state_offset, count, 0); } | ||||
|           } | ||||
|         } | ||||
|       break; | ||||
|  | ||||
|       /*-----------------------------------------------------------------*/ | ||||
|       case OP_HSPACE_EXTRA + OP_TYPEEXACT: | ||||
|       case OP_HSPACE_EXTRA + OP_TYPEUPTO: | ||||
|       case OP_HSPACE_EXTRA + OP_TYPEMINUPTO: | ||||
|       case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO: | ||||
|       if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT) | ||||
|         { ADD_ACTIVE(state_offset + 4, 0); } | ||||
|       count = current_state->count;  /* Number already matched */ | ||||
|       if (clen > 0) | ||||
|         { | ||||
|         BOOL OK; | ||||
|         switch (c) | ||||
|           { | ||||
|           case 0x09:      /* HT */ | ||||
|           case 0x20:      /* SPACE */ | ||||
|           case 0xa0:      /* NBSP */ | ||||
|           case 0x1680:    /* OGHAM SPACE MARK */ | ||||
|           case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */ | ||||
|           case 0x2000:    /* EN QUAD */ | ||||
|           case 0x2001:    /* EM QUAD */ | ||||
|           case 0x2002:    /* EN SPACE */ | ||||
|           case 0x2003:    /* EM SPACE */ | ||||
|           case 0x2004:    /* THREE-PER-EM SPACE */ | ||||
|           case 0x2005:    /* FOUR-PER-EM SPACE */ | ||||
|           case 0x2006:    /* SIX-PER-EM SPACE */ | ||||
|           case 0x2007:    /* FIGURE SPACE */ | ||||
|           case 0x2008:    /* PUNCTUATION SPACE */ | ||||
|           case 0x2009:    /* THIN SPACE */ | ||||
|           case 0x200A:    /* HAIR SPACE */ | ||||
|           case 0x202f:    /* NARROW NO-BREAK SPACE */ | ||||
|           case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */ | ||||
|           case 0x3000:    /* IDEOGRAPHIC SPACE */ | ||||
|           OK = TRUE; | ||||
|           break; | ||||
|  | ||||
|           default: | ||||
|           OK = FALSE; | ||||
|           break; | ||||
|           } | ||||
|  | ||||
|         if (OK == (d == OP_HSPACE)) | ||||
|           { | ||||
|           if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO) | ||||
|             { | ||||
|             active_count--;           /* Remove non-match possibility */ | ||||
|             next_active_state--; | ||||
|             } | ||||
|           if (++count >= GET2(code, 1)) | ||||
|             { ADD_NEW_DATA(-(state_offset + 4), 0, 0); } | ||||
|           else | ||||
|             { ADD_NEW_DATA(-state_offset, count, 0); } | ||||
|           } | ||||
|         } | ||||
|       break; | ||||
|  | ||||
| /* ========================================================================== */ | ||||
|       /* These opcodes are followed by a character that is usually compared | ||||
|       to the current subject character; it is loaded into d. We still get | ||||
| @@ -1450,6 +1759,102 @@ for (;;) | ||||
|         } | ||||
|       break; | ||||
|  | ||||
|       /*-----------------------------------------------------------------*/ | ||||
|       case OP_NOT_VSPACE: | ||||
|       if (clen > 0) switch(c) | ||||
|         { | ||||
|         case 0x000a: | ||||
|         case 0x000b: | ||||
|         case 0x000c: | ||||
|         case 0x000d: | ||||
|         case 0x0085: | ||||
|         case 0x2028: | ||||
|         case 0x2029: | ||||
|         break; | ||||
|  | ||||
|         default: | ||||
|         ADD_NEW(state_offset + 1, 0); | ||||
|         break; | ||||
|         } | ||||
|       break; | ||||
|  | ||||
|       /*-----------------------------------------------------------------*/ | ||||
|       case OP_VSPACE: | ||||
|       if (clen > 0) switch(c) | ||||
|         { | ||||
|         case 0x000a: | ||||
|         case 0x000b: | ||||
|         case 0x000c: | ||||
|         case 0x000d: | ||||
|         case 0x0085: | ||||
|         case 0x2028: | ||||
|         case 0x2029: | ||||
|         ADD_NEW(state_offset + 1, 0); | ||||
|         break; | ||||
|  | ||||
|         default: break; | ||||
|         } | ||||
|       break; | ||||
|  | ||||
|       /*-----------------------------------------------------------------*/ | ||||
|       case OP_NOT_HSPACE: | ||||
|       if (clen > 0) switch(c) | ||||
|         { | ||||
|         case 0x09:      /* HT */ | ||||
|         case 0x20:      /* SPACE */ | ||||
|         case 0xa0:      /* NBSP */ | ||||
|         case 0x1680:    /* OGHAM SPACE MARK */ | ||||
|         case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */ | ||||
|         case 0x2000:    /* EN QUAD */ | ||||
|         case 0x2001:    /* EM QUAD */ | ||||
|         case 0x2002:    /* EN SPACE */ | ||||
|         case 0x2003:    /* EM SPACE */ | ||||
|         case 0x2004:    /* THREE-PER-EM SPACE */ | ||||
|         case 0x2005:    /* FOUR-PER-EM SPACE */ | ||||
|         case 0x2006:    /* SIX-PER-EM SPACE */ | ||||
|         case 0x2007:    /* FIGURE SPACE */ | ||||
|         case 0x2008:    /* PUNCTUATION SPACE */ | ||||
|         case 0x2009:    /* THIN SPACE */ | ||||
|         case 0x200A:    /* HAIR SPACE */ | ||||
|         case 0x202f:    /* NARROW NO-BREAK SPACE */ | ||||
|         case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */ | ||||
|         case 0x3000:    /* IDEOGRAPHIC SPACE */ | ||||
|         break; | ||||
|  | ||||
|         default: | ||||
|         ADD_NEW(state_offset + 1, 0); | ||||
|         break; | ||||
|         } | ||||
|       break; | ||||
|  | ||||
|       /*-----------------------------------------------------------------*/ | ||||
|       case OP_HSPACE: | ||||
|       if (clen > 0) switch(c) | ||||
|         { | ||||
|         case 0x09:      /* HT */ | ||||
|         case 0x20:      /* SPACE */ | ||||
|         case 0xa0:      /* NBSP */ | ||||
|         case 0x1680:    /* OGHAM SPACE MARK */ | ||||
|         case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */ | ||||
|         case 0x2000:    /* EN QUAD */ | ||||
|         case 0x2001:    /* EM QUAD */ | ||||
|         case 0x2002:    /* EN SPACE */ | ||||
|         case 0x2003:    /* EM SPACE */ | ||||
|         case 0x2004:    /* THREE-PER-EM SPACE */ | ||||
|         case 0x2005:    /* FOUR-PER-EM SPACE */ | ||||
|         case 0x2006:    /* SIX-PER-EM SPACE */ | ||||
|         case 0x2007:    /* FIGURE SPACE */ | ||||
|         case 0x2008:    /* PUNCTUATION SPACE */ | ||||
|         case 0x2009:    /* THIN SPACE */ | ||||
|         case 0x200A:    /* HAIR SPACE */ | ||||
|         case 0x202f:    /* NARROW NO-BREAK SPACE */ | ||||
|         case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */ | ||||
|         case 0x3000:    /* IDEOGRAPHIC SPACE */ | ||||
|         ADD_NEW(state_offset + 1, 0); | ||||
|         break; | ||||
|         } | ||||
|       break; | ||||
|  | ||||
|       /*-----------------------------------------------------------------*/ | ||||
|       /* Match a negated single character. This is only used for one-byte | ||||
|       characters, that is, we know that d < 256. The character we are | ||||
| @@ -2057,7 +2462,7 @@ is not anchored. | ||||
|  | ||||
| Arguments: | ||||
|   argument_re     points to the compiled expression | ||||
|   extra_data      points to extra data or is NULL (not currently used) | ||||
|   extra_data      points to extra data or is NULL | ||||
|   subject         points to the subject string | ||||
|   length          length of subject string (may contain binary zeros) | ||||
|   start_offset    where to start in the subject string | ||||
| @@ -2073,7 +2478,7 @@ Returns:          > 0 => number of match offset pairs placed in offsets | ||||
|                  < -1 => some kind of unexpected problem | ||||
| */ | ||||
|  | ||||
| PCRE_DATA_SCOPE int | ||||
| PCRE_EXP_DEFN int | ||||
| pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data, | ||||
|   const char *subject, int length, int start_offset, int options, int *offsets, | ||||
|   int offsetcount, int *workspace, int wscount) | ||||
| @@ -2163,10 +2568,10 @@ md->end_subject = end_subject; | ||||
| md->moptions = options; | ||||
| md->poptions = re->options; | ||||
|  | ||||
| /* Handle different types of newline. The two bits give four cases. If nothing | ||||
| is set at run time, whatever was used at compile time applies. */ | ||||
| /* Handle different types of newline. The three bits give eight cases. If | ||||
| nothing is set at run time, whatever was used at compile time applies. */ | ||||
|  | ||||
| switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : options) & | ||||
| switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) & | ||||
|          PCRE_NEWLINE_BITS) | ||||
|   { | ||||
|   case 0: newline = NEWLINE; break;   /* Compile-time default */ | ||||
| @@ -2175,10 +2580,15 @@ switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : options) & | ||||
|   case PCRE_NEWLINE_CR+ | ||||
|        PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break; | ||||
|   case PCRE_NEWLINE_ANY: newline = -1; break; | ||||
|   case PCRE_NEWLINE_ANYCRLF: newline = -2; break; | ||||
|   default: return PCRE_ERROR_BADNEWLINE; | ||||
|   } | ||||
|  | ||||
| if (newline < 0) | ||||
| if (newline == -2) | ||||
|   { | ||||
|   md->nltype = NLTYPE_ANYCRLF; | ||||
|   } | ||||
| else if (newline < 0) | ||||
|   { | ||||
|   md->nltype = NLTYPE_ANY; | ||||
|   } | ||||
| @@ -2308,6 +2718,16 @@ for (;;) | ||||
|         { | ||||
|         while (current_subject <= end_subject && !WAS_NEWLINE(current_subject)) | ||||
|           current_subject++; | ||||
|  | ||||
|         /* If we have just passed a CR and the newline option is ANY or | ||||
|         ANYCRLF, and we are now at a LF, advance the match position by one more | ||||
|         character. */ | ||||
|  | ||||
|         if (current_subject[-1] == '\r' && | ||||
|              (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) && | ||||
|              current_subject < end_subject && | ||||
|              *current_subject == '\n') | ||||
|           current_subject++; | ||||
|         } | ||||
|       } | ||||
|  | ||||
| @@ -2416,11 +2836,14 @@ for (;;) | ||||
|     } | ||||
|   if (current_subject > end_subject) break; | ||||
|  | ||||
|   /* If we have just passed a CR and the newline option is CRLF or ANY, and we | ||||
|   are now at a LF, advance the match position by one more character. */ | ||||
|   /* If we have just passed a CR and the newline option is CRLF or ANY or | ||||
|   ANYCRLF, and we are now at a LF, advance the match position by one more | ||||
|   character. */ | ||||
|  | ||||
|   if (current_subject[-1] == '\r' && | ||||
|        (md->nltype == NLTYPE_ANY || md->nllen == 2) && | ||||
|        (md->nltype == NLTYPE_ANY || | ||||
|         md->nltype == NLTYPE_ANYCRLF || | ||||
|         md->nllen == 2) && | ||||
|        current_subject < end_subject && | ||||
|        *current_subject == '\n') | ||||
|     current_subject++; | ||||
|   | ||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @@ -6,7 +6,7 @@ | ||||
| and semantics are as close as possible to those of the Perl 5 language. | ||||
|  | ||||
|                        Written by Philip Hazel | ||||
|            Copyright (c) 1997-2006 University of Cambridge | ||||
|            Copyright (c) 1997-2007 University of Cambridge | ||||
|  | ||||
| ----------------------------------------------------------------------------- | ||||
| Redistribution and use in source and binary forms, with or without | ||||
| @@ -61,7 +61,7 @@ Arguments: | ||||
| Returns:           0 if data returned, negative on error | ||||
| */ | ||||
|  | ||||
| PCRE_DATA_SCOPE int | ||||
| PCRE_EXP_DEFN int | ||||
| pcre_fullinfo(const pcre *argument_re, const pcre_extra *extra_data, int what, | ||||
|   void *where) | ||||
| { | ||||
| @@ -140,6 +140,14 @@ switch (what) | ||||
|   *((const uschar **)where) = (const uschar *)(_pcre_default_tables); | ||||
|   break; | ||||
|  | ||||
|   case PCRE_INFO_OKPARTIAL: | ||||
|   *((int *)where) = (re->options & PCRE_NOPARTIAL) == 0; | ||||
|   break; | ||||
|  | ||||
|   case PCRE_INFO_JCHANGED: | ||||
|   *((int *)where) = (re->options & PCRE_JCHANGED) != 0; | ||||
|   break; | ||||
|  | ||||
|   default: return PCRE_ERROR_BADOPTION; | ||||
|   } | ||||
|  | ||||
|   | ||||
| @@ -6,7 +6,7 @@ | ||||
| and semantics are as close as possible to those of the Perl 5 language. | ||||
|  | ||||
|                        Written by Philip Hazel | ||||
|            Copyright (c) 1997-2006 University of Cambridge | ||||
|            Copyright (c) 1997-2007 University of Cambridge | ||||
|  | ||||
| ----------------------------------------------------------------------------- | ||||
| Redistribution and use in source and binary forms, with or without | ||||
|   | ||||
| @@ -6,7 +6,7 @@ | ||||
| and semantics are as close as possible to those of the Perl 5 language. | ||||
|  | ||||
|                        Written by Philip Hazel | ||||
|            Copyright (c) 1997-2006 University of Cambridge | ||||
|            Copyright (c) 1997-2007 University of Cambridge | ||||
|  | ||||
| ----------------------------------------------------------------------------- | ||||
| Redistribution and use in source and binary forms, with or without | ||||
| @@ -46,14 +46,8 @@ indirection. These values can be changed by the caller, but are shared between | ||||
| all threads. However, when compiling for Virtual Pascal, things are done | ||||
| differently, and global variables are not used (see pcre.in). */ | ||||
|  | ||||
|  | ||||
| #include "pcre_internal.h" | ||||
|  | ||||
|  | ||||
| #ifdef __cplusplus | ||||
| extern "C" int   (*pcre_callout)(pcre_callout_block *) = NULL; | ||||
| #else | ||||
| int   (*pcre_callout)(pcre_callout_block *) = NULL; | ||||
| #endif | ||||
| PCRE_EXP_DATA_DEFN int   (*pcre_callout)(pcre_callout_block *) = NULL; | ||||
|  | ||||
| /* End of pcre_globals.c */ | ||||
|   | ||||
| @@ -6,7 +6,7 @@ | ||||
| and semantics are as close as possible to those of the Perl 5 language. | ||||
|  | ||||
|                        Written by Philip Hazel | ||||
|            Copyright (c) 1997-2006 University of Cambridge | ||||
|            Copyright (c) 1997-2007 University of Cambridge | ||||
|  | ||||
| ----------------------------------------------------------------------------- | ||||
| Redistribution and use in source and binary forms, with or without | ||||
| @@ -68,7 +68,7 @@ Returns:        number of capturing subpatterns | ||||
|                 or negative values on error | ||||
| */ | ||||
|  | ||||
| PCRE_DATA_SCOPE int | ||||
| PCRE_EXP_DEFN int | ||||
| pcre_info(const pcre *argument_re, int *optptr, int *first_byte) | ||||
| { | ||||
| real_pcre internal_re; | ||||
|   | ||||
| @@ -7,7 +7,7 @@ | ||||
| and semantics are as close as possible to those of the Perl 5 language. | ||||
|  | ||||
|                        Written by Philip Hazel | ||||
|            Copyright (c) 1997-2006 University of Cambridge | ||||
|            Copyright (c) 1997-2007 University of Cambridge | ||||
|  | ||||
| ----------------------------------------------------------------------------- | ||||
| Redistribution and use in source and binary forms, with or without | ||||
| @@ -83,8 +83,58 @@ setjmp and stdarg are used is when NO_RECURSE is set. */ | ||||
| #include <stdlib.h> | ||||
| #include <string.h> | ||||
|  | ||||
| #ifndef PCRE_SPY | ||||
| #define PCRE_DEFINITION       /* Win32 __declspec(export) trigger for .dll */ | ||||
| /* When compiling a DLL for Windows, the exported symbols have to be declared | ||||
| using some MS magic. I found some useful information on this web page: | ||||
| http://msdn2.microsoft.com/en-us/library/y4h7bcy6(VS.80).aspx. According to the | ||||
| information there, using __declspec(dllexport) without "extern" we have a | ||||
| definition; with "extern" we have a declaration. The settings here override the | ||||
| setting in pcre.h (which is included below); pcre.h defines only PCRE_EXP_DECL, | ||||
| which is all that is needed for applications (they just import the symbols). We | ||||
| use: | ||||
|  | ||||
|   PCRE_EXP_DECL       for declarations | ||||
|   PCRE_EXP_DEFN       for definitions of exported functions | ||||
|   PCRE_EXP_DATA_DEFN  for definitions of exported variables | ||||
|  | ||||
| The reason for the two DEFN macros is that in non-Windows environments, one | ||||
| does not want to have "extern" before variable definitions because it leads to | ||||
| compiler warnings. So we distinguish between functions and variables. In | ||||
| Windows, the two should always be the same. | ||||
|  | ||||
| The reason for wrapping this in #ifndef PCRE_EXP_DECL is so that pcretest, | ||||
| which is an application, but needs to import this file in order to "peek" at | ||||
| internals, can #include pcre.h first to get an application's-eye view. | ||||
|  | ||||
| In principle, people compiling for non-Windows, non-Unix-like (i.e. uncommon, | ||||
| special-purpose environments) might want to stick other stuff in front of | ||||
| exported symbols. That's why, in the non-Windows case, we set PCRE_EXP_DEFN and | ||||
| PCRE_EXP_DATA_DEFN only if they are not already set. */ | ||||
|  | ||||
| #ifndef PCRE_EXP_DECL | ||||
| #  ifdef _WIN32 | ||||
| #    ifdef DLL_EXPORT | ||||
| #      define PCRE_EXP_DECL       extern __declspec(dllexport) | ||||
| #      define PCRE_EXP_DEFN       __declspec(dllexport) | ||||
| #      define PCRE_EXP_DATA_DEFN  __declspec(dllexport) | ||||
| #    else | ||||
| #      define PCRE_EXP_DECL       extern | ||||
| #      define PCRE_EXP_DEFN | ||||
| #      define PCRE_EXP_DATA_DEFN | ||||
| #    endif | ||||
| # | ||||
| #  else | ||||
| #    ifdef __cplusplus | ||||
| #      define PCRE_EXP_DECL       extern "C" | ||||
| #    else | ||||
| #      define PCRE_EXP_DECL       extern | ||||
| #    endif | ||||
| #    ifndef PCRE_EXP_DEFN | ||||
| #      define PCRE_EXP_DEFN       PCRE_EXP_DECL | ||||
| #    endif | ||||
| #    ifndef PCRE_EXP_DATA_DEFN | ||||
| #      define PCRE_EXP_DATA_DEFN | ||||
| #    endif | ||||
| #  endif | ||||
| #endif | ||||
|  | ||||
| /* We need to have types that specify unsigned 16-bit and 32-bit integers. We | ||||
| @@ -125,21 +175,22 @@ characters only go up to 0x7fffffff (though Unicode doesn't go beyond | ||||
| #define NOTACHAR 0xffffffff | ||||
|  | ||||
| /* PCRE is able to support several different kinds of newline (CR, LF, CRLF, | ||||
| and "all" at present). The following macros are used to package up testing for | ||||
| newlines. NLBLOCK, PSSTART, and PSEND are defined in the various modules to | ||||
| indicate in which datablock the parameters exist, and what the start/end of | ||||
| string field names are. */ | ||||
| "any" and "anycrlf" at present). The following macros are used to package up | ||||
| testing for newlines. NLBLOCK, PSSTART, and PSEND are defined in the various | ||||
| modules to indicate in which datablock the parameters exist, and what the | ||||
| start/end of string field names are. */ | ||||
|  | ||||
| #define NLTYPE_FIXED   0     /* Newline is a fixed length string */ | ||||
| #define NLTYPE_ANY     1     /* Newline is any Unicode line ending */ | ||||
| #define NLTYPE_FIXED    0     /* Newline is a fixed length string */ | ||||
| #define NLTYPE_ANY      1     /* Newline is any Unicode line ending */ | ||||
| #define NLTYPE_ANYCRLF  2     /* Newline is CR, LF, or CRLF */ | ||||
|  | ||||
| /* This macro checks for a newline at the given position */ | ||||
|  | ||||
| #define IS_NEWLINE(p) \ | ||||
|   ((NLBLOCK->nltype != NLTYPE_FIXED)? \ | ||||
|     ((p) < NLBLOCK->PSEND && \ | ||||
|      _pcre_is_newline((p), NLBLOCK->PSEND, &(NLBLOCK->nllen), utf8) \ | ||||
|     ) \ | ||||
|      _pcre_is_newline((p), NLBLOCK->nltype, NLBLOCK->PSEND, &(NLBLOCK->nllen),\ | ||||
|        utf8)) \ | ||||
|     : \ | ||||
|     ((p) <= NLBLOCK->PSEND - NLBLOCK->nllen && \ | ||||
|      (p)[0] == NLBLOCK->nl[0] && \ | ||||
| @@ -152,8 +203,8 @@ string field names are. */ | ||||
| #define WAS_NEWLINE(p) \ | ||||
|   ((NLBLOCK->nltype != NLTYPE_FIXED)? \ | ||||
|     ((p) > NLBLOCK->PSSTART && \ | ||||
|      _pcre_was_newline((p), NLBLOCK->PSSTART, &(NLBLOCK->nllen), utf8) \ | ||||
|     ) \ | ||||
|      _pcre_was_newline((p), NLBLOCK->nltype, NLBLOCK->PSSTART, \ | ||||
|        &(NLBLOCK->nllen), utf8)) \ | ||||
|     : \ | ||||
|     ((p) >= NLBLOCK->PSSTART + NLBLOCK->nllen && \ | ||||
|      (p)[-NLBLOCK->nllen] == NLBLOCK->nl[0] && \ | ||||
| @@ -178,10 +229,12 @@ must begin with PCRE_. */ | ||||
| #define USPTR const unsigned char * | ||||
| #endif | ||||
|  | ||||
|  | ||||
|  | ||||
| /* Include the public PCRE header and the definitions of UCP character property | ||||
| values. */ | ||||
|  | ||||
| #include "pcre.h" | ||||
| #include <pcre.h> | ||||
| #include "ucp.h" | ||||
|  | ||||
| /* When compiling for use with the Virtual Pascal compiler, these functions | ||||
| @@ -189,7 +242,9 @@ need to have their names changed. PCRE must be compiled with the -DVPCOMPAT | ||||
| option on the command line. */ | ||||
|  | ||||
| #ifdef VPCOMPAT | ||||
| #define strlen(s)        _strlen(s) | ||||
| #define strncmp(s1,s2,m) _strncmp(s1,s2,m) | ||||
| #define memcmp(s,c,n)    _memcmp(s,c,n) | ||||
| #define memcpy(d,s,n)    _memcpy(d,s,n) | ||||
| #define memmove(d,s,n)   _memmove(d,s,n) | ||||
| #define memset(s,c,n)    _memset(s,c,n) | ||||
| @@ -198,23 +253,31 @@ option on the command line. */ | ||||
| /* To cope with SunOS4 and other systems that lack memmove() but have bcopy(), | ||||
| define a macro for memmove() if HAVE_MEMMOVE is false, provided that HAVE_BCOPY | ||||
| is set. Otherwise, include an emulating function for those systems that have | ||||
| neither (there some non-Unix environments where this is the case). This assumes | ||||
| that all calls to memmove are moving strings upwards in store, which is the | ||||
| case in PCRE. */ | ||||
| neither (there are some non-Unix environments where this is the case). */ | ||||
|  | ||||
| #if ! HAVE_MEMMOVE | ||||
| #ifndef HAVE_MEMMOVE | ||||
| #undef  memmove        /* some systems may have a macro */ | ||||
| #if HAVE_BCOPY | ||||
| #ifdef HAVE_BCOPY | ||||
| #define memmove(a, b, c) bcopy(b, a, c) | ||||
| #else  /* HAVE_BCOPY */ | ||||
| static void * | ||||
| pcre_memmove(unsigned char *dest, const unsigned char *src, size_t n) | ||||
| pcre_memmove(void *d, const void *s, size_t n) | ||||
| { | ||||
| size_t i; | ||||
| dest += n; | ||||
| src += n; | ||||
| for (i = 0; i < n; ++i) *(--dest) =  *(--src); | ||||
| return dest; | ||||
| unsigned char *dest = (unsigned char *)d; | ||||
| const unsigned char *src = (const unsigned char *)s; | ||||
| if (dest > src) | ||||
|   { | ||||
|   dest += n; | ||||
|   src += n; | ||||
|   for (i = 0; i < n; ++i) *(--dest) = *(--src); | ||||
|   return (void *)dest; | ||||
|   } | ||||
| else | ||||
|   { | ||||
|   for (i = 0; i < n; ++i) *dest++ = *src++; | ||||
|   return (void *)(dest - n); | ||||
|   } | ||||
| } | ||||
| #define memmove(a, b, c) pcre_memmove(a, b, c) | ||||
| #endif   /* not HAVE_BCOPY */ | ||||
| @@ -439,7 +502,8 @@ bits. */ | ||||
| /* Masks for identifying the public options that are permitted at compile | ||||
| time, run time, or study time, respectively. */ | ||||
|  | ||||
| #define PCRE_NEWLINE_BITS (PCRE_NEWLINE_CR|PCRE_NEWLINE_LF|PCRE_NEWLINE_ANY) | ||||
| #define PCRE_NEWLINE_BITS (PCRE_NEWLINE_CR|PCRE_NEWLINE_LF|PCRE_NEWLINE_ANY| \ | ||||
|                            PCRE_NEWLINE_ANYCRLF) | ||||
|  | ||||
| #define PUBLIC_OPTIONS \ | ||||
|   (PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \ | ||||
| @@ -538,9 +602,9 @@ ESC_Z to detect the types that may be repeated. These are the types that | ||||
| consume characters. If any new escapes are put in between that don't consume a | ||||
| character, that code will have to change. */ | ||||
|  | ||||
| enum { ESC_A = 1, ESC_G, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s, ESC_W, | ||||
|        ESC_w, ESC_dum1, ESC_C, ESC_P, ESC_p, ESC_R, ESC_X, ESC_Z, ESC_z, | ||||
|        ESC_E, ESC_Q, ESC_k, ESC_REF }; | ||||
| enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s, | ||||
|        ESC_W, ESC_w, ESC_dum1, ESC_C, ESC_P, ESC_p, ESC_R, ESC_H, ESC_h, | ||||
|        ESC_V, ESC_v, ESC_X, ESC_Z, ESC_z, ESC_E, ESC_Q, ESC_k, ESC_REF }; | ||||
|  | ||||
|  | ||||
| /* Opcode table: OP_BRA must be last, as all values >= it are used for brackets | ||||
| @@ -562,133 +626,138 @@ enum { | ||||
|  | ||||
|   OP_SOD,            /* 1 Start of data: \A */ | ||||
|   OP_SOM,            /* 2 Start of match (subject + offset): \G */ | ||||
|   OP_NOT_WORD_BOUNDARY,  /*  3 \B */ | ||||
|   OP_WORD_BOUNDARY,      /*  4 \b */ | ||||
|   OP_NOT_DIGIT,          /*  5 \D */ | ||||
|   OP_DIGIT,              /*  6 \d */ | ||||
|   OP_NOT_WHITESPACE,     /*  7 \S */ | ||||
|   OP_WHITESPACE,         /*  8 \s */ | ||||
|   OP_NOT_WORDCHAR,       /*  9 \W */ | ||||
|   OP_WORDCHAR,           /* 10 \w */ | ||||
|   OP_ANY,            /* 11 Match any character */ | ||||
|   OP_ANYBYTE,        /* 12 Match any byte (\C); different to OP_ANY for UTF-8 */ | ||||
|   OP_NOTPROP,        /* 13 \P (not Unicode property) */ | ||||
|   OP_PROP,           /* 14 \p (Unicode property) */ | ||||
|   OP_ANYNL,          /* 15 \R (any newline sequence) */ | ||||
|   OP_EXTUNI,         /* 16 \X (extended Unicode sequence */ | ||||
|   OP_EODN,           /* 17 End of data or \n at end of data: \Z. */ | ||||
|   OP_EOD,            /* 18 End of data: \z */ | ||||
|   OP_SET_SOM,        /* 3 Set start of match (\K) */ | ||||
|   OP_NOT_WORD_BOUNDARY,  /*  4 \B */ | ||||
|   OP_WORD_BOUNDARY,      /*  5 \b */ | ||||
|   OP_NOT_DIGIT,          /*  6 \D */ | ||||
|   OP_DIGIT,              /*  7 \d */ | ||||
|   OP_NOT_WHITESPACE,     /*  8 \S */ | ||||
|   OP_WHITESPACE,         /*  9 \s */ | ||||
|   OP_NOT_WORDCHAR,       /* 10 \W */ | ||||
|   OP_WORDCHAR,           /* 11 \w */ | ||||
|   OP_ANY,            /* 12 Match any character */ | ||||
|   OP_ANYBYTE,        /* 13 Match any byte (\C); different to OP_ANY for UTF-8 */ | ||||
|   OP_NOTPROP,        /* 14 \P (not Unicode property) */ | ||||
|   OP_PROP,           /* 15 \p (Unicode property) */ | ||||
|   OP_ANYNL,          /* 16 \R (any newline sequence) */ | ||||
|   OP_NOT_HSPACE,     /* 17 \H (not horizontal whitespace) */ | ||||
|   OP_HSPACE,         /* 18 \h (horizontal whitespace) */ | ||||
|   OP_NOT_VSPACE,     /* 19 \V (not vertical whitespace) */ | ||||
|   OP_VSPACE,         /* 20 \v (vertical whitespace) */ | ||||
|   OP_EXTUNI,         /* 21 \X (extended Unicode sequence) */ | ||||
|   OP_EODN,           /* 22 End of data or \n at end of data: \Z. */ | ||||
|   OP_EOD,            /* 23 End of data: \z */ | ||||
|  | ||||
|   OP_OPT,            /* 19 Set runtime options */ | ||||
|   OP_CIRC,           /* 20 Start of line - varies with multiline switch */ | ||||
|   OP_DOLL,           /* 21 End of line - varies with multiline switch */ | ||||
|   OP_CHAR,           /* 22 Match one character, casefully */ | ||||
|   OP_CHARNC,         /* 23 Match one character, caselessly */ | ||||
|   OP_NOT,            /* 24 Match one character, not the following one */ | ||||
|   OP_OPT,            /* 24 Set runtime options */ | ||||
|   OP_CIRC,           /* 25 Start of line - varies with multiline switch */ | ||||
|   OP_DOLL,           /* 26 End of line - varies with multiline switch */ | ||||
|   OP_CHAR,           /* 27 Match one character, casefully */ | ||||
|   OP_CHARNC,         /* 28 Match one character, caselessly */ | ||||
|   OP_NOT,            /* 29 Match one character, not the following one */ | ||||
|  | ||||
|   OP_STAR,           /* 25 The maximizing and minimizing versions of */ | ||||
|   OP_MINSTAR,        /* 26 these six opcodes must come in pairs, with */ | ||||
|   OP_PLUS,           /* 27 the minimizing one second. */ | ||||
|   OP_MINPLUS,        /* 28 This first set applies to single characters.*/ | ||||
|   OP_QUERY,          /* 29 */ | ||||
|   OP_MINQUERY,       /* 30 */ | ||||
|   OP_STAR,           /* 30 The maximizing and minimizing versions of */ | ||||
|   OP_MINSTAR,        /* 31 these six opcodes must come in pairs, with */ | ||||
|   OP_PLUS,           /* 32 the minimizing one second. */ | ||||
|   OP_MINPLUS,        /* 33 This first set applies to single characters.*/ | ||||
|   OP_QUERY,          /* 34 */ | ||||
|   OP_MINQUERY,       /* 35 */ | ||||
|  | ||||
|   OP_UPTO,           /* 31 From 0 to n matches */ | ||||
|   OP_MINUPTO,        /* 32 */ | ||||
|   OP_EXACT,          /* 33 Exactly n matches */ | ||||
|   OP_UPTO,           /* 36 From 0 to n matches */ | ||||
|   OP_MINUPTO,        /* 37 */ | ||||
|   OP_EXACT,          /* 38 Exactly n matches */ | ||||
|  | ||||
|   OP_POSSTAR,        /* 34 Possessified star */ | ||||
|   OP_POSPLUS,        /* 35 Possessified plus */ | ||||
|   OP_POSQUERY,       /* 36 Posesssified query */ | ||||
|   OP_POSUPTO,        /* 37 Possessified upto */ | ||||
|   OP_POSSTAR,        /* 39 Possessified star */ | ||||
|   OP_POSPLUS,        /* 40 Possessified plus */ | ||||
|   OP_POSQUERY,       /* 41 Possessified query */ | ||||
|   OP_POSUPTO,        /* 42 Possessified upto */ | ||||
|  | ||||
|   OP_NOTSTAR,        /* 38 The maximizing and minimizing versions of */ | ||||
|   OP_NOTMINSTAR,     /* 39 these six opcodes must come in pairs, with */ | ||||
|   OP_NOTPLUS,        /* 40 the minimizing one second. They must be in */ | ||||
|   OP_NOTMINPLUS,     /* 41 exactly the same order as those above. */ | ||||
|   OP_NOTQUERY,       /* 42 This set applies to "not" single characters. */ | ||||
|   OP_NOTMINQUERY,    /* 43 */ | ||||
|   OP_NOTSTAR,        /* 43 The maximizing and minimizing versions of */ | ||||
|   OP_NOTMINSTAR,     /* 44 these six opcodes must come in pairs, with */ | ||||
|   OP_NOTPLUS,        /* 45 the minimizing one second. They must be in */ | ||||
|   OP_NOTMINPLUS,     /* 46 exactly the same order as those above. */ | ||||
|   OP_NOTQUERY,       /* 47 This set applies to "not" single characters. */ | ||||
|   OP_NOTMINQUERY,    /* 48 */ | ||||
|  | ||||
|   OP_NOTUPTO,        /* 44 From 0 to n matches */ | ||||
|   OP_NOTMINUPTO,     /* 45 */ | ||||
|   OP_NOTEXACT,       /* 46 Exactly n matches */ | ||||
|   OP_NOTUPTO,        /* 49 From 0 to n matches */ | ||||
|   OP_NOTMINUPTO,     /* 50 */ | ||||
|   OP_NOTEXACT,       /* 51 Exactly n matches */ | ||||
|  | ||||
|   OP_NOTPOSSTAR,     /* 47 Possessified versions */ | ||||
|   OP_NOTPOSPLUS,     /* 48 */ | ||||
|   OP_NOTPOSQUERY,    /* 49 */ | ||||
|   OP_NOTPOSUPTO,     /* 50 */ | ||||
|   OP_NOTPOSSTAR,     /* 52 Possessified versions */ | ||||
|   OP_NOTPOSPLUS,     /* 53 */ | ||||
|   OP_NOTPOSQUERY,    /* 54 */ | ||||
|   OP_NOTPOSUPTO,     /* 55 */ | ||||
|  | ||||
|   OP_TYPESTAR,       /* 51 The maximizing and minimizing versions of */ | ||||
|   OP_TYPEMINSTAR,    /* 52 these six opcodes must come in pairs, with */ | ||||
|   OP_TYPEPLUS,       /* 53 the minimizing one second. These codes must */ | ||||
|   OP_TYPEMINPLUS,    /* 54 be in exactly the same order as those above. */ | ||||
|   OP_TYPEQUERY,      /* 55 This set applies to character types such as \d */ | ||||
|   OP_TYPEMINQUERY,   /* 56 */ | ||||
|   OP_TYPESTAR,       /* 56 The maximizing and minimizing versions of */ | ||||
|   OP_TYPEMINSTAR,    /* 57 these six opcodes must come in pairs, with */ | ||||
|   OP_TYPEPLUS,       /* 58 the minimizing one second. These codes must */ | ||||
|   OP_TYPEMINPLUS,    /* 59 be in exactly the same order as those above. */ | ||||
|   OP_TYPEQUERY,      /* 60 This set applies to character types such as \d */ | ||||
|   OP_TYPEMINQUERY,   /* 61 */ | ||||
|  | ||||
|   OP_TYPEUPTO,       /* 57 From 0 to n matches */ | ||||
|   OP_TYPEMINUPTO,    /* 58 */ | ||||
|   OP_TYPEEXACT,      /* 59 Exactly n matches */ | ||||
|   OP_TYPEUPTO,       /* 62 From 0 to n matches */ | ||||
|   OP_TYPEMINUPTO,    /* 63 */ | ||||
|   OP_TYPEEXACT,      /* 64 Exactly n matches */ | ||||
|  | ||||
|   OP_TYPEPOSSTAR,    /* 60 Possessified versions */ | ||||
|   OP_TYPEPOSPLUS,    /* 61 */ | ||||
|   OP_TYPEPOSQUERY,   /* 62 */ | ||||
|   OP_TYPEPOSUPTO,    /* 63 */ | ||||
|   OP_TYPEPOSSTAR,    /* 65 Possessified versions */ | ||||
|   OP_TYPEPOSPLUS,    /* 66 */ | ||||
|   OP_TYPEPOSQUERY,   /* 67 */ | ||||
|   OP_TYPEPOSUPTO,    /* 68 */ | ||||
|  | ||||
|   OP_CRSTAR,         /* 64 The maximizing and minimizing versions of */ | ||||
|   OP_CRMINSTAR,      /* 65 all these opcodes must come in pairs, with */ | ||||
|   OP_CRPLUS,         /* 66 the minimizing one second. These codes must */ | ||||
|   OP_CRMINPLUS,      /* 67 be in exactly the same order as those above. */ | ||||
|   OP_CRQUERY,        /* 68 These are for character classes and back refs */ | ||||
|   OP_CRMINQUERY,     /* 69 */ | ||||
|   OP_CRRANGE,        /* 70 These are different to the three sets above. */ | ||||
|   OP_CRMINRANGE,     /* 71 */ | ||||
|   OP_CRSTAR,         /* 69 The maximizing and minimizing versions of */ | ||||
|   OP_CRMINSTAR,      /* 70 all these opcodes must come in pairs, with */ | ||||
|   OP_CRPLUS,         /* 71 the minimizing one second. These codes must */ | ||||
|   OP_CRMINPLUS,      /* 72 be in exactly the same order as those above. */ | ||||
|   OP_CRQUERY,        /* 73 These are for character classes and back refs */ | ||||
|   OP_CRMINQUERY,     /* 74 */ | ||||
|   OP_CRRANGE,        /* 75 These are different to the three sets above. */ | ||||
|   OP_CRMINRANGE,     /* 76 */ | ||||
|  | ||||
|   OP_CLASS,          /* 72 Match a character class, chars < 256 only */ | ||||
|   OP_NCLASS,         /* 73 Same, but the bitmap was created from a negative | ||||
|   OP_CLASS,          /* 77 Match a character class, chars < 256 only */ | ||||
|   OP_NCLASS,         /* 78 Same, but the bitmap was created from a negative | ||||
|                            class - the difference is relevant only when a UTF-8 | ||||
|                            character > 255 is encountered. */ | ||||
|  | ||||
|   OP_XCLASS,         /* 74 Extended class for handling UTF-8 chars within the | ||||
|   OP_XCLASS,         /* 79 Extended class for handling UTF-8 chars within the | ||||
|                            class. This does both positive and negative. */ | ||||
|  | ||||
|   OP_REF,            /* 75 Match a back reference */ | ||||
|   OP_RECURSE,        /* 76 Match a numbered subpattern (possibly recursive) */ | ||||
|   OP_CALLOUT,        /* 77 Call out to external function if provided */ | ||||
|   OP_REF,            /* 80 Match a back reference */ | ||||
|   OP_RECURSE,        /* 81 Match a numbered subpattern (possibly recursive) */ | ||||
|   OP_CALLOUT,        /* 82 Call out to external function if provided */ | ||||
|  | ||||
|   OP_ALT,            /* 78 Start of alternation */ | ||||
|   OP_KET,            /* 79 End of group that doesn't have an unbounded repeat */ | ||||
|   OP_KETRMAX,        /* 80 These two must remain together and in this */ | ||||
|   OP_KETRMIN,        /* 81 order. They are for groups the repeat for ever. */ | ||||
|   OP_ALT,            /* 83 Start of alternation */ | ||||
|   OP_KET,            /* 84 End of group that doesn't have an unbounded repeat */ | ||||
|   OP_KETRMAX,        /* 85 These two must remain together and in this */ | ||||
|   OP_KETRMIN,        /* 86 order. They are for groups that repeat for ever. */ | ||||
|  | ||||
|   /* The assertions must come before BRA, CBRA, ONCE, and COND.*/ | ||||
|  | ||||
|   OP_ASSERT,         /* 82 Positive lookahead */ | ||||
|   OP_ASSERT_NOT,     /* 83 Negative lookahead */ | ||||
|   OP_ASSERTBACK,     /* 84 Positive lookbehind */ | ||||
|   OP_ASSERTBACK_NOT, /* 85 Negative lookbehind */ | ||||
|   OP_REVERSE,        /* 86 Move pointer back - used in lookbehind assertions */ | ||||
|   OP_ASSERT,         /* 87 Positive lookahead */ | ||||
|   OP_ASSERT_NOT,     /* 88 Negative lookahead */ | ||||
|   OP_ASSERTBACK,     /* 89 Positive lookbehind */ | ||||
|   OP_ASSERTBACK_NOT, /* 90 Negative lookbehind */ | ||||
|   OP_REVERSE,        /* 91 Move pointer back - used in lookbehind assertions */ | ||||
|  | ||||
|   /* ONCE, BRA, CBRA, and COND must come after the assertions, with ONCE first, | ||||
|   as there's a test for >= ONCE for a subpattern that isn't an assertion. */ | ||||
|  | ||||
|   OP_ONCE,           /* 87 Atomic group */ | ||||
|   OP_BRA,            /* 88 Start of non-capturing bracket */ | ||||
|   OP_CBRA,           /* 89 Start of capturing bracket */ | ||||
|   OP_COND,           /* 90 Conditional group */ | ||||
|   OP_ONCE,           /* 92 Atomic group */ | ||||
|   OP_BRA,            /* 93 Start of non-capturing bracket */ | ||||
|   OP_CBRA,           /* 94 Start of capturing bracket */ | ||||
|   OP_COND,           /* 95 Conditional group */ | ||||
|  | ||||
|   /* These three must follow the previous three, in the same order. There's a | ||||
|   check for >= SBRA to distinguish the two sets. */ | ||||
|  | ||||
|   OP_SBRA,           /* 91 Start of non-capturing bracket, check empty  */ | ||||
|   OP_SCBRA,          /* 92 Start of capturing bracket, check empty */ | ||||
|   OP_SCOND,          /* 93 Conditional group, check empty */ | ||||
|   OP_SBRA,           /* 96 Start of non-capturing bracket, check empty  */ | ||||
|   OP_SCBRA,          /* 97 Start of capturing bracket, check empty */ | ||||
|   OP_SCOND,          /* 98 Conditional group, check empty */ | ||||
|  | ||||
|   OP_CREF,           /* 94 Used to hold a capture number as condition */ | ||||
|   OP_RREF,           /* 95 Used to hold a recursion number as condition */ | ||||
|   OP_DEF,            /* 96 The DEFINE condition */ | ||||
|   OP_CREF,           /* 99 Used to hold a capture number as condition */ | ||||
|   OP_RREF,           /* 100 Used to hold a recursion number as condition */ | ||||
|   OP_DEF,            /* 101 The DEFINE condition */ | ||||
|  | ||||
|   OP_BRAZERO,        /* 97 These two must remain together and in this */ | ||||
|   OP_BRAMINZERO      /* 98 order. */ | ||||
|   OP_BRAZERO,        /* 102 These two must remain together and in this */ | ||||
|   OP_BRAMINZERO      /* 103 order. */ | ||||
| }; | ||||
|  | ||||
|  | ||||
| @@ -696,10 +765,10 @@ enum { | ||||
| for debugging. The macro is referenced only in pcre_printint.c. */ | ||||
|  | ||||
| #define OP_NAME_LIST \ | ||||
|   "End", "\\A", "\\G", "\\B", "\\b", "\\D", "\\d",                \ | ||||
|   "End", "\\A", "\\G", "\\K", "\\B", "\\b", "\\D", "\\d",         \ | ||||
|   "\\S", "\\s", "\\W", "\\w", "Any", "Anybyte",                   \ | ||||
|   "notprop", "prop", "anynl", "extuni",                           \ | ||||
|   "\\Z", "\\z",                                                   \ | ||||
|   "notprop", "prop", "\\R", "\\H", "\\h", "\\V", "\\v",           \ | ||||
|   "extuni",  "\\Z", "\\z",                                        \ | ||||
|   "Opt", "^", "$", "char", "charnc", "not",                       \ | ||||
|   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",                 \ | ||||
|   "*+","++", "?+", "{",                                           \ | ||||
| @@ -726,9 +795,11 @@ in UTF-8 mode. The code that uses this table must know about such things. */ | ||||
|  | ||||
| #define OP_LENGTHS \ | ||||
|   1,                             /* End                                    */ \ | ||||
|   1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* \A, \G, \B, \B, \D, \d, \S, \s, \W, \w */ \ | ||||
|   1, 1, 1, 1, 1,                 /* \A, \G, \K, \B, \b                     */ \ | ||||
|   1, 1, 1, 1, 1, 1,              /* \D, \d, \S, \s, \W, \w                 */ \ | ||||
|   1, 1,                          /* Any, Anybyte                           */ \ | ||||
|   3, 3, 1, 1,                    /* NOTPROP, PROP, EXTUNI, ANYNL           */ \ | ||||
|   3, 3, 1,                       /* NOTPROP, PROP, \R                      */ \ | ||||
|   1, 1, 1, 1, 1,                 /* \H, \h, \V, \v, EXTUNI                 */ \ | ||||
|   1, 1, 2, 1, 1,                 /* \Z, \z, Opt, ^, $                      */ \ | ||||
|   2,                             /* Char  - the minimum length             */ \ | ||||
|   2,                             /* Charnc  - the minimum length           */ \ | ||||
| @@ -788,7 +859,7 @@ enum { ERR0,  ERR1,  ERR2,  ERR3,  ERR4,  ERR5,  ERR6,  ERR7,  ERR8,  ERR9, | ||||
|        ERR20, ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR28, ERR29, | ||||
|        ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39, | ||||
|        ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, | ||||
|        ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57 }; | ||||
|        ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58 }; | ||||
|  | ||||
| /* The real format of the start of the pcre block; the index of names and the | ||||
| code vector run on as long as necessary after the end. We store an explicit | ||||
| @@ -877,21 +948,11 @@ typedef struct recursion_info { | ||||
|   struct recursion_info *prevrec; /* Previous recursion record (or NULL) */ | ||||
|   int group_num;                /* Number of group that was called */ | ||||
|   const uschar *after_call;     /* "Return value": points after the call in the expr */ | ||||
|   USPTR save_start;             /* Old value of md->start_match */ | ||||
|   USPTR save_start;             /* Old value of mstart */ | ||||
|   int *offset_save;             /* Pointer to start of saved offsets */ | ||||
|   int saved_max;                /* Number of saved offsets */ | ||||
| } recursion_info; | ||||
|  | ||||
| /* When compiling in a mode that doesn't use recursive calls to match(), | ||||
| a structure is used to remember local variables on the heap. It is defined in | ||||
| pcre_exec.c, close to the match() function, so that it is easy to keep it in | ||||
| step with any changes of local variable. However, the pointer to the current | ||||
| frame must be saved in some "static" place over a longjmp(). We declare the | ||||
| structure here so that we can put a pointer in the match_data structure. NOTE: | ||||
| This isn't used for a "normal" compilation of pcre. */ | ||||
|  | ||||
| struct heapframe; | ||||
|  | ||||
| /* Structure for building a chain of data for holding the values of the subject | ||||
| pointer at the start of each subpattern, so as to detect when an empty string | ||||
| has been matched by a subpattern - to break infinite loops. */ | ||||
| @@ -928,7 +989,7 @@ typedef struct match_data { | ||||
|   const uschar *start_code;     /* For use when recursing */ | ||||
|   USPTR  start_subject;         /* Start of the subject string */ | ||||
|   USPTR  end_subject;           /* End of the subject string */ | ||||
|   USPTR  start_match;           /* Start of this match attempt */ | ||||
|   USPTR  start_match_ptr;       /* Start of matched string */ | ||||
|   USPTR  end_match_ptr;         /* Subject position at end match */ | ||||
|   int    end_offset_top;        /* Highwater mark at end of match */ | ||||
|   int    capture_last;          /* Most recent capture number */ | ||||
| @@ -937,7 +998,6 @@ typedef struct match_data { | ||||
|   int    eptrn;                 /* Next free eptrblock */ | ||||
|   recursion_info *recursive;    /* Linked list of recursion data */ | ||||
|   void  *callout_data;          /* To pass back to callouts */ | ||||
|   struct heapframe *thisframe;  /* Used only when compiling for no recursion */ | ||||
| } match_data; | ||||
|  | ||||
| /* A similar structure is used for the same purpose by the DFA matching | ||||
| @@ -1024,16 +1084,16 @@ extern const uschar _pcre_OP_lengths[]; | ||||
| one of the exported public functions. They have to be "external" in the C | ||||
| sense, but are not part of the PCRE public API. */ | ||||
|  | ||||
| extern BOOL         _pcre_is_newline(const uschar *, const uschar *, int *, | ||||
|                       BOOL); | ||||
| extern BOOL         _pcre_is_newline(const uschar *, int, const uschar *, | ||||
|                       int *, BOOL); | ||||
| extern int          _pcre_ord2utf8(int, uschar *); | ||||
| extern real_pcre   *_pcre_try_flipped(const real_pcre *, real_pcre *, | ||||
|                       const pcre_study_data *, pcre_study_data *); | ||||
| extern int          _pcre_ucp_findprop(const unsigned int, int *, int *); | ||||
| extern unsigned int _pcre_ucp_othercase(const unsigned int); | ||||
| extern int          _pcre_valid_utf8(const uschar *, int); | ||||
| extern BOOL         _pcre_was_newline(const uschar *, const uschar *, int *, | ||||
|                       BOOL); | ||||
| extern BOOL         _pcre_was_newline(const uschar *, int, const uschar *, | ||||
|                       int *, BOOL); | ||||
| extern BOOL         _pcre_xclass(int, const uschar *); | ||||
|  | ||||
| #endif | ||||
|   | ||||
| @@ -6,7 +6,7 @@ | ||||
| and semantics are as close as possible to those of the Perl 5 language. | ||||
|  | ||||
|                        Written by Philip Hazel | ||||
|            Copyright (c) 1997-2006 University of Cambridge | ||||
|            Copyright (c) 1997-2007 University of Cambridge | ||||
|  | ||||
| ----------------------------------------------------------------------------- | ||||
| Redistribution and use in source and binary forms, with or without | ||||
|   | ||||
| @@ -6,7 +6,7 @@ | ||||
| and semantics are as close as possible to those of the Perl 5 language. | ||||
|  | ||||
|                        Written by Philip Hazel | ||||
|            Copyright (c) 1997-2006 University of Cambridge | ||||
|            Copyright (c) 1997-2007 University of Cambridge | ||||
|  | ||||
| ----------------------------------------------------------------------------- | ||||
| Redistribution and use in source and binary forms, with or without | ||||
| @@ -42,9 +42,8 @@ POSSIBILITY OF SUCH DAMAGE. | ||||
| one kind of newline is to be recognized. When a newline is found, its length is | ||||
| returned. In principle, we could implement several newline "types", each | ||||
| referring to a different set of newline characters. At present, PCRE supports | ||||
| only NLTYPE_FIXED, which gets handled without these functions, and NLTYPE_ALL, | ||||
| so for now the type isn't passed into the functions. It can easily be added | ||||
| later if required. The full list of Unicode newline characters is taken from | ||||
| only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF, | ||||
| and NLTYPE_ANY. The full list of Unicode newline characters is taken from | ||||
| http://unicode.org/unicode/reports/tr18/. */ | ||||
|  | ||||
|  | ||||
| @@ -61,6 +60,7 @@ string that is being processed. | ||||
|  | ||||
| Arguments: | ||||
|   ptr          pointer to possible newline | ||||
|   type         the newline type | ||||
|   endptr       pointer to the end of the string | ||||
|   lenptr       where to return the length | ||||
|   utf8         TRUE if in utf8 mode | ||||
| @@ -69,12 +69,23 @@ Returns:       TRUE or FALSE | ||||
| */ | ||||
|  | ||||
| BOOL | ||||
| _pcre_is_newline(const uschar *ptr, const uschar *endptr, int *lenptr, | ||||
|   BOOL utf8) | ||||
| _pcre_is_newline(const uschar *ptr, int type, const uschar *endptr, | ||||
|   int *lenptr, BOOL utf8) | ||||
| { | ||||
| int c; | ||||
| if (utf8) { GETCHAR(c, ptr); } else c = *ptr; | ||||
| switch(c) | ||||
|  | ||||
| if (type == NLTYPE_ANYCRLF) switch(c) | ||||
|   { | ||||
|   case 0x000a: *lenptr = 1; return TRUE;             /* LF */ | ||||
|   case 0x000d: *lenptr = (ptr < endptr - 1 && ptr[1] == 0x0a)? 2 : 1; | ||||
|                return TRUE;                          /* CR */ | ||||
|   default: return FALSE; | ||||
|   } | ||||
|  | ||||
| /* NLTYPE_ANY */ | ||||
|  | ||||
| else switch(c) | ||||
|   { | ||||
|   case 0x000a:                                       /* LF */ | ||||
|   case 0x000b:                                       /* VT */ | ||||
| @@ -99,6 +110,7 @@ the string that is being processed. | ||||
|  | ||||
| Arguments: | ||||
|   ptr          pointer to possible newline | ||||
|   type         the newline type | ||||
|   startptr     pointer to the start of the string | ||||
|   lenptr       where to return the length | ||||
|   utf8         TRUE if in utf8 mode | ||||
| @@ -107,8 +119,8 @@ Returns:       TRUE or FALSE | ||||
| */ | ||||
|  | ||||
| BOOL | ||||
| _pcre_was_newline(const uschar *ptr, const uschar *startptr, int *lenptr, | ||||
|   BOOL utf8) | ||||
| _pcre_was_newline(const uschar *ptr, int type, const uschar *startptr, | ||||
|   int *lenptr, BOOL utf8) | ||||
| { | ||||
| int c; | ||||
| ptr--; | ||||
| @@ -118,7 +130,16 @@ if (utf8) | ||||
|   GETCHAR(c, ptr); | ||||
|   } | ||||
| else c = *ptr; | ||||
| switch(c) | ||||
|  | ||||
| if (type == NLTYPE_ANYCRLF) switch(c) | ||||
|   { | ||||
|   case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1; | ||||
|                return TRUE;                         /* LF */ | ||||
|   case 0x000d: *lenptr = 1; return TRUE;            /* CR */ | ||||
|   default: return FALSE; | ||||
|   } | ||||
|  | ||||
| else switch(c) | ||||
|   { | ||||
|   case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1; | ||||
|                return TRUE;                         /* LF */ | ||||
|   | ||||
| @@ -6,7 +6,7 @@ | ||||
| and semantics are as close as possible to those of the Perl 5 language. | ||||
|  | ||||
|                        Written by Philip Hazel | ||||
|            Copyright (c) 1997-2006 University of Cambridge | ||||
|            Copyright (c) 1997-2007 University of Cambridge | ||||
|  | ||||
| ----------------------------------------------------------------------------- | ||||
| Redistribution and use in source and binary forms, with or without | ||||
| @@ -62,6 +62,7 @@ Returns:     number of characters placed in the buffer | ||||
| int | ||||
| _pcre_ord2utf8(int cvalue, uschar *buffer) | ||||
| { | ||||
| #ifdef SUPPORT_UTF8 | ||||
| register int i, j; | ||||
| for (i = 0; i < _pcre_utf8_table1_size; i++) | ||||
|   if (cvalue <= _pcre_utf8_table1[i]) break; | ||||
| @@ -73,6 +74,9 @@ for (j = i; j > 0; j--) | ||||
|  } | ||||
| *buffer = _pcre_utf8_table2[i] | cvalue; | ||||
| return i + 1; | ||||
| #else | ||||
| return 0;   /* Keep compiler happy; this function won't ever be */ | ||||
| #endif      /* called when SUPPORT_UTF8 is not defined. */ | ||||
| } | ||||
|  | ||||
| /* End of pcre_ord2utf8.c */ | ||||
|   | ||||
| @@ -6,7 +6,7 @@ | ||||
| and semantics are as close as possible to those of the Perl 5 language. | ||||
|  | ||||
|                        Written by Philip Hazel | ||||
|            Copyright (c) 1997-2006 University of Cambridge | ||||
|            Copyright (c) 1997-2007 University of Cambridge | ||||
|  | ||||
| ----------------------------------------------------------------------------- | ||||
| Redistribution and use in source and binary forms, with or without | ||||
| @@ -63,7 +63,7 @@ Returns:        the (possibly updated) count value (a non-negative number), or | ||||
|                 a negative error number | ||||
| */ | ||||
|  | ||||
| PCRE_DATA_SCOPE int | ||||
| PCRE_EXP_DEFN int | ||||
| pcre_refcount(pcre *argument_re, int adjust) | ||||
| { | ||||
| real_pcre *re = (real_pcre *)argument_re; | ||||
|   | ||||
| @@ -6,7 +6,7 @@ | ||||
| and semantics are as close as possible to those of the Perl 5 language. | ||||
|  | ||||
|                        Written by Philip Hazel | ||||
|            Copyright (c) 1997-2006 University of Cambridge | ||||
|            Copyright (c) 1997-2007 University of Cambridge | ||||
|  | ||||
| ----------------------------------------------------------------------------- | ||||
| Redistribution and use in source and binary forms, with or without | ||||
| @@ -394,11 +394,13 @@ do | ||||
|       character with a value > 255. */ | ||||
|  | ||||
|       case OP_NCLASS: | ||||
| #ifdef SUPPORT_UTF8 | ||||
|       if (utf8) | ||||
|         { | ||||
|         start_bits[24] |= 0xf0;              /* Bits for 0xc4 - 0xc8 */ | ||||
|         memset(start_bits+25, 0xff, 7);      /* Bits for 0xc9 - 0xff */ | ||||
|         } | ||||
| #endif | ||||
|       /* Fall through */ | ||||
|  | ||||
|       case OP_CLASS: | ||||
| @@ -411,6 +413,7 @@ do | ||||
|         value is > 127. In fact, there are only two possible starting bytes for | ||||
|         characters in the range 128 - 255. */ | ||||
|  | ||||
| #ifdef SUPPORT_UTF8 | ||||
|         if (utf8) | ||||
|           { | ||||
|           for (c = 0; c < 16; c++) start_bits[c] |= tcode[c]; | ||||
| @@ -428,6 +431,7 @@ do | ||||
|         /* In non-UTF-8 mode, the two bit maps are completely compatible. */ | ||||
|  | ||||
|         else | ||||
| #endif | ||||
|           { | ||||
|           for (c = 0; c < 32; c++) start_bits[c] |= tcode[c]; | ||||
|           } | ||||
| @@ -487,7 +491,7 @@ Returns:    pointer to a pcre_extra block, with study_data filled in and the | ||||
|             NULL on error or if no optimization possible | ||||
| */ | ||||
|  | ||||
| PCRE_DATA_SCOPE pcre_extra * | ||||
| PCRE_EXP_DEFN pcre_extra * | ||||
| pcre_study(const pcre *external_re, int options, const char **errorptr) | ||||
| { | ||||
| uschar start_bits[32]; | ||||
|   | ||||
| @@ -6,7 +6,7 @@ | ||||
| and semantics are as close as possible to those of the Perl 5 language. | ||||
|  | ||||
|                        Written by Philip Hazel | ||||
|            Copyright (c) 1997-2006 University of Cambridge | ||||
|            Copyright (c) 1997-2007 University of Cambridge | ||||
|  | ||||
| ----------------------------------------------------------------------------- | ||||
| Redistribution and use in source and binary forms, with or without | ||||
| @@ -61,6 +61,8 @@ const uschar _pcre_OP_lengths[] = { OP_LENGTHS }; | ||||
| /* These are the breakpoints for different numbers of bytes in a UTF-8 | ||||
| character. */ | ||||
|  | ||||
| #ifdef SUPPORT_UTF8 | ||||
|  | ||||
| const int _pcre_utf8_table1[] = | ||||
|   { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff}; | ||||
|  | ||||
| @@ -301,4 +303,6 @@ const ucp_type_table _pcre_utt[] = { | ||||
|  | ||||
| const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table); | ||||
|  | ||||
| #endif  /* SUPPORT_UTF8 */ | ||||
|  | ||||
| /* End of pcre_tables.c */ | ||||
|   | ||||
| @@ -6,7 +6,7 @@ | ||||
| and semantics are as close as possible to those of the Perl 5 language. | ||||
|  | ||||
|                        Written by Philip Hazel | ||||
|            Copyright (c) 1997-2006 University of Cambridge | ||||
|            Copyright (c) 1997-2007 University of Cambridge | ||||
|  | ||||
| ----------------------------------------------------------------------------- | ||||
| Redistribution and use in source and binary forms, with or without | ||||
|   | ||||
| @@ -6,7 +6,7 @@ | ||||
| and semantics are as close as possible to those of the Perl 5 language. | ||||
|  | ||||
|                        Written by Philip Hazel | ||||
|            Copyright (c) 1997-2006 University of Cambridge | ||||
|            Copyright (c) 1997-2007 University of Cambridge | ||||
|  | ||||
| ----------------------------------------------------------------------------- | ||||
| Redistribution and use in source and binary forms, with or without | ||||
| @@ -75,7 +75,7 @@ I could find no way of detecting that a macro is defined as an empty string at | ||||
| pre-processor time. This hack uses a standard trick for avoiding calling | ||||
| the STRING macro with an empty argument when doing the test. */ | ||||
|  | ||||
| PCRE_DATA_SCOPE const char * | ||||
| PCRE_EXP_DEFN const char * | ||||
| pcre_version(void) | ||||
| { | ||||
| return (XSTRING(Z PCRE_PRERELEASE)[1] == 0)? | ||||
|   | ||||
| @@ -6,7 +6,7 @@ | ||||
| and semantics are as close as possible to those of the Perl 5 language. | ||||
|  | ||||
|                        Written by Philip Hazel | ||||
|            Copyright (c) 1997-2006 University of Cambridge | ||||
|            Copyright (c) 1997-2007 University of Cambridge | ||||
|  | ||||
| ----------------------------------------------------------------------------- | ||||
| Redistribution and use in source and binary forms, with or without | ||||
|   | ||||
		Reference in New Issue
	
	Block a user