regex: Update included PCRE to 8.30

This commit is contained in:
Christian Persch 2012-02-12 19:14:59 +01:00 committed by Matthias Clasen
parent da4293a4e4
commit a40523b7f2
28 changed files with 17987 additions and 3881 deletions

View File

@ -1,8 +1,12 @@
include $(top_srcdir)/Makefile.decl
INCLUDES = \
noinst_LTLIBRARIES = libpcre.la
libpcre_la_CPPFLAGS = \
-DG_LOG_DOMAIN=\"GLib-GRegex\" \
-DHAVE_CONFIG_H \
-DSUPPORT_UCP \
-DSUPPORT_UTF \
-DSUPPORT_UTF8 \
-DNEWLINE=-1 \
-DMATCH_LIMIT=10000000 \
@ -13,45 +17,48 @@ INCLUDES = \
-DLINK_SIZE=2 \
-DPOSIX_MALLOC_THRESHOLD=10 \
-DPCRE_STATIC \
-UBSR_ANYCRLF \
-UEBCDIC \
$(glib_INCLUDES) \
@GLIB_DEBUG_FLAGS@ \
-DG_DISABLE_DEPRECATED \
-DGLIB_COMPILATION \
$(DEPRECATED_FLAGS)\
$(WARN_CFLAGS) \
$(AM_CPPFLAGS)
libpcre_la_CFLAGS = \
$(PCRE_WARN_CFLAGS) \
$(DEP_CFLAGS)
$(DEP_CFLAGS) \
$(AM_CFLAGS)
noinst_LTLIBRARIES = libpcre.la
libpcre_headers =
libpcre_la_LDFLAGS = \
-no-undefined \
$(AM_LDFLAGS)
libpcre_la_SOURCES = \
pcre_compile.c \
pcre_byte_order.c \
pcre_chartables.c \
pcre_compile.c \
pcre_config.c \
pcre_dfa_exec.c \
pcre_exec.c \
pcre_fullinfo.c \
pcre_get.c \
pcre_globals.c \
pcre_jit_compile.c \
pcre_maketables.c \
pcre_newline.c \
pcre_ord2utf8.c \
pcre_refcount.c \
pcre_string_utils.c \
pcre_study.c \
pcre_tables.c \
pcre_try_flipped.c \
pcre_ucp_searchfuncs.c \
pcre_ucd.c \
pcre_valid_utf8.c \
pcre_version.c \
pcre_xclass.c \
pcre.h \
pcre_internal.h \
ucp.h \
$(libpcre_headers)
pcre.h
libpcre_la_LIBADD = $(DEP_LIBS)
libpcre_la_LDFLAGS = -no-undefined
EXTRA_DIST += \
COPYING \
makefile.msc

View File

@ -5,7 +5,7 @@
/* This is the public header file for the PCRE library, to be #included by
applications that call the PCRE functions.
Copyright (c) 1997-2010 University of Cambridge
Copyright (c) 1997-2012 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
/* The current PCRE version information. */
#define PCRE_MAJOR 8
#define PCRE_MINOR 12
#define PCRE_MINOR 30
#define PCRE_PRERELEASE
#define PCRE_DATE 2011-01-15
#define PCRE_DATE 2012-02-04
/* When an application links to a PCRE DLL in Windows, the symbols that are
imported have to be identified as such. When building PCRE, the appropriate
@ -98,28 +98,37 @@ extern "C" {
/* Options. Some are compile-time only, some are run-time only, and some are
both, so we keep them all distinct. However, almost all the bits in the options
word are now used. In the long run, we may have to re-use some of the
compile-time only bits for runtime options, or vice versa. */
compile-time only bits for runtime options, or vice versa. In the comments
below, "compile", "exec", and "DFA exec" mean that the option is permitted to
be set for those functions; "used in" means that an option may be set only for
compile, but is subsequently referenced in exec and/or DFA exec. Any of the
compile-time options may be inspected during studying (and therefore JIT
compiling). */
#define PCRE_CASELESS 0x00000001 /* Compile */
#define PCRE_MULTILINE 0x00000002 /* Compile */
#define PCRE_DOTALL 0x00000004 /* Compile */
#define PCRE_EXTENDED 0x00000008 /* Compile */
#define PCRE_ANCHORED 0x00000010 /* Compile, exec, DFA exec */
#define PCRE_DOLLAR_ENDONLY 0x00000020 /* Compile */
#define PCRE_DOLLAR_ENDONLY 0x00000020 /* Compile, used in exec, DFA exec */
#define PCRE_EXTRA 0x00000040 /* Compile */
#define PCRE_NOTBOL 0x00000080 /* Exec, DFA exec */
#define PCRE_NOTEOL 0x00000100 /* Exec, DFA exec */
#define PCRE_UNGREEDY 0x00000200 /* Compile */
#define PCRE_NOTEMPTY 0x00000400 /* Exec, DFA exec */
#define PCRE_UTF8 0x00000800 /* Compile */
/* The next two are also used in exec and DFA exec */
#define PCRE_UTF8 0x00000800 /* Compile (same as PCRE_UTF16) */
#define PCRE_UTF16 0x00000800 /* Compile (same as PCRE_UTF8) */
#define PCRE_NO_AUTO_CAPTURE 0x00001000 /* Compile */
#define PCRE_NO_UTF8_CHECK 0x00002000 /* Compile, exec, DFA exec */
/* The next two are also used in exec and DFA exec */
#define PCRE_NO_UTF8_CHECK 0x00002000 /* Compile (same as PCRE_NO_UTF16_CHECK) */
#define PCRE_NO_UTF16_CHECK 0x00002000 /* Compile (same as PCRE_NO_UTF8_CHECK) */
#define PCRE_AUTO_CALLOUT 0x00004000 /* Compile */
#define PCRE_PARTIAL_SOFT 0x00008000 /* Exec, DFA exec */
#define PCRE_PARTIAL 0x00008000 /* Backwards compatible synonym */
#define PCRE_DFA_SHORTEST 0x00010000 /* DFA exec */
#define PCRE_DFA_RESTART 0x00020000 /* DFA exec */
#define PCRE_FIRSTLINE 0x00040000 /* Compile */
#define PCRE_FIRSTLINE 0x00040000 /* Compile, used in exec, DFA exec */
#define PCRE_DUPNAMES 0x00080000 /* Compile */
#define PCRE_NEWLINE_CR 0x00100000 /* Compile, exec, DFA exec */
#define PCRE_NEWLINE_LF 0x00200000 /* Compile, exec, DFA exec */
@ -128,41 +137,81 @@ compile-time only bits for runtime options, or vice versa. */
#define PCRE_NEWLINE_ANYCRLF 0x00500000 /* Compile, exec, DFA exec */
#define PCRE_BSR_ANYCRLF 0x00800000 /* Compile, exec, DFA exec */
#define PCRE_BSR_UNICODE 0x01000000 /* Compile, exec, DFA exec */
#define PCRE_JAVASCRIPT_COMPAT 0x02000000 /* Compile */
#define PCRE_JAVASCRIPT_COMPAT 0x02000000 /* Compile, used in exec */
#define PCRE_NO_START_OPTIMIZE 0x04000000 /* Compile, exec, DFA exec */
#define PCRE_NO_START_OPTIMISE 0x04000000 /* Synonym */
#define PCRE_PARTIAL_HARD 0x08000000 /* Exec, DFA exec */
#define PCRE_NOTEMPTY_ATSTART 0x10000000 /* Exec, DFA exec */
#define PCRE_UCP 0x20000000 /* Compile */
#define PCRE_UCP 0x20000000 /* Compile, used in exec, DFA exec */
/* Exec-time and get/set-time error codes */
#define PCRE_ERROR_NOMATCH (-1)
#define PCRE_ERROR_NULL (-2)
#define PCRE_ERROR_BADOPTION (-3)
#define PCRE_ERROR_BADMAGIC (-4)
#define PCRE_ERROR_UNKNOWN_OPCODE (-5)
#define PCRE_ERROR_UNKNOWN_NODE (-5) /* For backward compatibility */
#define PCRE_ERROR_NOMEMORY (-6)
#define PCRE_ERROR_NOSUBSTRING (-7)
#define PCRE_ERROR_MATCHLIMIT (-8)
#define PCRE_ERROR_CALLOUT (-9) /* Never used by PCRE itself */
#define PCRE_ERROR_BADUTF8 (-10)
#define PCRE_ERROR_BADUTF8_OFFSET (-11)
#define PCRE_ERROR_PARTIAL (-12)
#define PCRE_ERROR_BADPARTIAL (-13)
#define PCRE_ERROR_INTERNAL (-14)
#define PCRE_ERROR_BADCOUNT (-15)
#define PCRE_ERROR_DFA_UITEM (-16)
#define PCRE_ERROR_DFA_UCOND (-17)
#define PCRE_ERROR_DFA_UMLIMIT (-18)
#define PCRE_ERROR_DFA_WSSIZE (-19)
#define PCRE_ERROR_DFA_RECURSE (-20)
#define PCRE_ERROR_RECURSIONLIMIT (-21)
#define PCRE_ERROR_NULLWSLIMIT (-22) /* No longer actually used */
#define PCRE_ERROR_BADNEWLINE (-23)
#define PCRE_ERROR_BADOFFSET (-24)
#define PCRE_ERROR_SHORTUTF8 (-25)
#define PCRE_ERROR_NOMATCH (-1)
#define PCRE_ERROR_NULL (-2)
#define PCRE_ERROR_BADOPTION (-3)
#define PCRE_ERROR_BADMAGIC (-4)
#define PCRE_ERROR_UNKNOWN_OPCODE (-5)
#define PCRE_ERROR_UNKNOWN_NODE (-5) /* For backward compatibility */
#define PCRE_ERROR_NOMEMORY (-6)
#define PCRE_ERROR_NOSUBSTRING (-7)
#define PCRE_ERROR_MATCHLIMIT (-8)
#define PCRE_ERROR_CALLOUT (-9) /* Never used by PCRE itself */
#define PCRE_ERROR_BADUTF8 (-10) /* Same for 8/16 */
#define PCRE_ERROR_BADUTF16 (-10) /* Same for 8/16 */
#define PCRE_ERROR_BADUTF8_OFFSET (-11) /* Same for 8/16 */
#define PCRE_ERROR_BADUTF16_OFFSET (-11) /* Same for 8/16 */
#define PCRE_ERROR_PARTIAL (-12)
#define PCRE_ERROR_BADPARTIAL (-13)
#define PCRE_ERROR_INTERNAL (-14)
#define PCRE_ERROR_BADCOUNT (-15)
#define PCRE_ERROR_DFA_UITEM (-16)
#define PCRE_ERROR_DFA_UCOND (-17)
#define PCRE_ERROR_DFA_UMLIMIT (-18)
#define PCRE_ERROR_DFA_WSSIZE (-19)
#define PCRE_ERROR_DFA_RECURSE (-20)
#define PCRE_ERROR_RECURSIONLIMIT (-21)
#define PCRE_ERROR_NULLWSLIMIT (-22) /* No longer actually used */
#define PCRE_ERROR_BADNEWLINE (-23)
#define PCRE_ERROR_BADOFFSET (-24)
#define PCRE_ERROR_SHORTUTF8 (-25)
#define PCRE_ERROR_SHORTUTF16 (-25) /* Same for 8/16 */
#define PCRE_ERROR_RECURSELOOP (-26)
#define PCRE_ERROR_JIT_STACKLIMIT (-27)
#define PCRE_ERROR_BADMODE (-28)
#define PCRE_ERROR_BADENDIANNESS (-29)
/* Specific error codes for UTF-8 validity checks */
#define PCRE_UTF8_ERR0 0
#define PCRE_UTF8_ERR1 1
#define PCRE_UTF8_ERR2 2
#define PCRE_UTF8_ERR3 3
#define PCRE_UTF8_ERR4 4
#define PCRE_UTF8_ERR5 5
#define PCRE_UTF8_ERR6 6
#define PCRE_UTF8_ERR7 7
#define PCRE_UTF8_ERR8 8
#define PCRE_UTF8_ERR9 9
#define PCRE_UTF8_ERR10 10
#define PCRE_UTF8_ERR11 11
#define PCRE_UTF8_ERR12 12
#define PCRE_UTF8_ERR13 13
#define PCRE_UTF8_ERR14 14
#define PCRE_UTF8_ERR15 15
#define PCRE_UTF8_ERR16 16
#define PCRE_UTF8_ERR17 17
#define PCRE_UTF8_ERR18 18
#define PCRE_UTF8_ERR19 19
#define PCRE_UTF8_ERR20 20
#define PCRE_UTF8_ERR21 21
/* Specific error codes for UTF-16 validity checks */
#define PCRE_UTF16_ERR0 0
#define PCRE_UTF16_ERR1 1
#define PCRE_UTF16_ERR2 2
#define PCRE_UTF16_ERR3 3
#define PCRE_UTF16_ERR4 4
/* Request types for pcre_fullinfo() */
@ -183,6 +232,8 @@ compile-time only bits for runtime options, or vice versa. */
#define PCRE_INFO_JCHANGED 13
#define PCRE_INFO_HASCRORLF 14
#define PCRE_INFO_MINLENGTH 15
#define PCRE_INFO_JIT 16
#define PCRE_INFO_JITSIZE 17
/* Request types for pcre_config(). Do not re-arrange, in order to remain
compatible. */
@ -196,8 +247,16 @@ compatible. */
#define PCRE_CONFIG_UNICODE_PROPERTIES 6
#define PCRE_CONFIG_MATCH_LIMIT_RECURSION 7
#define PCRE_CONFIG_BSR 8
#define PCRE_CONFIG_JIT 9
#define PCRE_CONFIG_UTF16 10
#define PCRE_CONFIG_JITTARGET 11
/* Bit flags for the pcre_extra structure. Do not re-arrange or redefine
/* Request types for pcre_study(). Do not re-arrange, in order to remain
compatible. */
#define PCRE_STUDY_JIT_COMPILE 0x0001
/* Bit flags for the pcre[16]_extra structure. Do not re-arrange or redefine
these bits, just add new ones on the end, in order to remain compatible. */
#define PCRE_EXTRA_STUDY_DATA 0x0001
@ -206,10 +265,32 @@ these bits, just add new ones on the end, in order to remain compatible. */
#define PCRE_EXTRA_TABLES 0x0008
#define PCRE_EXTRA_MATCH_LIMIT_RECURSION 0x0010
#define PCRE_EXTRA_MARK 0x0020
#define PCRE_EXTRA_EXECUTABLE_JIT 0x0040
/* Types */
typedef struct real_pcre pcre; /* declaration; the definition is private */
struct real_pcre; /* declaration; the definition is private */
typedef struct real_pcre pcre;
struct real_pcre16; /* declaration; the definition is private */
typedef struct real_pcre16 pcre16;
struct real_pcre_jit_stack; /* declaration; the definition is private */
typedef struct real_pcre_jit_stack pcre_jit_stack;
struct real_pcre16_jit_stack; /* declaration; the definition is private */
typedef struct real_pcre16_jit_stack pcre16_jit_stack;
/* If PCRE is compiled with 16 bit character support, PCRE_UCHAR16 must contain
a 16 bit wide signed data type. Otherwise it can be a dummy data type since
pcre16 functions are not implemented. There is a check for this in pcre_internal.h. */
#ifndef PCRE_UCHAR16
#define PCRE_UCHAR16 unsigned short
#endif
#ifndef PCRE_SPTR16
#define PCRE_SPTR16 const PCRE_UCHAR16 *
#endif
/* When PCRE is compiled as a C++ library, the subject pointer type can be
replaced with a custom type. For conventional use, the public interface is a
@ -231,8 +312,22 @@ typedef struct pcre_extra {
const unsigned char *tables; /* Pointer to character tables */
unsigned long int match_limit_recursion; /* Max recursive calls to match() */
unsigned char **mark; /* For passing back a mark pointer */
void *executable_jit; /* Contains a pointer to a compiled jit code */
} pcre_extra;
/* Same structure as above, but with 16 bit char pointers. */
typedef struct pcre16_extra {
unsigned long int flags; /* Bits for which fields are set */
void *study_data; /* Opaque data from pcre_study() */
unsigned long int match_limit; /* Maximum number of calls to match() */
void *callout_data; /* Data passed back in callouts */
const unsigned char *tables; /* Pointer to character tables */
unsigned long int match_limit_recursion; /* Max recursive calls to match() */
PCRE_UCHAR16 **mark; /* For passing back a mark pointer */
void *executable_jit; /* Contains a pointer to a compiled jit code */
} pcre16_extra;
/* The structure for passing out data via the pcre_callout_function. We use a
structure so that new fields can be added on the end in future versions,
without changing the API of the function, thereby allowing old clients to work
@ -253,50 +348,153 @@ typedef struct pcre_callout_block {
/* ------------------- Added for Version 1 -------------------------- */
int pattern_position; /* Offset to next item in the pattern */
int next_item_length; /* Length of next item in the pattern */
/* ------------------- Added for Version 2 -------------------------- */
const unsigned char *mark; /* Pointer to current mark or NULL */
/* ------------------------------------------------------------------ */
} pcre_callout_block;
#include "glib.h"
/* Same structure as above, but with 16 bit char pointers. */
#define pcre_malloc g_try_malloc
#define pcre_free g_free
#define pcre_stack_malloc g_try_malloc
typedef struct pcre16_callout_block {
int version; /* Identifies version of block */
/* ------------------------ Version 0 ------------------------------- */
int callout_number; /* Number compiled into pattern */
int *offset_vector; /* The offset vector */
PCRE_SPTR16 subject; /* The subject being matched */
int subject_length; /* The length of the subject */
int start_match; /* Offset to start of this match attempt */
int current_position; /* Where we currently are in the subject */
int capture_top; /* Max current capture */
int capture_last; /* Most recently closed capture */
void *callout_data; /* Data passed in with the call */
/* ------------------- Added for Version 1 -------------------------- */
int pattern_position; /* Offset to next item in the pattern */
int next_item_length; /* Length of next item in the pattern */
/* ------------------- Added for Version 2 -------------------------- */
const PCRE_UCHAR16 *mark; /* Pointer to current mark or NULL */
/* ------------------------------------------------------------------ */
} pcre16_callout_block;
/* Indirection for store get and free functions. These can be set to
alternative malloc/free functions if required. Special ones are used in the
non-recursive case for "frames". There is also an optional callout function
that is triggered by the (?) regex item. For Virtual Pascal, these definitions
have to take another form. */
#ifndef VPCOMPAT
PCRE_EXP_DECL void *(*pcre_malloc)(size_t);
PCRE_EXP_DECL void (*pcre_free)(void *);
PCRE_EXP_DECL void *(*pcre_stack_malloc)(size_t);
PCRE_EXP_DECL void (*pcre_stack_free)(void *);
PCRE_EXP_DECL int (*pcre_callout)(pcre_callout_block *);
PCRE_EXP_DECL void *(*pcre16_malloc)(size_t);
PCRE_EXP_DECL void (*pcre16_free)(void *);
PCRE_EXP_DECL void *(*pcre16_stack_malloc)(size_t);
PCRE_EXP_DECL void (*pcre16_stack_free)(void *);
PCRE_EXP_DECL int (*pcre16_callout)(pcre16_callout_block *);
#else /* VPCOMPAT */
PCRE_EXP_DECL void *pcre_malloc(size_t);
PCRE_EXP_DECL void pcre_free(void *);
PCRE_EXP_DECL void *pcre_stack_malloc(size_t);
PCRE_EXP_DECL void pcre_stack_free(void *);
PCRE_EXP_DECL int pcre_callout(pcre_callout_block *);
PCRE_EXP_DECL void *pcre16_malloc(size_t);
PCRE_EXP_DECL void pcre16_free(void *);
PCRE_EXP_DECL void *pcre16_stack_malloc(size_t);
PCRE_EXP_DECL void pcre16_stack_free(void *);
PCRE_EXP_DECL int pcre16_callout(pcre16_callout_block *);
#endif /* VPCOMPAT */
/* User defined callback which provides a stack just before the match starts. */
typedef pcre_jit_stack *(*pcre_jit_callback)(void *);
typedef pcre16_jit_stack *(*pcre16_jit_callback)(void *);
/* Exported PCRE functions */
PCRE_EXP_DECL pcre *pcre_compile(const char *, int, const char **, int *,
const unsigned char *);
PCRE_EXP_DECL pcre16 *pcre16_compile(PCRE_SPTR16, int, const char **, int *,
const unsigned char *);
PCRE_EXP_DECL pcre *pcre_compile2(const char *, int, int *, const char **,
int *, const unsigned char *);
PCRE_EXP_DECL pcre16 *pcre16_compile2(PCRE_SPTR16, int, int *, const char **,
int *, const unsigned char *);
PCRE_EXP_DECL int pcre_config(int, void *);
PCRE_EXP_DECL int pcre16_config(int, void *);
PCRE_EXP_DECL int pcre_copy_named_substring(const pcre *, const char *,
int *, int, const char *, char *, int);
PCRE_EXP_DECL int pcre_copy_substring(const char *, int *, int, int, char *,
int);
PCRE_EXP_DECL int pcre16_copy_named_substring(const pcre16 *, PCRE_SPTR16,
int *, int, PCRE_SPTR16, PCRE_UCHAR16 *, int);
PCRE_EXP_DECL int pcre_copy_substring(const char *, int *, int, int,
char *, int);
PCRE_EXP_DECL int pcre16_copy_substring(PCRE_SPTR16, int *, int, int,
PCRE_UCHAR16 *, int);
PCRE_EXP_DECL int pcre_dfa_exec(const pcre *, const pcre_extra *,
const char *, int, int, int, int *, int , int *, int);
PCRE_EXP_DECL int pcre16_dfa_exec(const pcre16 *, const pcre16_extra *,
PCRE_SPTR16, int, int, int, int *, int , int *, int);
PCRE_EXP_DECL int pcre_exec(const pcre *, const pcre_extra *, PCRE_SPTR,
int, int, int, int *, int);
PCRE_EXP_DECL int pcre16_exec(const pcre16 *, const pcre16_extra *,
PCRE_SPTR16, int, int, int, int *, int);
PCRE_EXP_DECL void pcre_free_substring(const char *);
PCRE_EXP_DECL void pcre16_free_substring(PCRE_SPTR16);
PCRE_EXP_DECL void pcre_free_substring_list(const char **);
PCRE_EXP_DECL void pcre16_free_substring_list(PCRE_SPTR16 *);
PCRE_EXP_DECL int pcre_fullinfo(const pcre *, const pcre_extra *, int,
void *);
PCRE_EXP_DECL int pcre16_fullinfo(const pcre16 *, const pcre16_extra *, int,
void *);
PCRE_EXP_DECL int pcre_get_named_substring(const pcre *, const char *,
int *, int, const char *, const char **);
PCRE_EXP_DECL int pcre16_get_named_substring(const pcre16 *, PCRE_SPTR16,
int *, int, PCRE_SPTR16, PCRE_SPTR16 *);
PCRE_EXP_DECL int pcre_get_stringnumber(const pcre *, const char *);
PCRE_EXP_DECL int pcre16_get_stringnumber(const pcre16 *, PCRE_SPTR16);
PCRE_EXP_DECL int pcre_get_stringtable_entries(const pcre *, const char *,
char **, char **);
PCRE_EXP_DECL int pcre16_get_stringtable_entries(const pcre16 *, PCRE_SPTR16,
PCRE_UCHAR16 **, PCRE_UCHAR16 **);
PCRE_EXP_DECL int pcre_get_substring(const char *, int *, int, int,
const char **);
PCRE_EXP_DECL int pcre16_get_substring(PCRE_SPTR16, int *, int, int,
PCRE_SPTR16 *);
PCRE_EXP_DECL int pcre_get_substring_list(const char *, int *, int,
const char ***);
PCRE_EXP_DECL int pcre_info(const pcre *, int *, int *);
PCRE_EXP_DECL int pcre16_get_substring_list(PCRE_SPTR16, int *, int,
PCRE_SPTR16 **);
PCRE_EXP_DECL const unsigned char *pcre_maketables(void);
PCRE_EXP_DECL const unsigned char *pcre16_maketables(void);
PCRE_EXP_DECL int pcre_refcount(pcre *, int);
PCRE_EXP_DECL int pcre16_refcount(pcre16 *, int);
PCRE_EXP_DECL pcre_extra *pcre_study(const pcre *, int, const char **);
PCRE_EXP_DECL pcre16_extra *pcre16_study(const pcre16 *, int, const char **);
PCRE_EXP_DECL void pcre_free_study(pcre_extra *);
PCRE_EXP_DECL void pcre16_free_study(pcre16_extra *);
PCRE_EXP_DECL const char *pcre_version(void);
PCRE_EXP_DECL const char *pcre16_version(void);
/* Utility functions for byte order swaps. */
PCRE_EXP_DECL int pcre_pattern_to_host_byte_order(pcre *, pcre_extra *,
const unsigned char *);
PCRE_EXP_DECL int pcre16_pattern_to_host_byte_order(pcre16 *, pcre16_extra *,
const unsigned char *);
PCRE_EXP_DECL int pcre16_utf16_to_host_byte_order(PCRE_UCHAR16 *,
PCRE_SPTR16, int, int *, int);
/* JIT compiler related functions. */
PCRE_EXP_DECL pcre_jit_stack *pcre_jit_stack_alloc(int, int);
PCRE_EXP_DECL pcre16_jit_stack *pcre16_jit_stack_alloc(int, int);
PCRE_EXP_DECL void pcre_jit_stack_free(pcre_jit_stack *);
PCRE_EXP_DECL void pcre16_jit_stack_free(pcre16_jit_stack *);
PCRE_EXP_DECL void pcre_assign_jit_stack(pcre_extra *,
pcre_jit_callback, void *);
PCRE_EXP_DECL void pcre16_assign_jit_stack(pcre16_extra *,
pcre16_jit_callback, void *);
#ifdef __cplusplus
} /* extern "C" */

288
glib/pcre/pcre_byte_order.c Normal file
View File

@ -0,0 +1,288 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2012 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains an internal function that tests a compiled pattern to
see if it was compiled with the opposite endianness. If so, it uses an
auxiliary local function to flip the appropriate bytes. */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre_internal.h"
/*************************************************
* Swap byte functions *
*************************************************/
/* The following functions swap the bytes of a pcre_uint16
and pcre_uint32 value.
Arguments:
value any number
Returns: the byte swapped value
*/
static pcre_uint32
swap_uint32(pcre_uint32 value)
{
return ((value & 0x000000ff) << 24) |
((value & 0x0000ff00) << 8) |
((value & 0x00ff0000) >> 8) |
(value >> 24);
}
static pcre_uint16
swap_uint16(pcre_uint16 value)
{
return (value >> 8) | (value << 8);
}
/*************************************************
* Test for a byte-flipped compiled regex *
*************************************************/
/* This function swaps the bytes of a compiled pattern usually
loaded form the disk. It also sets the tables pointer, which
is likely an invalid pointer after reload.
Arguments:
argument_re points to the compiled expression
extra_data points to extra data or is NULL
tables points to the character tables or NULL
Returns: 0 if the swap is successful, negative on error
*/
#ifdef COMPILE_PCRE8
PCRE_EXP_DECL int pcre_pattern_to_host_byte_order(pcre *argument_re,
pcre_extra *extra_data, const unsigned char *tables)
#else
PCRE_EXP_DECL int pcre16_pattern_to_host_byte_order(pcre16 *argument_re,
pcre16_extra *extra_data, const unsigned char *tables)
#endif
{
REAL_PCRE *re = (REAL_PCRE *)argument_re;
pcre_study_data *study;
#ifndef COMPILE_PCRE8
pcre_uchar *ptr;
int length;
#ifdef SUPPORT_UTF
BOOL utf;
BOOL utf16_char;
#endif /* SUPPORT_UTF */
#endif /* !COMPILE_PCRE8 */
if (re == NULL) return PCRE_ERROR_NULL;
if (re->magic_number == MAGIC_NUMBER)
{
if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
re->tables = tables;
return 0;
}
if (re->magic_number != REVERSED_MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
if ((swap_uint16(re->flags) & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
re->magic_number = MAGIC_NUMBER;
re->size = swap_uint32(re->size);
re->options = swap_uint32(re->options);
re->flags = swap_uint16(re->flags);
re->top_bracket = swap_uint16(re->top_bracket);
re->top_backref = swap_uint16(re->top_backref);
re->first_char = swap_uint16(re->first_char);
re->req_char = swap_uint16(re->req_char);
re->name_table_offset = swap_uint16(re->name_table_offset);
re->name_entry_size = swap_uint16(re->name_entry_size);
re->name_count = swap_uint16(re->name_count);
re->ref_count = swap_uint16(re->ref_count);
re->tables = tables;
if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_STUDY_DATA) != 0)
{
study = (pcre_study_data *)extra_data->study_data;
study->size = swap_uint32(study->size);
study->flags = swap_uint32(study->flags);
study->minlength = swap_uint32(study->minlength);
}
#ifndef COMPILE_PCRE8
ptr = (pcre_uchar *)re + re->name_table_offset;
length = re->name_count * re->name_entry_size;
#ifdef SUPPORT_UTF
utf = (re->options & PCRE_UTF16) != 0;
utf16_char = FALSE;
#endif
while(TRUE)
{
/* Swap previous characters. */
while (length-- > 0)
{
*ptr = swap_uint16(*ptr);
ptr++;
}
#ifdef SUPPORT_UTF
if (utf16_char)
{
if (HAS_EXTRALEN(ptr[-1]))
{
/* We know that there is only one extra character in UTF-16. */
*ptr = swap_uint16(*ptr);
ptr++;
}
}
utf16_char = FALSE;
#endif /* SUPPORT_UTF */
/* Get next opcode. */
length = 0;
*ptr = swap_uint16(*ptr);
switch (*ptr)
{
case OP_END:
return 0;
#ifdef SUPPORT_UTF
case OP_CHAR:
case OP_CHARI:
case OP_NOT:
case OP_NOTI:
case OP_STAR:
case OP_MINSTAR:
case OP_PLUS:
case OP_MINPLUS:
case OP_QUERY:
case OP_MINQUERY:
case OP_UPTO:
case OP_MINUPTO:
case OP_EXACT:
case OP_POSSTAR:
case OP_POSPLUS:
case OP_POSQUERY:
case OP_POSUPTO:
case OP_STARI:
case OP_MINSTARI:
case OP_PLUSI:
case OP_MINPLUSI:
case OP_QUERYI:
case OP_MINQUERYI:
case OP_UPTOI:
case OP_MINUPTOI:
case OP_EXACTI:
case OP_POSSTARI:
case OP_POSPLUSI:
case OP_POSQUERYI:
case OP_POSUPTOI:
case OP_NOTSTAR:
case OP_NOTMINSTAR:
case OP_NOTPLUS:
case OP_NOTMINPLUS:
case OP_NOTQUERY:
case OP_NOTMINQUERY:
case OP_NOTUPTO:
case OP_NOTMINUPTO:
case OP_NOTEXACT:
case OP_NOTPOSSTAR:
case OP_NOTPOSPLUS:
case OP_NOTPOSQUERY:
case OP_NOTPOSUPTO:
case OP_NOTSTARI:
case OP_NOTMINSTARI:
case OP_NOTPLUSI:
case OP_NOTMINPLUSI:
case OP_NOTQUERYI:
case OP_NOTMINQUERYI:
case OP_NOTUPTOI:
case OP_NOTMINUPTOI:
case OP_NOTEXACTI:
case OP_NOTPOSSTARI:
case OP_NOTPOSPLUSI:
case OP_NOTPOSQUERYI:
case OP_NOTPOSUPTOI:
if (utf) utf16_char = TRUE;
#endif
/* Fall through. */
default:
length = PRIV(OP_lengths)[*ptr] - 1;
break;
case OP_CLASS:
case OP_NCLASS:
/* Skip the character bit map. */
ptr += 32/sizeof(pcre_uchar);
length = 0;
break;
case OP_XCLASS:
/* Reverse the size of the XCLASS instance. */
ptr++;
*ptr = swap_uint16(*ptr);
if (LINK_SIZE > 1)
{
/* LINK_SIZE can be 1 or 2 in 16 bit mode. */
ptr++;
*ptr = swap_uint16(*ptr);
}
ptr++;
length = (GET(ptr, -LINK_SIZE)) - (1 + LINK_SIZE + 1);
*ptr = swap_uint16(*ptr);
if ((*ptr & XCL_MAP) != 0)
{
/* Skip the character bit map. */
ptr += 32/sizeof(pcre_uchar);
length -= 32/sizeof(pcre_uchar);
}
break;
}
ptr++;
}
/* Control should never reach here in 16 bit mode. */
#endif /* !COMPILE_PCRE8 */
return 0;
}
/* End of pcre_byte_order.c */

View File

@ -26,7 +26,7 @@ unit might reference this" and so it will always be supplied to the linker. */
#include "pcre_internal.h"
const unsigned char _pcre_default_tables[] = {
const pcre_uint8 PRIV(default_tables)[] = {
/* This table is a lower casing table. */

File diff suppressed because it is too large Load Diff

View File

@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2009 University of Cambridge
Copyright (c) 1997-2012 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -45,6 +45,9 @@ POSSIBILITY OF SUCH DAMAGE.
#include "config.h"
#endif
/* Keep the original link size. */
static int real_link_size = LINK_SIZE;
#include "pcre_internal.h"
@ -62,18 +65,41 @@ Arguments:
Returns: 0 if data returned, negative on error
*/
#ifdef COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_config(int what, void *where)
#else
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre16_config(int what, void *where)
#endif
{
switch (what)
{
case PCRE_CONFIG_UTF8:
#ifdef SUPPORT_UTF8
#if defined COMPILE_PCRE16
*((int *)where) = 0;
return PCRE_ERROR_BADOPTION;
#else
#if defined SUPPORT_UTF
*((int *)where) = 1;
#else
*((int *)where) = 0;
#endif
break;
#endif
case PCRE_CONFIG_UTF16:
#if defined COMPILE_PCRE8
*((int *)where) = 0;
return PCRE_ERROR_BADOPTION;
#else
#if defined SUPPORT_UTF
*((int *)where) = 1;
#else
*((int *)where) = 0;
#endif
break;
#endif
case PCRE_CONFIG_UNICODE_PROPERTIES:
#ifdef SUPPORT_UCP
@ -83,6 +109,22 @@ switch (what)
#endif
break;
case PCRE_CONFIG_JIT:
#ifdef SUPPORT_JIT
*((int *)where) = 1;
#else
*((int *)where) = 0;
#endif
break;
case PCRE_CONFIG_JITTARGET:
#ifdef SUPPORT_JIT
*((const char **)where) = PRIV(jit_get_target)();
#else
*((const char **)where) = NULL;
#endif
break;
case PCRE_CONFIG_NEWLINE:
*((int *)where) = NEWLINE;
break;
@ -96,7 +138,7 @@ switch (what)
break;
case PCRE_CONFIG_LINK_SIZE:
*((int *)where) = LINK_SIZE;
*((int *)where) = real_link_size;
break;
case PCRE_CONFIG_POSIX_MALLOC_THRESHOLD:

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2009 University of Cambridge
Copyright (c) 1997-2012 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -65,13 +65,17 @@ Arguments:
Returns: 0 if data returned, negative on error
*/
#ifdef COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_fullinfo(const pcre *argument_re, const pcre_extra *extra_data, int what,
void *where)
pcre_fullinfo(const pcre *argument_re, const pcre_extra *extra_data,
int what, void *where)
#else
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre16_fullinfo(const pcre16 *argument_re, const pcre16_extra *extra_data,
int what, void *where)
#endif
{
real_pcre internal_re;
pcre_study_data internal_study;
const real_pcre *re = (const real_pcre *)argument_re;
const REAL_PCRE *re = (const REAL_PCRE *)argument_re;
const pcre_study_data *study = NULL;
if (re == NULL || where == NULL) return PCRE_ERROR_NULL;
@ -79,12 +83,18 @@ if (re == NULL || where == NULL) return PCRE_ERROR_NULL;
if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_STUDY_DATA) != 0)
study = (const pcre_study_data *)extra_data->study_data;
/* Check that the first field in the block is the magic number. If it is not,
return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
means that the pattern is likely compiled with different endianness. */
if (re->magic_number != MAGIC_NUMBER)
{
re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
if (re == NULL) return PCRE_ERROR_BADMAGIC;
if (study != NULL) study = &internal_study;
}
return re->magic_number == REVERSED_MAGIC_NUMBER?
PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
/* Check that this pattern was compiled in the correct bit mode */
if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
switch (what)
{
@ -100,6 +110,18 @@ switch (what)
*((size_t *)where) = (study == NULL)? 0 : study->size;
break;
case PCRE_INFO_JITSIZE:
#ifdef SUPPORT_JIT
*((size_t *)where) =
(extra_data != NULL &&
(extra_data->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
extra_data->executable_jit != NULL)?
PRIV(jit_get_size)(extra_data->executable_jit) : 0;
#else
*((size_t *)where) = 0;
#endif
break;
case PCRE_INFO_CAPTURECOUNT:
*((int *)where) = re->top_bracket;
break;
@ -110,7 +132,7 @@ switch (what)
case PCRE_INFO_FIRSTBYTE:
*((int *)where) =
((re->flags & PCRE_FIRSTSET) != 0)? re->first_byte :
((re->flags & PCRE_FIRSTSET) != 0)? re->first_char :
((re->flags & PCRE_STARTLINE) != 0)? -1 : -2;
break;
@ -118,7 +140,7 @@ switch (what)
block, not the internal copy (with flipped integer fields). */
case PCRE_INFO_FIRSTTABLE:
*((const uschar **)where) =
*((const pcre_uint8 **)where) =
(study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0)?
((const pcre_study_data *)extra_data->study_data)->start_bits : NULL;
break;
@ -126,12 +148,18 @@ switch (what)
case PCRE_INFO_MINLENGTH:
*((int *)where) =
(study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0)?
study->minlength : -1;
(int)(study->minlength) : -1;
break;
case PCRE_INFO_JIT:
*((int *)where) = extra_data != NULL &&
(extra_data->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
extra_data->executable_jit != NULL;
break;
case PCRE_INFO_LASTLITERAL:
*((int *)where) =
((re->flags & PCRE_REQCHSET) != 0)? re->req_byte : -1;
((re->flags & PCRE_REQCHSET) != 0)? re->req_char : -1;
break;
case PCRE_INFO_NAMEENTRYSIZE:
@ -143,11 +171,11 @@ switch (what)
break;
case PCRE_INFO_NAMETABLE:
*((const uschar **)where) = (const uschar *)re + re->name_table_offset;
*((const pcre_uchar **)where) = (const pcre_uchar *)re + re->name_table_offset;
break;
case PCRE_INFO_DEFAULT_TABLES:
*((const uschar **)where) = (const uschar *)(_pcre_default_tables);
*((const pcre_uint8 **)where) = (const pcre_uint8 *)(PRIV(default_tables));
break;
/* From release 8.00 this will always return TRUE because NOPARTIAL is

View File

@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2008 University of Cambridge
Copyright (c) 1997-2012 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -65,14 +65,20 @@ Returns: the number of the named parentheses, or a negative number
(PCRE_ERROR_NOSUBSTRING) if not found
*/
#ifdef COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_get_stringnumber(const pcre *code, const char *stringname)
#else
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre16_get_stringnumber(const pcre16 *code, PCRE_SPTR16 stringname)
#endif
{
int rc;
int entrysize;
int top, bot;
uschar *nametable;
pcre_uchar *nametable;
#ifdef COMPILE_PCRE8
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
return rc;
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
@ -81,14 +87,26 @@ if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
return rc;
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
return rc;
#endif
#ifdef COMPILE_PCRE16
if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
return rc;
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
return rc;
if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
return rc;
#endif
bot = 0;
while (top > bot)
{
int mid = (top + bot) / 2;
uschar *entry = nametable + entrysize*mid;
int c = strcmp(stringname, (char *)(entry + 2));
if (c == 0) return (entry[0] << 8) + entry[1];
pcre_uchar *entry = nametable + entrysize*mid;
int c = STRCMP_UC_UC((pcre_uchar *)stringname,
(pcre_uchar *)(entry + IMM2_SIZE));
if (c == 0) return GET2(entry, 0);
if (c > 0) bot = mid + 1; else top = mid;
}
@ -114,15 +132,22 @@ Returns: the length of each entry, or a negative number
(PCRE_ERROR_NOSUBSTRING) if not found
*/
#ifdef COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_get_stringtable_entries(const pcre *code, const char *stringname,
char **firstptr, char **lastptr)
#else
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre16_get_stringtable_entries(const pcre16 *code, PCRE_SPTR16 stringname,
PCRE_UCHAR16 **firstptr, PCRE_UCHAR16 **lastptr)
#endif
{
int rc;
int entrysize;
int top, bot;
uschar *nametable, *lastentry;
pcre_uchar *nametable, *lastentry;
#ifdef COMPILE_PCRE8
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
return rc;
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
@ -131,30 +156,49 @@ if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
return rc;
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
return rc;
#endif
#ifdef COMPILE_PCRE16
if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
return rc;
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
return rc;
if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
return rc;
#endif
lastentry = nametable + entrysize * (top - 1);
bot = 0;
while (top > bot)
{
int mid = (top + bot) / 2;
uschar *entry = nametable + entrysize*mid;
int c = strcmp(stringname, (char *)(entry + 2));
pcre_uchar *entry = nametable + entrysize*mid;
int c = STRCMP_UC_UC((pcre_uchar *)stringname,
(pcre_uchar *)(entry + IMM2_SIZE));
if (c == 0)
{
uschar *first = entry;
uschar *last = entry;
pcre_uchar *first = entry;
pcre_uchar *last = entry;
while (first > nametable)
{
if (strcmp(stringname, (char *)(first - entrysize + 2)) != 0) break;
if (STRCMP_UC_UC((pcre_uchar *)stringname,
(pcre_uchar *)(first - entrysize + IMM2_SIZE)) != 0) break;
first -= entrysize;
}
while (last < lastentry)
{
if (strcmp(stringname, (char *)(last + entrysize + 2)) != 0) break;
if (STRCMP_UC_UC((pcre_uchar *)stringname,
(pcre_uchar *)(last + entrysize + IMM2_SIZE)) != 0) break;
last += entrysize;
}
#ifdef COMPILE_PCRE8
*firstptr = (char *)first;
*lastptr = (char *)last;
#else
*firstptr = (PCRE_UCHAR16 *)first;
*lastptr = (PCRE_UCHAR16 *)last;
#endif
return entrysize;
}
if (c > 0) bot = mid + 1; else top = mid;
@ -164,7 +208,6 @@ return PCRE_ERROR_NOSUBSTRING;
}
#ifdef NOT_USED_IN_GLIB
/*************************************************
* Find first set of multiple named strings *
@ -183,23 +226,39 @@ Returns: the number of the first that is set,
or a negative number on error
*/
#ifdef COMPILE_PCRE8
static int
get_first_set(const pcre *code, const char *stringname, int *ovector)
#else
static int
get_first_set(const pcre16 *code, PCRE_SPTR16 stringname, int *ovector)
#endif
{
const real_pcre *re = (const real_pcre *)code;
const REAL_PCRE *re = (const REAL_PCRE *)code;
int entrysize;
pcre_uchar *entry;
#ifdef COMPILE_PCRE8
char *first, *last;
uschar *entry;
#else
PCRE_UCHAR16 *first, *last;
#endif
#ifdef COMPILE_PCRE8
if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
return pcre_get_stringnumber(code, stringname);
entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
#else
if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
return pcre16_get_stringnumber(code, stringname);
entrysize = pcre16_get_stringtable_entries(code, stringname, &first, &last);
#endif
if (entrysize <= 0) return entrysize;
for (entry = (uschar *)first; entry <= (uschar *)last; entry += entrysize)
for (entry = (pcre_uchar *)first; entry <= (pcre_uchar *)last; entry += entrysize)
{
int n = (entry[0] << 8) + entry[1];
int n = GET2(entry, 0);
if (ovector[n*2] >= 0) return n;
}
return (first[0] << 8) + first[1];
return GET2(entry, 0);
}
@ -232,9 +291,15 @@ Returns: if successful:
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
*/
#ifdef COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_copy_substring(const char *subject, int *ovector, int stringcount,
int stringnumber, char *buffer, int size)
#else
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre16_copy_substring(PCRE_SPTR16 subject, int *ovector, int stringcount,
int stringnumber, PCRE_UCHAR16 *buffer, int size)
#endif
{
int yield;
if (stringnumber < 0 || stringnumber >= stringcount)
@ -242,7 +307,7 @@ if (stringnumber < 0 || stringnumber >= stringcount)
stringnumber *= 2;
yield = ovector[stringnumber+1] - ovector[stringnumber];
if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
memcpy(buffer, subject + ovector[stringnumber], yield);
memcpy(buffer, subject + ovector[stringnumber], IN_UCHARS(yield));
buffer[yield] = 0;
return yield;
}
@ -277,13 +342,25 @@ Returns: if successful:
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
*/
#ifdef COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
int stringcount, const char *stringname, char *buffer, int size)
pcre_copy_named_substring(const pcre *code, const char *subject,
int *ovector, int stringcount, const char *stringname,
char *buffer, int size)
#else
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre16_copy_named_substring(const pcre16 *code, PCRE_SPTR16 subject,
int *ovector, int stringcount, PCRE_SPTR16 stringname,
PCRE_UCHAR16 *buffer, int size)
#endif
{
int n = get_first_set(code, stringname, ovector);
if (n <= 0) return n;
#ifdef COMPILE_PCRE8
return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
#else
return pcre16_copy_substring(subject, ovector, stringcount, n, buffer, size);
#endif
}
@ -309,29 +386,39 @@ Returns: if successful: 0
PCRE_ERROR_NOMEMORY (-6) failed to get store
*/
#ifdef COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
const char ***listptr)
#else
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre16_get_substring_list(PCRE_SPTR16 subject, int *ovector, int stringcount,
PCRE_SPTR16 **listptr)
#endif
{
int i;
int size = sizeof(char *);
int size = sizeof(pcre_uchar *);
int double_count = stringcount * 2;
char **stringlist;
char *p;
pcre_uchar **stringlist;
pcre_uchar *p;
for (i = 0; i < double_count; i += 2)
size += sizeof(char *) + ovector[i+1] - ovector[i] + 1;
size += sizeof(pcre_uchar *) + IN_UCHARS(ovector[i+1] - ovector[i] + 1);
stringlist = (char **)(pcre_malloc)(size);
stringlist = (pcre_uchar **)(PUBL(malloc))(size);
if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
#ifdef COMPILE_PCRE8
*listptr = (const char **)stringlist;
p = (char *)(stringlist + stringcount + 1);
#else
*listptr = (PCRE_SPTR16 *)stringlist;
#endif
p = (pcre_uchar *)(stringlist + stringcount + 1);
for (i = 0; i < double_count; i += 2)
{
int len = ovector[i+1] - ovector[i];
memcpy(p, subject + ovector[i], len);
memcpy(p, subject + ovector[i], IN_UCHARS(len));
*stringlist++ = p;
p += len;
*p++ = 0;
@ -348,16 +435,22 @@ return 0;
*************************************************/
/* This function exists for the benefit of people calling PCRE from non-C
programs that can call its functions, but not free() or (pcre_free)() directly.
programs that can call its functions, but not free() or (PUBL(free))()
directly.
Argument: the result of a previous pcre_get_substring_list()
Returns: nothing
*/
#ifdef COMPILE_PCRE8
PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
pcre_free_substring_list(const char **pointer)
#else
PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
pcre16_free_substring_list(PCRE_SPTR16 *pointer)
#endif
{
(pcre_free)((void *)pointer);
(PUBL(free))((void *)pointer);
}
@ -387,21 +480,31 @@ Returns: if successful:
PCRE_ERROR_NOSUBSTRING (-7) substring not present
*/
#ifdef COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_get_substring(const char *subject, int *ovector, int stringcount,
int stringnumber, const char **stringptr)
#else
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre16_get_substring(PCRE_SPTR16 subject, int *ovector, int stringcount,
int stringnumber, PCRE_SPTR16 *stringptr)
#endif
{
int yield;
char *substring;
pcre_uchar *substring;
if (stringnumber < 0 || stringnumber >= stringcount)
return PCRE_ERROR_NOSUBSTRING;
stringnumber *= 2;
yield = ovector[stringnumber+1] - ovector[stringnumber];
substring = (char *)(pcre_malloc)(yield + 1);
substring = (pcre_uchar *)(PUBL(malloc))(IN_UCHARS(yield + 1));
if (substring == NULL) return PCRE_ERROR_NOMEMORY;
memcpy(substring, subject + ovector[stringnumber], yield);
memcpy(substring, subject + ovector[stringnumber], IN_UCHARS(yield));
substring[yield] = 0;
*stringptr = substring;
#ifdef COMPILE_PCRE8
*stringptr = (const char *)substring;
#else
*stringptr = (PCRE_SPTR16)substring;
#endif
return yield;
}
@ -434,13 +537,25 @@ Returns: if successful:
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
*/
#ifdef COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
int stringcount, const char *stringname, const char **stringptr)
pcre_get_named_substring(const pcre *code, const char *subject,
int *ovector, int stringcount, const char *stringname,
const char **stringptr)
#else
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre16_get_named_substring(const pcre16 *code, PCRE_SPTR16 subject,
int *ovector, int stringcount, PCRE_SPTR16 stringname,
PCRE_SPTR16 *stringptr)
#endif
{
int n = get_first_set(code, stringname, ovector);
if (n <= 0) return n;
#ifdef COMPILE_PCRE8
return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
#else
return pcre16_get_substring(subject, ovector, stringcount, n, stringptr);
#endif
}
@ -451,18 +566,22 @@ return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
*************************************************/
/* This function exists for the benefit of people calling PCRE from non-C
programs that can call its functions, but not free() or (pcre_free)() directly.
programs that can call its functions, but not free() or (PUBL(free))()
directly.
Argument: the result of a previous pcre_get_substring()
Returns: nothing
*/
#ifdef COMPILE_PCRE8
PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
pcre_free_substring(const char *pointer)
#else
PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
pcre16_free_substring(PCRE_SPTR16 pointer)
#endif
{
(pcre_free)((void *)pointer);
(PUBL(free))((void *)pointer);
}
#endif
/* End of pcre_get.c */

View File

@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2008 University of Cambridge
Copyright (c) 1997-2012 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -67,10 +67,18 @@ static void LocalPcreFree(void* aPtr)
{
free(aPtr);
}
PCRE_EXP_DATA_DEFN int (*pcre_callout)(pcre_callout_block *) = NULL;
PCRE_EXP_DATA_DEFN void *(*PUBL(malloc))(size_t) = LocalPcreMalloc;
PCRE_EXP_DATA_DEFN void (*PUBL(free))(void *) = LocalPcreFree;
PCRE_EXP_DATA_DEFN void *(*PUBL(stack_malloc))(size_t) = LocalPcreMalloc;
PCRE_EXP_DATA_DEFN void (*PUBL(stack_free))(void *) = LocalPcreFree;
PCRE_EXP_DATA_DEFN int (*PUBL(callout))(PUBL(callout_block) *) = NULL;
#elif !defined VPCOMPAT
PCRE_EXP_DATA_DEFN int (*pcre_callout)(pcre_callout_block *) = NULL;
PCRE_EXP_DATA_DEFN void *(*PUBL(malloc))(size_t) = malloc;
PCRE_EXP_DATA_DEFN void (*PUBL(free))(void *) = free;
PCRE_EXP_DATA_DEFN void *(*PUBL(stack_malloc))(size_t) = malloc;
PCRE_EXP_DATA_DEFN void (*PUBL(stack_free))(void *) = free;
PCRE_EXP_DATA_DEFN int (*PUBL(callout))(PUBL(callout_block) *) = NULL;
#endif
/* End of pcre_globals.c */

File diff suppressed because it is too large Load Diff

6915
glib/pcre/pcre_jit_compile.c Normal file

File diff suppressed because it is too large Load Diff

148
glib/pcre/pcre_maketables.c Normal file
View File

@ -0,0 +1,148 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2012 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains the external function pcre_maketables(), which builds
character tables for PCRE in the current locale. The file is compiled on its
own as part of the PCRE library. However, it is also included in the
compilation of dftables.c, in which case the macro DFTABLES is defined. */
#ifndef DFTABLES
# ifdef HAVE_CONFIG_H
# include "config.h"
# endif
# include "pcre_internal.h"
#endif
/*************************************************
* Create PCRE character tables *
*************************************************/
/* This function builds a set of character tables for use by PCRE and returns
a pointer to them. They are build using the ctype functions, and consequently
their contents will depend upon the current locale setting. When compiled as
part of the library, the store is obtained via PUBL(malloc)(), but when
compiled inside dftables, use malloc().
Arguments: none
Returns: pointer to the contiguous block of data
*/
#ifdef COMPILE_PCRE8
const unsigned char *
pcre_maketables(void)
#else
const unsigned char *
pcre16_maketables(void)
#endif
{
unsigned char *yield, *p;
int i;
#ifndef DFTABLES
yield = (unsigned char*)(PUBL(malloc))(tables_length);
#else
yield = (unsigned char*)malloc(tables_length);
#endif
if (yield == NULL) return NULL;
p = yield;
/* First comes the lower casing table */
for (i = 0; i < 256; i++) *p++ = tolower(i);
/* Next the case-flipping table */
for (i = 0; i < 256; i++) *p++ = islower(i)? toupper(i) : tolower(i);
/* Then the character class tables. Don't try to be clever and save effort on
exclusive ones - in some locales things may be different. Note that the table
for "space" includes everything "isspace" gives, including VT in the default
locale. This makes it work for the POSIX class [:space:]. Note also that it is
possible for a character to be alnum or alpha without being lower or upper,
such as "male and female ordinals" (\xAA and \xBA) in the fr_FR locale (at
least under Debian Linux's locales as of 12/2005). So we must test for alnum
specially. */
memset(p, 0, cbit_length);
for (i = 0; i < 256; i++)
{
if (isdigit(i)) p[cbit_digit + i/8] |= 1 << (i&7);
if (isupper(i)) p[cbit_upper + i/8] |= 1 << (i&7);
if (islower(i)) p[cbit_lower + i/8] |= 1 << (i&7);
if (isalnum(i)) p[cbit_word + i/8] |= 1 << (i&7);
if (i == '_') p[cbit_word + i/8] |= 1 << (i&7);
if (isspace(i)) p[cbit_space + i/8] |= 1 << (i&7);
if (isxdigit(i))p[cbit_xdigit + i/8] |= 1 << (i&7);
if (isgraph(i)) p[cbit_graph + i/8] |= 1 << (i&7);
if (isprint(i)) p[cbit_print + i/8] |= 1 << (i&7);
if (ispunct(i)) p[cbit_punct + i/8] |= 1 << (i&7);
if (iscntrl(i)) p[cbit_cntrl + i/8] |= 1 << (i&7);
}
p += cbit_length;
/* Finally, the character type table. In this, we exclude VT from the white
space chars, because Perl doesn't recognize it as such for \s and for comments
within regexes. */
for (i = 0; i < 256; i++)
{
int x = 0;
if (i != 0x0b && isspace(i)) x += ctype_space;
if (isalpha(i)) x += ctype_letter;
if (isdigit(i)) x += ctype_digit;
if (isxdigit(i)) x += ctype_xdigit;
if (isalnum(i) || i == '_') x += ctype_word;
/* Note: strchr includes the terminating zero in the characters it considers.
In this instance, that is ok because we want binary zero to be flagged as a
meta-character, which in this sense is any character that terminates a run
of data characters. */
if (strchr("\\*+?{^.$|()[", i) != 0) x += ctype_meta;
*p++ = x;
}
return yield;
}
/* End of pcre_maketables.c */

View File

@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2009 University of Cambridge
Copyright (c) 1997-2012 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -67,16 +67,25 @@ Arguments:
type the newline type
endptr pointer to the end of the string
lenptr where to return the length
utf8 TRUE if in utf8 mode
utf TRUE if in utf mode
Returns: TRUE or FALSE
*/
BOOL
_pcre_is_newline(USPTR ptr, int type, USPTR endptr, int *lenptr, BOOL utf8)
PRIV(is_newline)(PCRE_PUCHAR ptr, int type, PCRE_PUCHAR endptr, int *lenptr,
BOOL utf)
{
int c;
if (utf8) { GETCHAR(c, ptr); } else c = *ptr;
(void)utf;
#ifdef SUPPORT_UTF
if (utf)
{
GETCHAR(c, ptr);
}
else
#endif /* SUPPORT_UTF */
c = *ptr;
if (type == NLTYPE_ANYCRLF) switch(c)
{
@ -95,9 +104,15 @@ else switch(c)
case 0x000c: *lenptr = 1; return TRUE; /* FF */
case 0x000d: *lenptr = (ptr < endptr - 1 && ptr[1] == 0x0a)? 2 : 1;
return TRUE; /* CR */
case 0x0085: *lenptr = utf8? 2 : 1; return TRUE; /* NEL */
#ifdef COMPILE_PCRE8
case 0x0085: *lenptr = utf? 2 : 1; return TRUE; /* NEL */
case 0x2028: /* LS */
case 0x2029: *lenptr = 3; return TRUE; /* PS */
#else
case 0x0085: /* NEL */
case 0x2028: /* LS */
case 0x2029: *lenptr = 1; return TRUE; /* PS */
#endif /* COMPILE_PCRE8 */
default: return FALSE;
}
}
@ -116,26 +131,27 @@ Arguments:
type the newline type
startptr pointer to the start of the string
lenptr where to return the length
utf8 TRUE if in utf8 mode
utf TRUE if in utf mode
Returns: TRUE or FALSE
*/
BOOL
_pcre_was_newline(USPTR ptr, int type, USPTR startptr, int *lenptr, BOOL utf8)
PRIV(was_newline)(PCRE_PUCHAR ptr, int type, PCRE_PUCHAR startptr, int *lenptr,
BOOL utf)
{
int c;
(void)utf;
ptr--;
#ifdef SUPPORT_UTF8
if (utf8)
#ifdef SUPPORT_UTF
if (utf)
{
BACKCHAR(ptr);
GETCHAR(c, ptr);
}
else c = *ptr;
#else /* no UTF-8 support */
c = *ptr;
#endif /* SUPPORT_UTF8 */
else
#endif /* SUPPORT_UTF */
c = *ptr;
if (type == NLTYPE_ANYCRLF) switch(c)
{
@ -152,9 +168,15 @@ else switch(c)
case 0x000b: /* VT */
case 0x000c: /* FF */
case 0x000d: *lenptr = 1; return TRUE; /* CR */
case 0x0085: *lenptr = utf8? 2 : 1; return TRUE; /* NEL */
#ifdef COMPILE_PCRE8
case 0x0085: *lenptr = utf? 2 : 1; return TRUE; /* NEL */
case 0x2028: /* LS */
case 0x2029: *lenptr = 3; return TRUE; /* PS */
#else
case 0x0085: /* NEL */
case 0x2028: /* LS */
case 0x2029: *lenptr = 1; return TRUE; /* PS */
#endif /* COMPILE_PCRE8 */
default: return FALSE;
}
}

View File

@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2008 University of Cambridge
Copyright (c) 1997-2012 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -52,35 +52,45 @@ character value into a UTF8 string. */
* Convert character value to UTF-8 *
*************************************************/
/* This function takes an integer value in the range 0 - 0x7fffffff
and encodes it as a UTF-8 character in 0 to 6 bytes.
/* This function takes an integer value in the range 0 - 0x10ffff
and encodes it as a UTF-8 character in 1 to 6 pcre_uchars.
Arguments:
cvalue the character value
buffer pointer to buffer for result - at least 6 bytes long
buffer pointer to buffer for result - at least 6 pcre_uchars long
Returns: number of characters placed in the buffer
*/
int
_pcre_ord2utf8(int cvalue, uschar *buffer)
PRIV(ord2utf)(pcre_uint32 cvalue, pcre_uchar *buffer)
{
#ifdef SUPPORT_UTF8
#ifdef SUPPORT_UTF
register int i, j;
for (i = 0; i < _pcre_utf8_table1_size; i++)
if (cvalue <= _pcre_utf8_table1[i]) break;
/* Checking invalid cvalue character, encoded as invalid UTF-16 character.
Should never happen in practice. */
if ((cvalue & 0xf800) == 0xd800 || cvalue >= 0x110000)
cvalue = 0xfffe;
for (i = 0; i < PRIV(utf8_table1_size); i++)
if ((int)cvalue <= PRIV(utf8_table1)[i]) break;
buffer += i;
for (j = i; j > 0; j--)
{
*buffer-- = 0x80 | (cvalue & 0x3f);
cvalue >>= 6;
}
*buffer = _pcre_utf8_table2[i] | cvalue;
*buffer = PRIV(utf8_table2)[i] | cvalue;
return i + 1;
#else
(void)(cvalue); /* Keep compiler happy; this function won't ever be */
(void)(buffer); /* called when SUPPORT_UTF8 is not defined. */
(void)(buffer); /* called when SUPPORT_UTF is not defined. */
return 0;
#endif
}

89
glib/pcre/pcre_refcount.c Normal file
View File

@ -0,0 +1,89 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2012 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains the external function pcre_refcount(), which is an
auxiliary function that can be used to maintain a reference count in a compiled
pattern data block. This might be helpful in applications where the block is
shared by different users. */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre_internal.h"
/*************************************************
* Maintain reference count *
*************************************************/
/* The reference count is a 16-bit field, initialized to zero. It is not
possible to transfer a non-zero count from one host to a different host that
has a different byte order - though I can't see why anyone in their right mind
would ever want to do that!
Arguments:
argument_re points to compiled code
adjust value to add to the count
Returns: the (possibly updated) count value (a non-negative number), or
a negative error number
*/
#ifdef COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_refcount(pcre *argument_re, int adjust)
#else
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre16_refcount(pcre16 *argument_re, int adjust)
#endif
{
REAL_PCRE *re = (REAL_PCRE *)argument_re;
if (re == NULL) return PCRE_ERROR_NULL;
if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
re->ref_count = (-adjust > re->ref_count)? 0 :
(adjust + re->ref_count > 65535)? 65535 :
re->ref_count + adjust;
return re->ref_count;
}
/* End of pcre_refcount.c */

View File

@ -0,0 +1,168 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2012 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains an internal function that is used to match an extended
class. It is used by both pcre_exec() and pcre_def_exec(). */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre_internal.h"
#ifndef COMPILE_PCRE8
/*************************************************
* Compare string utilities *
*************************************************/
/* The following two functions compares two strings. Basically an strcmp
for non 8 bit characters.
Arguments:
str1 first string
str2 second string
Returns: 0 if both string are equal (like strcmp), 1 otherwise
*/
int
PRIV(strcmp_uc_uc)(const pcre_uchar *str1, const pcre_uchar *str2)
{
pcre_uchar c1;
pcre_uchar c2;
while (*str1 != '\0' || *str2 != '\0')
{
c1 = *str1++;
c2 = *str2++;
if (c1 != c2)
return ((c1 > c2) << 1) - 1;
}
/* Both length and characters must be equal. */
return 0;
}
int
PRIV(strcmp_uc_c8)(const pcre_uchar *str1, const char *str2)
{
const pcre_uint8 *ustr2 = (pcre_uint8 *)str2;
pcre_uchar c1;
pcre_uchar c2;
while (*str1 != '\0' || *ustr2 != '\0')
{
c1 = *str1++;
c2 = (pcre_uchar)*ustr2++;
if (c1 != c2)
return ((c1 > c2) << 1) - 1;
}
/* Both length and characters must be equal. */
return 0;
}
/* The following two functions compares two, fixed length
strings. Basically an strncmp for non 8 bit characters.
Arguments:
str1 first string
str2 second string
num size of the string
Returns: 0 if both string are equal (like strcmp), 1 otherwise
*/
int
PRIV(strncmp_uc_uc)(const pcre_uchar *str1, const pcre_uchar *str2, unsigned int num)
{
pcre_uchar c1;
pcre_uchar c2;
while (num-- > 0)
{
c1 = *str1++;
c2 = *str2++;
if (c1 != c2)
return ((c1 > c2) << 1) - 1;
}
/* Both length and characters must be equal. */
return 0;
}
int
PRIV(strncmp_uc_c8)(const pcre_uchar *str1, const char *str2, unsigned int num)
{
const pcre_uint8 *ustr2 = (pcre_uint8 *)str2;
pcre_uchar c1;
pcre_uchar c2;
while (num-- > 0)
{
c1 = *str1++;
c2 = (pcre_uchar)*ustr2++;
if (c1 != c2)
return ((c1 > c2) << 1) - 1;
}
/* Both length and characters must be equal. */
return 0;
}
/* The following function returns with the length of
a zero terminated string. Basically an strlen for non 8 bit characters.
Arguments:
str string
Returns: length of the string
*/
unsigned int
PRIV(strlen_uc)(const pcre_uchar *str)
{
unsigned int len = 0;
while (*str++ != 0)
len++;
return len;
}
#endif /* COMPILE_PCRE8 */
/* End of pcre_string_utils.c */

File diff suppressed because it is too large Load Diff

View File

@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2009 University of Cambridge
Copyright (c) 1997-2012 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -37,6 +37,7 @@ POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
#ifndef PCRE_INCLUDED
/* This module contains some fixed tables that are used by more than one of the
PCRE code modules. The tables are also #included by the pcretest program, which
@ -50,11 +51,12 @@ clashes with the library. */
#include "pcre_internal.h"
#endif /* PCRE_INCLUDED */
/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
the definition is next to the definition of the opcodes in pcre_internal.h. */
const uschar _pcre_OP_lengths[] = { OP_LENGTHS };
const pcre_uint8 PRIV(OP_lengths)[] = { OP_LENGTHS };
@ -65,31 +67,38 @@ const uschar _pcre_OP_lengths[] = { OP_LENGTHS };
/* These are the breakpoints for different numbers of bytes in a UTF-8
character. */
#ifdef SUPPORT_UTF8
#if (defined SUPPORT_UTF && defined COMPILE_PCRE8) \
|| (defined PCRE_INCLUDED && defined SUPPORT_PCRE16)
const int _pcre_utf8_table1[] =
/* These tables are also required by pcretest in 16 bit mode. */
const int PRIV(utf8_table1)[] =
{ 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};
const int _pcre_utf8_table1_size = sizeof(_pcre_utf8_table1)/sizeof(int);
const int PRIV(utf8_table1_size) = sizeof(PRIV(utf8_table1)) / sizeof(int);
/* These are the indicator bits and the mask for the data bits to set in the
first byte of a character, indexed by the number of additional bytes. */
const int _pcre_utf8_table2[] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
const int _pcre_utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
const int PRIV(utf8_table2)[] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
const int PRIV(utf8_table3)[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
/* Table of the number of extra bytes, indexed by the first byte masked with
0x3f. The highest number for a valid UTF-8 first byte is in fact 0x3d. */
const uschar _pcre_utf8_table4[] = {
const pcre_uint8 PRIV(utf8_table4)[] = {
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
#endif /* (SUPPORT_UTF && COMPILE_PCRE8) || (PCRE_INCLUDED && SUPPORT_PCRE16)*/
#ifdef SUPPORT_UTF
/* Table to translate from particular type value to the general value. */
const int _pcre_ucp_gentype[] = {
const int PRIV(ucp_gentype)[] = {
ucp_C, ucp_C, ucp_C, ucp_C, ucp_C, /* Cc, Cf, Cn, Co, Cs */
ucp_L, ucp_L, ucp_L, ucp_L, ucp_L, /* Ll, Lu, Lm, Lo, Lt */
ucp_M, ucp_M, ucp_M, /* Mc, Me, Mn */
@ -100,6 +109,21 @@ const int _pcre_ucp_gentype[] = {
ucp_Z, ucp_Z, ucp_Z /* Zl, Zp, Zs */
};
#ifdef SUPPORT_JIT
/* This table reverses PRIV(ucp_gentype). We can save the cost
of a memory load. */
const int PRIV(ucp_typerange)[] = {
ucp_Cc, ucp_Cs,
ucp_Ll, ucp_Lu,
ucp_Mc, ucp_Mn,
ucp_Nd, ucp_No,
ucp_Pc, ucp_Ps,
ucp_Sc, ucp_So,
ucp_Zl, ucp_Zs,
};
#endif /* SUPPORT_JIT */
/* The pcre_utt[] table below translates Unicode property names into type and
code values. It is searched by binary chop, so must be in collating sequence of
name. Originally, the table contained pointers to the name strings in the first
@ -110,7 +134,7 @@ table itself. Maintenance is more error-prone, but frequent changes to this
data are unlikely.
July 2008: There is now a script called maint/GenerateUtt.py that can be used
to generate this data instead of maintaining it entirely by hand.
to generate this data automatically instead of maintaining it by hand.
The script was updated in March 2009 to generate a new EBCDIC-compliant
version. Like all other character and string literals that are compared against
@ -186,9 +210,9 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Lu0 STR_L STR_u "\0"
#define STRING_Lycian0 STR_L STR_y STR_c STR_i STR_a STR_n "\0"
#define STRING_Lydian0 STR_L STR_y STR_d STR_i STR_a STR_n "\0"
#define STRING_Mandaic0 STR_M STR_a STR_n STR_d STR_a STR_i STR_c "\0"
#define STRING_M0 STR_M "\0"
#define STRING_Malayalam0 STR_M STR_a STR_l STR_a STR_y STR_a STR_l STR_a STR_m "\0"
#define STRING_Mandaic0 STR_M STR_a STR_n STR_d STR_a STR_i STR_c "\0"
#define STRING_Mc0 STR_M STR_c "\0"
#define STRING_Me0 STR_M STR_e "\0"
#define STRING_Meetei_Mayek0 STR_M STR_e STR_e STR_t STR_e STR_i STR_UNDERSCORE STR_M STR_a STR_y STR_e STR_k "\0"
@ -256,7 +280,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Zp0 STR_Z STR_p "\0"
#define STRING_Zs0 STR_Z STR_s "\0"
const char _pcre_utt_names[] =
const char PRIV(utt_names)[] =
STRING_Any0
STRING_Arabic0
STRING_Armenian0
@ -396,7 +420,7 @@ const char _pcre_utt_names[] =
STRING_Zp0
STRING_Zs0;
const ucp_type_table _pcre_utt[] = {
const ucp_type_table PRIV(utt)[] = {
{ 0, PT_ANY, 0 },
{ 4, PT_SC, ucp_Arabic },
{ 11, PT_SC, ucp_Armenian },
@ -537,8 +561,8 @@ const ucp_type_table _pcre_utt[] = {
{ 961, PT_PC, ucp_Zs }
};
const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table);
const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
#endif /* SUPPORT_UTF8 */
#endif /* SUPPORT_UTF */
/* End of pcre_tables.c */

View File

@ -1,139 +0,0 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2009 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains an internal function that tests a compiled pattern to
see if it was compiled with the opposite endianness. If so, it uses an
auxiliary local function to flip the appropriate bytes. */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre_internal.h"
/*************************************************
* Flip bytes in an integer *
*************************************************/
/* This function is called when the magic number in a regex doesn't match, in
order to flip its bytes to see if we are dealing with a pattern that was
compiled on a host of different endianness. If so, this function is used to
flip other byte values.
Arguments:
value the number to flip
n the number of bytes to flip (assumed to be 2 or 4)
Returns: the flipped value
*/
static unsigned long int
byteflip(unsigned long int value, int n)
{
if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
return ((value & 0x000000ff) << 24) |
((value & 0x0000ff00) << 8) |
((value & 0x00ff0000) >> 8) |
((value & 0xff000000) >> 24);
}
/*************************************************
* Test for a byte-flipped compiled regex *
*************************************************/
/* This function is called from pcre_exec(), pcre_dfa_exec(), and also from
pcre_fullinfo(). Its job is to test whether the regex is byte-flipped - that
is, it was compiled on a system of opposite endianness. The function is called
only when the native MAGIC_NUMBER test fails. If the regex is indeed flipped,
we flip all the relevant values into a different data block, and return it.
Arguments:
re points to the regex
study points to study data, or NULL
internal_re points to a new regex block
internal_study points to a new study block
Returns: the new block if is is indeed a byte-flipped regex
NULL if it is not
*/
real_pcre *
_pcre_try_flipped(const real_pcre *re, real_pcre *internal_re,
const pcre_study_data *study, pcre_study_data *internal_study)
{
if (byteflip(re->magic_number, sizeof(re->magic_number)) != MAGIC_NUMBER)
return NULL;
*internal_re = *re; /* To copy other fields */
internal_re->size = byteflip(re->size, sizeof(re->size));
internal_re->options = byteflip(re->options, sizeof(re->options));
internal_re->flags = (pcre_uint16)byteflip(re->flags, sizeof(re->flags));
internal_re->top_bracket =
(pcre_uint16)byteflip(re->top_bracket, sizeof(re->top_bracket));
internal_re->top_backref =
(pcre_uint16)byteflip(re->top_backref, sizeof(re->top_backref));
internal_re->first_byte =
(pcre_uint16)byteflip(re->first_byte, sizeof(re->first_byte));
internal_re->req_byte =
(pcre_uint16)byteflip(re->req_byte, sizeof(re->req_byte));
internal_re->name_table_offset =
(pcre_uint16)byteflip(re->name_table_offset, sizeof(re->name_table_offset));
internal_re->name_entry_size =
(pcre_uint16)byteflip(re->name_entry_size, sizeof(re->name_entry_size));
internal_re->name_count =
(pcre_uint16)byteflip(re->name_count, sizeof(re->name_count));
if (study != NULL)
{
*internal_study = *study; /* To copy other fields */
internal_study->size = byteflip(study->size, sizeof(study->size));
internal_study->flags = byteflip(study->flags, sizeof(study->flags));
internal_study->minlength = byteflip(study->minlength,
sizeof(study->minlength));
}
return internal_re;
}
/* End of pcre_tryflipped.c */

2981
glib/pcre/pcre_ucd.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1,131 +0,0 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2006 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This file has been modified to use glib instead of the internal table
* in ucptable.c -- Marco Barisione */
/* This module contains code for searching the table of Unicode character
properties. */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre_internal.h"
#include "ucp.h" /* Category definitions */
/* Table to translate from particular type value to the general value. */
#ifdef NOT_USED_IN_GLIB
static int ucp_gentype[] = {
ucp_C, ucp_C, ucp_C, ucp_C, ucp_C, /* Cc, Cf, Cn, Co, Cs */
ucp_L, ucp_L, ucp_L, ucp_L, ucp_L, /* Ll, Lu, Lm, Lo, Lt */
ucp_M, ucp_M, ucp_M, /* Mc, Me, Mn */
ucp_N, ucp_N, ucp_N, /* Nd, Nl, No */
ucp_P, ucp_P, ucp_P, ucp_P, ucp_P, /* Pc, Pd, Pe, Pf, Pi */
ucp_P, ucp_P, /* Ps, Po */
ucp_S, ucp_S, ucp_S, ucp_S, /* Sc, Sk, Sm, So */
ucp_Z, ucp_Z, ucp_Z /* Zl, Zp, Zs */
};
/*************************************************
* Search table and return type *
*************************************************/
/* Three values are returned: the category is ucp_C, ucp_L, etc. The detailed
character type is ucp_Lu, ucp_Nd, etc. The script is ucp_Latin, etc.
Arguments:
c the character value
type_ptr the detailed character type is returned here
script_ptr the script is returned here
Returns: the character type category
*/
int
_pcre_ucp_findprop(const unsigned int c, int *type_ptr, int *script_ptr)
{
/* Note that the Unicode types have the same values in glib and in
* PCRE, so ucp_Ll == G_UNICODE_LOWERCASE_LETTER,
* ucp_Zs == G_UNICODE_SPACE_SEPARATOR, and so on. */
*type_ptr = g_unichar_type(c);
*script_ptr = g_unichar_get_script(c);
return ucp_gentype[*type_ptr];
}
#endif
/*************************************************
* Search table and return other case *
*************************************************/
/* If the given character is a letter, and there is another case for the
letter, return the other case. Otherwise, return -1.
Arguments:
c the character value
Returns: the other case or NOTACHAR if none
*/
unsigned int
_pcre_ucp_othercase(const unsigned int c)
{
int other_case = NOTACHAR;
if (g_unichar_islower(c))
other_case = g_unichar_toupper(c);
else if (g_unichar_isupper(c))
other_case = g_unichar_tolower(c);
if (other_case == c)
other_case = NOTACHAR;
return other_case;
}
/* End of pcre_ucp_searchfuncs.c */

299
glib/pcre/pcre_valid_utf8.c Normal file
View File

@ -0,0 +1,299 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2012 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains an internal function for validating UTF-8 character
strings. */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre_internal.h"
/*************************************************
* Validate a UTF-8 string *
*************************************************/
/* This function is called (optionally) at the start of compile or match, to
check that a supposed UTF-8 string is actually valid. The early check means
that subsequent code can assume it is dealing with a valid string. The check
can be turned off for maximum performance, but the consequences of supplying an
invalid string are then undefined.
Originally, this function checked according to RFC 2279, allowing for values in
the range 0 to 0x7fffffff, up to 6 bytes long, but ensuring that they were in
the canonical format. Once somebody had pointed out RFC 3629 to me (it
obsoletes 2279), additional restrictions were applied. The values are now
limited to be between 0 and 0x0010ffff, no more than 4 bytes long, and the
subrange 0xd000 to 0xdfff is excluded. However, the format of 5-byte and 6-byte
characters is still checked.
From release 8.13 more information about the details of the error are passed
back in the returned value:
PCRE_UTF8_ERR0 No error
PCRE_UTF8_ERR1 Missing 1 byte at the end of the string
PCRE_UTF8_ERR2 Missing 2 bytes at the end of the string
PCRE_UTF8_ERR3 Missing 3 bytes at the end of the string
PCRE_UTF8_ERR4 Missing 4 bytes at the end of the string
PCRE_UTF8_ERR5 Missing 5 bytes at the end of the string
PCRE_UTF8_ERR6 2nd-byte's two top bits are not 0x80
PCRE_UTF8_ERR7 3rd-byte's two top bits are not 0x80
PCRE_UTF8_ERR8 4th-byte's two top bits are not 0x80
PCRE_UTF8_ERR9 5th-byte's two top bits are not 0x80
PCRE_UTF8_ERR10 6th-byte's two top bits are not 0x80
PCRE_UTF8_ERR11 5-byte character is not permitted by RFC 3629
PCRE_UTF8_ERR12 6-byte character is not permitted by RFC 3629
PCRE_UTF8_ERR13 4-byte character with value > 0x10ffff is not permitted
PCRE_UTF8_ERR14 3-byte character with value 0xd000-0xdfff is not permitted
PCRE_UTF8_ERR15 Overlong 2-byte sequence
PCRE_UTF8_ERR16 Overlong 3-byte sequence
PCRE_UTF8_ERR17 Overlong 4-byte sequence
PCRE_UTF8_ERR18 Overlong 5-byte sequence (won't ever occur)
PCRE_UTF8_ERR19 Overlong 6-byte sequence (won't ever occur)
PCRE_UTF8_ERR20 Isolated 0x80 byte (not within UTF-8 character)
PCRE_UTF8_ERR21 Byte with the illegal value 0xfe or 0xff
Arguments:
string points to the string
length length of string, or -1 if the string is zero-terminated
errp pointer to an error position offset variable
Returns: = 0 if the string is a valid UTF-8 string
> 0 otherwise, setting the offset of the bad character
*/
int
PRIV(valid_utf)(PCRE_PUCHAR string, int length, int *erroroffset)
{
#ifdef SUPPORT_UTF
register PCRE_PUCHAR p;
if (length < 0)
{
for (p = string; *p != 0; p++);
length = (int)(p - string);
}
for (p = string; length-- > 0; p++)
{
register int ab, c, d;
c = *p;
if (c < 128) continue; /* ASCII character */
if (c < 0xc0) /* Isolated 10xx xxxx byte */
{
*erroroffset = (int)(p - string);
return PCRE_UTF8_ERR20;
}
if (c >= 0xfe) /* Invalid 0xfe or 0xff bytes */
{
*erroroffset = (int)(p - string);
return PCRE_UTF8_ERR21;
}
ab = PRIV(utf8_table4)[c & 0x3f]; /* Number of additional bytes */
if (length < ab)
{
*erroroffset = (int)(p - string); /* Missing bytes */
return ab - length; /* Codes ERR1 to ERR5 */
}
length -= ab; /* Length remaining */
/* Check top bits in the second byte */
if (((d = *(++p)) & 0xc0) != 0x80)
{
*erroroffset = (int)(p - string) - 1;
return PCRE_UTF8_ERR6;
}
/* For each length, check that the remaining bytes start with the 0x80 bit
set and not the 0x40 bit. Then check for an overlong sequence, and for the
excluded range 0xd800 to 0xdfff. */
switch (ab)
{
/* 2-byte character. No further bytes to check for 0x80. Check first byte
for for xx00 000x (overlong sequence). */
case 1: if ((c & 0x3e) == 0)
{
*erroroffset = (int)(p - string) - 1;
return PCRE_UTF8_ERR15;
}
break;
/* 3-byte character. Check third byte for 0x80. Then check first 2 bytes
for 1110 0000, xx0x xxxx (overlong sequence) or
1110 1101, 1010 xxxx (0xd800 - 0xdfff) */
case 2:
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
{
*erroroffset = (int)(p - string) - 2;
return PCRE_UTF8_ERR7;
}
if (c == 0xe0 && (d & 0x20) == 0)
{
*erroroffset = (int)(p - string) - 2;
return PCRE_UTF8_ERR16;
}
if (c == 0xed && d >= 0xa0)
{
*erroroffset = (int)(p - string) - 2;
return PCRE_UTF8_ERR14;
}
break;
/* 4-byte character. Check 3rd and 4th bytes for 0x80. Then check first 2
bytes for for 1111 0000, xx00 xxxx (overlong sequence), then check for a
character greater than 0x0010ffff (f4 8f bf bf) */
case 3:
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
{
*erroroffset = (int)(p - string) - 2;
return PCRE_UTF8_ERR7;
}
if ((*(++p) & 0xc0) != 0x80) /* Fourth byte */
{
*erroroffset = (int)(p - string) - 3;
return PCRE_UTF8_ERR8;
}
if (c == 0xf0 && (d & 0x30) == 0)
{
*erroroffset = (int)(p - string) - 3;
return PCRE_UTF8_ERR17;
}
if (c > 0xf4 || (c == 0xf4 && d > 0x8f))
{
*erroroffset = (int)(p - string) - 3;
return PCRE_UTF8_ERR13;
}
break;
/* 5-byte and 6-byte characters are not allowed by RFC 3629, and will be
rejected by the length test below. However, we do the appropriate tests
here so that overlong sequences get diagnosed, and also in case there is
ever an option for handling these larger code points. */
/* 5-byte character. Check 3rd, 4th, and 5th bytes for 0x80. Then check for
1111 1000, xx00 0xxx */
case 4:
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
{
*erroroffset = (int)(p - string) - 2;
return PCRE_UTF8_ERR7;
}
if ((*(++p) & 0xc0) != 0x80) /* Fourth byte */
{
*erroroffset = (int)(p - string) - 3;
return PCRE_UTF8_ERR8;
}
if ((*(++p) & 0xc0) != 0x80) /* Fifth byte */
{
*erroroffset = (int)(p - string) - 4;
return PCRE_UTF8_ERR9;
}
if (c == 0xf8 && (d & 0x38) == 0)
{
*erroroffset = (int)(p - string) - 4;
return PCRE_UTF8_ERR18;
}
break;
/* 6-byte character. Check 3rd-6th bytes for 0x80. Then check for
1111 1100, xx00 00xx. */
case 5:
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
{
*erroroffset = (int)(p - string) - 2;
return PCRE_UTF8_ERR7;
}
if ((*(++p) & 0xc0) != 0x80) /* Fourth byte */
{
*erroroffset = (int)(p - string) - 3;
return PCRE_UTF8_ERR8;
}
if ((*(++p) & 0xc0) != 0x80) /* Fifth byte */
{
*erroroffset = (int)(p - string) - 4;
return PCRE_UTF8_ERR9;
}
if ((*(++p) & 0xc0) != 0x80) /* Sixth byte */
{
*erroroffset = (int)(p - string) - 5;
return PCRE_UTF8_ERR10;
}
if (c == 0xfc && (d & 0x3c) == 0)
{
*erroroffset = (int)(p - string) - 5;
return PCRE_UTF8_ERR19;
}
break;
}
/* Character is valid under RFC 2279, but 4-byte and 5-byte characters are
excluded by RFC 3629. The pointer p is currently at the last byte of the
character. */
if (ab > 3)
{
*erroroffset = (int)(p - string) - ab;
return (ab == 4)? PCRE_UTF8_ERR11 : PCRE_UTF8_ERR12;
}
}
#else /* SUPPORT_UTF */
(void)(string); /* Keep picky compilers happy */
(void)(length);
#endif
return PCRE_UTF8_ERR0; /* This indicates success */
}
/* End of pcre_valid_utf8.c */

95
glib/pcre/pcre_version.c Normal file
View File

@ -0,0 +1,95 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2012 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains the external function pcre_version(), which returns a
string that identifies the PCRE version that is in use. */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre_internal.h"
/*************************************************
* Return version string *
*************************************************/
/* These macros are the standard way of turning unquoted text into C strings.
They allow macros like PCRE_MAJOR to be defined without quotes, which is
convenient for user programs that want to test its value. */
#define STRING(a) # a
#define XSTRING(s) STRING(s)
/* A problem turned up with PCRE_PRERELEASE, which is defined empty for
production releases. Originally, it was used naively in this code:
return XSTRING(PCRE_MAJOR)
"." XSTRING(PCRE_MINOR)
XSTRING(PCRE_PRERELEASE)
" " XSTRING(PCRE_DATE);
However, when PCRE_PRERELEASE is empty, this leads to an attempted expansion of
STRING(). The C standard states: "If (before argument substitution) any
argument consists of no preprocessing tokens, the behavior is undefined." It
turns out the gcc treats this case as a single empty string - which is what we
really want - but Visual C grumbles about the lack of an argument for the
macro. Unfortunately, both are within their rights. To cope with both ways of
handling this, I had resort to some messy hackery that does a test at run time.
I could find no way of detecting that a macro is defined as an empty string at
pre-processor time. This hack uses a standard trick for avoiding calling
the STRING macro with an empty argument when doing the test. */
#ifdef COMPILE_PCRE8
PCRE_EXP_DEFN const char * PCRE_CALL_CONVENTION
pcre_version(void)
#else
PCRE_EXP_DEFN const char * PCRE_CALL_CONVENTION
pcre16_version(void)
#endif
{
return (XSTRING(Z PCRE_PRERELEASE)[1] == 0)?
XSTRING(PCRE_MAJOR.PCRE_MINOR PCRE_DATE) :
XSTRING(PCRE_MAJOR.PCRE_MINOR) XSTRING(PCRE_PRERELEASE PCRE_DATE);
}
/* End of pcre_version.c */

View File

@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2010 University of Cambridge
Copyright (c) 1997-2012 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -64,46 +64,70 @@ Returns: TRUE if character matches, else FALSE
*/
BOOL
_pcre_xclass(int c, const uschar *data)
PRIV(xclass)(int c, const pcre_uchar *data, BOOL utf)
{
int t;
BOOL negated = (*data & XCL_NOT) != 0;
(void)utf;
#ifdef COMPILE_PCRE8
/* In 8 bit mode, this must always be TRUE. Help the compiler to know that. */
utf = TRUE;
#endif
/* Character values < 256 are matched against a bitmap, if one is present. If
not, we still carry on, because there may be ranges that start below 256 in the
additional data. */
if (c < 256)
{
if ((*data & XCL_MAP) != 0 && (data[1 + c/8] & (1 << (c&7))) != 0)
return !negated; /* char found */
if ((*data & XCL_MAP) != 0 &&
(((pcre_uint8 *)(data + 1))[c/8] & (1 << (c&7))) != 0)
return !negated; /* char found */
}
/* First skip the bit map if present. Then match against the list of Unicode
properties or large chars or ranges that end with a large char. We won't ever
encounter XCL_PROP or XCL_NOTPROP when UCP support is not compiled. */
if ((*data++ & XCL_MAP) != 0) data += 32;
if ((*data++ & XCL_MAP) != 0) data += 32 / sizeof(pcre_uchar);
while ((t = *data++) != XCL_END)
{
int x, y;
if (t == XCL_SINGLE)
{
GETCHARINC(x, data);
#ifdef SUPPORT_UTF
if (utf)
{
GETCHARINC(x, data); /* macro generates multiple statements */
}
else
#endif
x = *data++;
if (c == x) return !negated;
}
else if (t == XCL_RANGE)
{
GETCHARINC(x, data);
GETCHARINC(y, data);
#ifdef SUPPORT_UTF
if (utf)
{
GETCHARINC(x, data); /* macro generates multiple statements */
GETCHARINC(y, data); /* macro generates multiple statements */
}
else
#endif
{
x = *data++;
y = *data++;
}
if (c >= x && c <= y) return !negated;
}
#ifdef SUPPORT_UCP
else /* XCL_PROP & XCL_NOTPROP */
{
int chartype = UCD_CHARTYPE(c);
const ucd_record *prop = GET_UCD(c);
switch(*data)
{
@ -112,46 +136,46 @@ while ((t = *data++) != XCL_END)
break;
case PT_LAMP:
if ((chartype == ucp_Lu || chartype == ucp_Ll ||
chartype == ucp_Lt) == (t == XCL_PROP)) return !negated;
if ((prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
prop->chartype == ucp_Lt) == (t == XCL_PROP)) return !negated;
break;
case PT_GC:
if ((data[1] == _pcre_ucp_gentype[chartype]) == (t == XCL_PROP))
if ((data[1] == PRIV(ucp_gentype)[prop->chartype]) == (t == XCL_PROP))
return !negated;
break;
case PT_PC:
if ((data[1] == chartype) == (t == XCL_PROP)) return !negated;
if ((data[1] == prop->chartype) == (t == XCL_PROP)) return !negated;
break;
case PT_SC:
if ((data[1] == UCD_SCRIPT(c)) == (t == XCL_PROP)) return !negated;
if ((data[1] == prop->script) == (t == XCL_PROP)) return !negated;
break;
case PT_ALNUM:
if ((_pcre_ucp_gentype[chartype] == ucp_L ||
_pcre_ucp_gentype[chartype] == ucp_N) == (t == XCL_PROP))
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (t == XCL_PROP))
return !negated;
break;
case PT_SPACE: /* Perl space */
if ((_pcre_ucp_gentype[chartype] == ucp_Z ||
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
== (t == XCL_PROP))
return !negated;
break;
case PT_PXSPACE: /* POSIX space */
if ((_pcre_ucp_gentype[chartype] == ucp_Z ||
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
c == CHAR_FF || c == CHAR_CR) == (t == XCL_PROP))
return !negated;
break;
case PT_WORD:
if ((_pcre_ucp_gentype[chartype] == ucp_L ||
_pcre_ucp_gentype[chartype] == ucp_N || c == CHAR_UNDERSCORE)
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
PRIV(ucp_gentype)[prop->chartype] == ucp_N || c == CHAR_UNDERSCORE)
== (t == XCL_PROP))
return !negated;
break;

146
glib/pcre/pcreposix.h Normal file
View File

@ -0,0 +1,146 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
#ifndef _PCREPOSIX_H
#define _PCREPOSIX_H
/* This is the header for the POSIX wrapper interface to the PCRE Perl-
Compatible Regular Expression library. It defines the things POSIX says should
be there. I hope.
Copyright (c) 1997-2012 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* Have to include stdlib.h in order to ensure that size_t is defined. */
#include <stdlib.h>
/* Allow for C++ users */
#ifdef __cplusplus
extern "C" {
#endif
/* Options, mostly defined by POSIX, but with some extras. */
#define REG_ICASE 0x0001 /* Maps to PCRE_CASELESS */
#define REG_NEWLINE 0x0002 /* Maps to PCRE_MULTILINE */
#define REG_NOTBOL 0x0004 /* Maps to PCRE_NOTBOL */
#define REG_NOTEOL 0x0008 /* Maps to PCRE_NOTEOL */
#define REG_DOTALL 0x0010 /* NOT defined by POSIX; maps to PCRE_DOTALL */
#define REG_NOSUB 0x0020 /* Maps to PCRE_NO_AUTO_CAPTURE */
#define REG_UTF8 0x0040 /* NOT defined by POSIX; maps to PCRE_UTF8 */
#define REG_STARTEND 0x0080 /* BSD feature: pass subject string by so,eo */
#define REG_NOTEMPTY 0x0100 /* NOT defined by POSIX; maps to PCRE_NOTEMPTY */
#define REG_UNGREEDY 0x0200 /* NOT defined by POSIX; maps to PCRE_UNGREEDY */
#define REG_UCP 0x0400 /* NOT defined by POSIX; maps to PCRE_UCP */
/* This is not used by PCRE, but by defining it we make it easier
to slot PCRE into existing programs that make POSIX calls. */
#define REG_EXTENDED 0
/* Error values. Not all these are relevant or used by the wrapper. */
enum {
REG_ASSERT = 1, /* internal error ? */
REG_BADBR, /* invalid repeat counts in {} */
REG_BADPAT, /* pattern error */
REG_BADRPT, /* ? * + invalid */
REG_EBRACE, /* unbalanced {} */
REG_EBRACK, /* unbalanced [] */
REG_ECOLLATE, /* collation error - not relevant */
REG_ECTYPE, /* bad class */
REG_EESCAPE, /* bad escape sequence */
REG_EMPTY, /* empty expression */
REG_EPAREN, /* unbalanced () */
REG_ERANGE, /* bad range inside [] */
REG_ESIZE, /* expression too big */
REG_ESPACE, /* failed to get memory */
REG_ESUBREG, /* bad back reference */
REG_INVARG, /* bad argument */
REG_NOMATCH /* match failed */
};
/* The structure representing a compiled regular expression. */
typedef struct {
void *re_pcre;
size_t re_nsub;
size_t re_erroffset;
} regex_t;
/* The structure in which a captured offset is returned. */
typedef int regoff_t;
typedef struct {
regoff_t rm_so;
regoff_t rm_eo;
} regmatch_t;
/* When an application links to a PCRE DLL in Windows, the symbols that are
imported have to be identified as such. When building PCRE, the appropriate
export settings are needed, and are set in pcreposix.c before including this
file. */
#if defined(_WIN32) && !defined(PCRE_STATIC) && !defined(PCREPOSIX_EXP_DECL)
# define PCREPOSIX_EXP_DECL extern __declspec(dllimport)
# define PCREPOSIX_EXP_DEFN __declspec(dllimport)
#endif
/* By default, we use the standard "extern" declarations. */
#ifndef PCREPOSIX_EXP_DECL
# ifdef __cplusplus
# define PCREPOSIX_EXP_DECL extern "C"
# define PCREPOSIX_EXP_DEFN extern "C"
# else
# define PCREPOSIX_EXP_DECL extern
# define PCREPOSIX_EXP_DEFN extern
# endif
#endif
/* The functions */
PCREPOSIX_EXP_DECL int regcomp(regex_t *, const char *, int);
PCREPOSIX_EXP_DECL int regexec(const regex_t *, const char *, size_t,
regmatch_t *, int);
PCREPOSIX_EXP_DECL size_t regerror(int, const regex_t *, char *, size_t);
PCREPOSIX_EXP_DECL void regfree(regex_t *);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* End of pcreposix.h */

View File

@ -59,104 +59,107 @@ enum {
/* These are the script identifications. */
enum {
ucp_Arabic = G_UNICODE_SCRIPT_ARABIC,
ucp_Armenian = G_UNICODE_SCRIPT_ARMENIAN,
ucp_Bengali = G_UNICODE_SCRIPT_BENGALI,
ucp_Bopomofo = G_UNICODE_SCRIPT_BOPOMOFO,
ucp_Braille = G_UNICODE_SCRIPT_BRAILLE,
ucp_Buginese = G_UNICODE_SCRIPT_BUGINESE,
ucp_Buhid = G_UNICODE_SCRIPT_BUHID,
ucp_Canadian_Aboriginal = G_UNICODE_SCRIPT_CANADIAN_ABORIGINAL,
ucp_Cherokee = G_UNICODE_SCRIPT_CHEROKEE,
ucp_Common = G_UNICODE_SCRIPT_COMMON,
ucp_Coptic = G_UNICODE_SCRIPT_COPTIC,
ucp_Cypriot = G_UNICODE_SCRIPT_CYPRIOT,
ucp_Cyrillic = G_UNICODE_SCRIPT_CYRILLIC,
ucp_Deseret = G_UNICODE_SCRIPT_DESERET,
ucp_Devanagari = G_UNICODE_SCRIPT_DEVANAGARI,
ucp_Ethiopic = G_UNICODE_SCRIPT_ETHIOPIC,
ucp_Georgian = G_UNICODE_SCRIPT_GEORGIAN,
ucp_Glagolitic = G_UNICODE_SCRIPT_GLAGOLITIC,
ucp_Gothic = G_UNICODE_SCRIPT_GOTHIC,
ucp_Greek = G_UNICODE_SCRIPT_GREEK,
ucp_Gujarati = G_UNICODE_SCRIPT_GUJARATI,
ucp_Gurmukhi = G_UNICODE_SCRIPT_GURMUKHI,
ucp_Han = G_UNICODE_SCRIPT_HAN,
ucp_Hangul = G_UNICODE_SCRIPT_HANGUL,
ucp_Hanunoo = G_UNICODE_SCRIPT_HANUNOO,
ucp_Hebrew = G_UNICODE_SCRIPT_HEBREW,
ucp_Hiragana = G_UNICODE_SCRIPT_HIRAGANA,
ucp_Inherited = G_UNICODE_SCRIPT_INHERITED,
ucp_Kannada = G_UNICODE_SCRIPT_KANNADA,
ucp_Katakana = G_UNICODE_SCRIPT_KATAKANA,
ucp_Kharoshthi = G_UNICODE_SCRIPT_KHAROSHTHI,
ucp_Khmer = G_UNICODE_SCRIPT_KHMER,
ucp_Lao = G_UNICODE_SCRIPT_LAO,
ucp_Latin = G_UNICODE_SCRIPT_LATIN,
ucp_Limbu = G_UNICODE_SCRIPT_LIMBU,
ucp_Linear_B = G_UNICODE_SCRIPT_LINEAR_B,
ucp_Malayalam = G_UNICODE_SCRIPT_MALAYALAM,
ucp_Mongolian = G_UNICODE_SCRIPT_MONGOLIAN,
ucp_Myanmar = G_UNICODE_SCRIPT_MYANMAR,
ucp_New_Tai_Lue = G_UNICODE_SCRIPT_NEW_TAI_LUE,
ucp_Ogham = G_UNICODE_SCRIPT_OGHAM,
ucp_Old_Italic = G_UNICODE_SCRIPT_OLD_ITALIC,
ucp_Old_Persian = G_UNICODE_SCRIPT_OLD_PERSIAN,
ucp_Oriya = G_UNICODE_SCRIPT_ORIYA,
ucp_Osmanya = G_UNICODE_SCRIPT_OSMANYA,
ucp_Runic = G_UNICODE_SCRIPT_RUNIC,
ucp_Shavian = G_UNICODE_SCRIPT_SHAVIAN,
ucp_Sinhala = G_UNICODE_SCRIPT_SINHALA,
ucp_Syloti_Nagri = G_UNICODE_SCRIPT_SYLOTI_NAGRI,
ucp_Syriac = G_UNICODE_SCRIPT_SYRIAC,
ucp_Tagalog = G_UNICODE_SCRIPT_TAGALOG,
ucp_Tagbanwa = G_UNICODE_SCRIPT_TAGBANWA,
ucp_Tai_Le = G_UNICODE_SCRIPT_TAI_LE,
ucp_Tamil = G_UNICODE_SCRIPT_TAMIL,
ucp_Telugu = G_UNICODE_SCRIPT_TELUGU,
ucp_Thaana = G_UNICODE_SCRIPT_THAANA,
ucp_Thai = G_UNICODE_SCRIPT_THAI,
ucp_Tibetan = G_UNICODE_SCRIPT_TIBETAN,
ucp_Tifinagh = G_UNICODE_SCRIPT_TIFINAGH,
ucp_Ugaritic = G_UNICODE_SCRIPT_UGARITIC,
ucp_Yi = G_UNICODE_SCRIPT_YI,
ucp_Balinese = G_UNICODE_SCRIPT_BALINESE,
ucp_Cuneiform = G_UNICODE_SCRIPT_CUNEIFORM,
ucp_Nko = G_UNICODE_SCRIPT_NKO,
ucp_Phags_Pa = G_UNICODE_SCRIPT_PHAGS_PA,
ucp_Phoenician = G_UNICODE_SCRIPT_PHOENICIAN,
ucp_Carian = G_UNICODE_SCRIPT_CARIAN,
ucp_Cham = G_UNICODE_SCRIPT_CHAM,
ucp_Kayah_Li = G_UNICODE_SCRIPT_KAYAH_LI,
ucp_Lepcha = G_UNICODE_SCRIPT_LEPCHA,
ucp_Lycian = G_UNICODE_SCRIPT_LYCIAN,
ucp_Lydian = G_UNICODE_SCRIPT_LYDIAN,
ucp_Ol_Chiki = G_UNICODE_SCRIPT_OL_CHIKI,
ucp_Rejang = G_UNICODE_SCRIPT_REJANG,
ucp_Saurashtra = G_UNICODE_SCRIPT_SAURASHTRA,
ucp_Sundanese = G_UNICODE_SCRIPT_SUNDANESE,
ucp_Vai = G_UNICODE_SCRIPT_VAI,
ucp_Avestan = G_UNICODE_SCRIPT_AVESTAN,
ucp_Bamum = G_UNICODE_SCRIPT_BAMUM,
ucp_Egyptian_Hieroglyphs = G_UNICODE_SCRIPT_EGYPTIAN_HIEROGLYPHS,
ucp_Imperial_Aramaic = G_UNICODE_SCRIPT_IMPERIAL_ARAMAIC,
ucp_Inscriptional_Pahlavi = G_UNICODE_SCRIPT_INSCRIPTIONAL_PAHLAVI,
ucp_Inscriptional_Parthian = G_UNICODE_SCRIPT_INSCRIPTIONAL_PARTHIAN,
ucp_Javanese = G_UNICODE_SCRIPT_JAVANESE,
ucp_Kaithi = G_UNICODE_SCRIPT_KAITHI,
ucp_Lisu = G_UNICODE_SCRIPT_LISU,
ucp_Meetei_Mayek = G_UNICODE_SCRIPT_MEETEI_MAYEK,
ucp_Old_South_Arabian = G_UNICODE_SCRIPT_OLD_SOUTH_ARABIAN,
ucp_Old_Turkic = G_UNICODE_SCRIPT_OLD_TURKIC,
ucp_Samaritan = G_UNICODE_SCRIPT_SAMARITAN,
ucp_Tai_Tham = G_UNICODE_SCRIPT_TAI_THAM,
ucp_Tai_Viet = G_UNICODE_SCRIPT_TAI_VIET,
ucp_Batak = G_UNICODE_SCRIPT_BATAK,
ucp_Brahmi = G_UNICODE_SCRIPT_BRAHMI,
ucp_Mandaic = G_UNICODE_SCRIPT_MANDAIC
ucp_Arabic,
ucp_Armenian,
ucp_Bengali,
ucp_Bopomofo,
ucp_Braille,
ucp_Buginese,
ucp_Buhid,
ucp_Canadian_Aboriginal,
ucp_Cherokee,
ucp_Common,
ucp_Coptic,
ucp_Cypriot,
ucp_Cyrillic,
ucp_Deseret,
ucp_Devanagari,
ucp_Ethiopic,
ucp_Georgian,
ucp_Glagolitic,
ucp_Gothic,
ucp_Greek,
ucp_Gujarati,
ucp_Gurmukhi,
ucp_Han,
ucp_Hangul,
ucp_Hanunoo,
ucp_Hebrew,
ucp_Hiragana,
ucp_Inherited,
ucp_Kannada,
ucp_Katakana,
ucp_Kharoshthi,
ucp_Khmer,
ucp_Lao,
ucp_Latin,
ucp_Limbu,
ucp_Linear_B,
ucp_Malayalam,
ucp_Mongolian,
ucp_Myanmar,
ucp_New_Tai_Lue,
ucp_Ogham,
ucp_Old_Italic,
ucp_Old_Persian,
ucp_Oriya,
ucp_Osmanya,
ucp_Runic,
ucp_Shavian,
ucp_Sinhala,
ucp_Syloti_Nagri,
ucp_Syriac,
ucp_Tagalog,
ucp_Tagbanwa,
ucp_Tai_Le,
ucp_Tamil,
ucp_Telugu,
ucp_Thaana,
ucp_Thai,
ucp_Tibetan,
ucp_Tifinagh,
ucp_Ugaritic,
ucp_Yi,
/* New for Unicode 5.0: */
ucp_Balinese,
ucp_Cuneiform,
ucp_Nko,
ucp_Phags_Pa,
ucp_Phoenician,
/* New for Unicode 5.1: */
ucp_Carian,
ucp_Cham,
ucp_Kayah_Li,
ucp_Lepcha,
ucp_Lycian,
ucp_Lydian,
ucp_Ol_Chiki,
ucp_Rejang,
ucp_Saurashtra,
ucp_Sundanese,
ucp_Vai,
/* New for Unicode 5.2: */
ucp_Avestan,
ucp_Bamum,
ucp_Egyptian_Hieroglyphs,
ucp_Imperial_Aramaic,
ucp_Inscriptional_Pahlavi,
ucp_Inscriptional_Parthian,
ucp_Javanese,
ucp_Kaithi,
ucp_Lisu,
ucp_Meetei_Mayek,
ucp_Old_South_Arabian,
ucp_Old_Turkic,
ucp_Samaritan,
ucp_Tai_Tham,
ucp_Tai_Viet,
/* New for Unicode 6.0.0: */
ucp_Batak,
ucp_Brahmi,
ucp_Mandaic
};
#endif
/* End of ucp.h */