Add GRegex for regular expression matching. (#50075)

2007-03-15  Marco Barisione <marco@barisione.org>

	Add GRegex for regular expression matching.  (#50075)

	* configure.in: Handle GRegex compilation.

	* glib/gregex.c:
	* glib/gregex.h: Code for GRegex.

	* glib/Makefile.am:
	* glib/makefile.msc.in: Updated makefiles.

	* glib/pcre/*: Internal copy of PCRE.

	* glib/update-pcre/*: Stuff to automatically update the internal PCRE
	to a newer version.

	* tests/regex-test.c:
	* tests/Makefile.am:
	* tests/makefile.msc.in: Add tests for GRegex.

svn path=/trunk/; revision=5408
This commit is contained in:
Marco Barisione 2007-03-15 13:01:31 +00:00 committed by Marco Barisione
parent af8671792d
commit 0196d63975
54 changed files with 26185 additions and 11 deletions

View File

@ -1,3 +1,24 @@
2007-03-15 Marco Barisione <marco@barisione.org>
Add GRegex for regular expression matching. (#50075)
* configure.in: Handle GRegex compilation.
* glib/gregex.c:
* glib/gregex.h: Code for GRegex.
* glib/Makefile.am:
* glib/makefile.msc.in: Updated makefiles.
* glib/pcre/*: Internal copy of PCRE.
* glib/update-pcre/*: Stuff to automatically update the internal PCRE
to a newer version.
* tests/regex-test.c:
* tests/Makefile.am:
* tests/makefile.msc.in: Add tests for GRegex.
2007-03-15 Chris Wilson <chris@chris-wilson.co.uk> 2007-03-15 Chris Wilson <chris@chris-wilson.co.uk>
* glib/gmain.c (g_main_dispatch): Replace a * glib/gmain.c (g_main_dispatch): Replace a

View File

@ -173,7 +173,7 @@ AM_CONDITIONAL(MS_LIB_AVAILABLE, [test x$ms_librarian = xyes])
if test "$glib_native_win32" != yes; then if test "$glib_native_win32" != yes; then
# libtool option to control which symbols are exported # libtool option to control which symbols are exported
# right now, symbols starting with _ are not exported # right now, symbols starting with _ are not exported
LIBTOOL_EXPORT_OPTIONS='-export-symbols-regex "^[[^_]].*"' LIBTOOL_EXPORT_OPTIONS='-export-symbols-regex "^g.*"'
else else
# We currently use .def files on Windows # We currently use .def files on Windows
LIBTOOL_EXPORT_OPTIONS= LIBTOOL_EXPORT_OPTIONS=
@ -2146,6 +2146,74 @@ AC_RUN_IFELSE([AC_LANG_SOURCE([[
[broken_poll="no (cross compiling)"]) [broken_poll="no (cross compiling)"])
AC_MSG_RESULT($broken_poll) AC_MSG_RESULT($broken_poll)
dnl *********************
dnl *** GRegex checks ***
dnl *********************
dnl Decide whether GRegex is compiled at all (--disable-regex) and, when it
dnl is, whether it uses the bundled PCRE or the system-supplied one
dnl (--with-pcre=internal|system).  Both ENABLE_REGEX and USE_SYSTEM_PCRE
dnl are defined on every path, as AM_CONDITIONAL requires.
PCRE_REQUIRED_VERSION=7.0

# Check if we should compile GRegex
AC_ARG_ENABLE(regex, AC_HELP_STRING([--disable-regex],
              [disable the compilation of GRegex]),
[case "${enableval}" in
  yes) enable_regex=true ;;
  no)  enable_regex=false ;;
  *) AC_MSG_ERROR(bad value ${enableval} for --enable-regex) ;;
esac],
[enable_regex=true])

AM_CONDITIONAL(ENABLE_REGEX, $enable_regex)

if test x$enable_regex = xtrue; then
  # Check if we should use the internal or the system-supplied pcre
  AC_ARG_WITH(pcre,
             [AC_HELP_STRING([--with-pcre=@<:@internal/system@:>@],
                             [specify whether to use the internal or the
                              system-supplied PCRE library])])

  AM_CONDITIONAL(USE_SYSTEM_PCRE, [test "x$with_pcre" = xsystem])

  if test "x$with_pcre" = xsystem; then
    PKG_CHECK_MODULES(PCRE,
                      libpcre >= $PCRE_REQUIRED_VERSION)
    AC_CACHE_CHECK([for Unicode support in PCRE],glib_cv_pcre_has_unicode,[
                   # Use the PCRE flags only for this probe: save the
                   # caller's CFLAGS/LIBS and restore them afterwards so
                   # the rest of configure is not affected.  Libraries go
                   # in LIBS (after the object on the link line), not in
                   # LDFLAGS.
                   glib_save_CFLAGS="$CFLAGS"
                   glib_save_LIBS="$LIBS"
                   CFLAGS="$CFLAGS $PCRE_CFLAGS"
                   LIBS="$PCRE_LIBS $LIBS"
                   AC_TRY_RUN([#include <pcre.h>
                               int main () {
                                 int support;
                                 pcre_config (PCRE_CONFIG_UTF8, &support);
                                 if (!support)
                                   return 1;
                                 pcre_config (PCRE_CONFIG_UNICODE_PROPERTIES, &support);
                                 if (!support)
                                   return 1;
                                 return 0;
                               }],
                   glib_cv_pcre_has_unicode=yes,
                   glib_cv_pcre_has_unicode=no,
                   glib_cv_pcre_has_unicode=yes)
                   CFLAGS="$glib_save_CFLAGS"
                   LIBS="$glib_save_LIBS"
        ])
    if test "$glib_cv_pcre_has_unicode" = "no"; then
      AC_MSG_ERROR([*** The system-supplied PCRE does not support Unicode properties or UTF-8.])
    fi
    AC_SUBST(PCRE_CFLAGS)
    AC_SUBST(PCRE_LIBS)
    AC_DEFINE(USE_SYSTEM_PCRE, [], [using the system-supplied PCRE library])
  else
    # If using gcc 4 pass -Wno-pointer-sign when compiling the internal PCRE
    if test x"$GCC" = xyes; then
      AC_MSG_CHECKING([whether gcc understands -Wno-pointer-sign])
      if test [`$CC --version | sed -e 's/[^0-9]*\([0-9]\).*/\1/' -e q`] -ge 4; then
        PCRE_WARN_CFLAGS="$PCRE_WARN_CFLAGS -Wno-pointer-sign"
        AC_MSG_RESULT([yes])
      else
        AC_MSG_RESULT([no])
      fi
    fi
  fi
  AC_SUBST(PCRE_WARN_CFLAGS)
else
  # GRegex disabled: the conditional must still be defined (always false).
  AM_CONDITIONAL(USE_SYSTEM_PCRE, false)
fi
dnl ********************** dnl **********************
dnl *** Win32 API libs *** dnl *** Win32 API libs ***
dnl ********************** dnl **********************
@ -2864,6 +2932,8 @@ Makefile
glib/Makefile glib/Makefile
glib/libcharset/Makefile glib/libcharset/Makefile
glib/gnulib/Makefile glib/gnulib/Makefile
glib/pcre/Makefile
glib/update-pcre/Makefile
gmodule/Makefile gmodule/Makefile
gmodule/gmoduleconf.h gmodule/gmoduleconf.h
gobject/Makefile gobject/Makefile

View File

@ -1,3 +1,17 @@
2007-03-15 Marco Barisione <marco@barisione.org>
Add GRegex for regular expression matching. (#50075)
* glib/Makefile.am:
* glib/glib-docs.sgml:
* glib/glib-sections.txt:
* glib/tmpl/glib-unused.sgml:
* glib/regex-syntax.sgml:
* glib/tmpl/gregex-unused.sgml:
* glib/tmpl/gregex.sgml: Add GRegex.
* glib/building.sgml: Document build options for GRegex.
2007-03-14 Stefan Kost <ensonic@users.sf.net> 2007-03-14 Stefan Kost <ensonic@users.sf.net>
* gobject/tmpl/gparamspec.sgml: * gobject/tmpl/gparamspec.sgml:

View File

@ -37,7 +37,9 @@ IGNORE_HFILES= \
gmirroringtable.h \ gmirroringtable.h \
gscripttable.h \ gscripttable.h \
glib-mirroring-tab \ glib-mirroring-tab \
gnulib gnulib \
pcre \
update-pcre
# Extra options to supply to gtkdoc-mkdb # Extra options to supply to gtkdoc-mkdb
MKDB_OPTIONS=--sgml-mode --output-format=xml --ignore-files=trio MKDB_OPTIONS=--sgml-mode --output-format=xml --ignore-files=trio
@ -55,6 +57,7 @@ content_files = \
changes.sgml \ changes.sgml \
compiling.sgml \ compiling.sgml \
resources.sgml \ resources.sgml \
regex-syntax.sgml \
version.xml \ version.xml \
glib-gettextize.xml glib-gettextize.xml

View File

@ -146,6 +146,16 @@ How to compile GLib itself
e.g. POSIX threads, DCE threads or Solaris threads. e.g. POSIX threads, DCE threads or Solaris threads.
</para> </para>
</listitem> </listitem>
<listitem>
<para>
GRegex uses the <ulink url="http://www.pcre.org/">PCRE library</ulink>
for regular expression matching. The default is to use the internal
version of PCRE that is patched to use GLib for memory management
and Unicode handling. If you prefer to use the system-supplied PCRE
library you can pass the --with-pcre=system option to configure,
but it is not recommended.
</para>
</listitem>
</itemizedlist> </itemizedlist>
</refsect1> </refsect1>
@ -177,6 +187,13 @@ How to compile GLib itself
<group> <group>
<arg>--with-threads=[none|posix|dce|win32]</arg> <arg>--with-threads=[none|posix|dce|win32]</arg>
</group> </group>
<group>
<arg>--disable-regex</arg>
<arg>--enable-regex</arg>
</group>
<group>
<arg>--with-pcre=[internal|system]</arg>
</group>
<group> <group>
<arg>--disable-included-printf</arg> <arg>--disable-included-printf</arg>
<arg>--enable-included-printf</arg> <arg>--enable-included-printf</arg>
@ -361,6 +378,61 @@ How to compile GLib itself
</para> </para>
</formalpara> </formalpara>
<formalpara>
<title><systemitem>--disable-regex</systemitem> and
<systemitem>--enable-regex</systemitem></title>
<para>
Do not compile GLib with regular expression support.
GLib will be smaller because it will not need the
PCRE library. This is however not recommended, as
programs may need GRegex.
</para>
</formalpara>
<formalpara>
<title><systemitem>--with-pcre</systemitem></title>
<para>
Specify whether to use the internal or the system-supplied
PCRE library.
<itemizedlist>
<listitem><para>
'internal' means that GRegex will be compiled to use
the internal PCRE library.
</para></listitem>
<listitem><para>
'system' means that GRegex will be compiled to use
the system-supplied PCRE library.
</para></listitem>
</itemizedlist>
Using the internal PCRE is the preferred solution:
<itemizedlist>
<listitem>
<para>
System-supplied PCRE has a separate copy of the big tables
used for Unicode handling.
</para>
</listitem>
<listitem>
<para>
Some systems have PCRE libraries compiled without some needed
features, such as UTF-8 and Unicode support.
</para>
</listitem>
<listitem>
<para>
PCRE uses some global variables for memory management and
other features. In the rare case of a program using both
GRegex and PCRE (maybe indirectly through a library),
these variables could lead to problems when they are modified.
</para>
</listitem>
</itemizedlist>
</para>
</formalpara>
<formalpara> <formalpara>
<title><systemitem>--disable-included-printf</systemitem> and <title><systemitem>--disable-included-printf</systemitem> and
<systemitem>--enable-included-printf</systemitem></title> <systemitem>--enable-included-printf</systemitem></title>

View File

@ -61,6 +61,7 @@
<!ENTITY glib-Bookmarkfile SYSTEM "xml/bookmarkfile.xml"> <!ENTITY glib-Bookmarkfile SYSTEM "xml/bookmarkfile.xml">
<!ENTITY glib-Base64 SYSTEM "xml/base64.xml"> <!ENTITY glib-Base64 SYSTEM "xml/base64.xml">
<!ENTITY glib-i18n SYSTEM "xml/i18n.xml"> <!ENTITY glib-i18n SYSTEM "xml/i18n.xml">
<!ENTITY glib-Regex SYSTEM "xml/gregex.xml">
<!ENTITY glib-Version SYSTEM "xml/version.xml"> <!ENTITY glib-Version SYSTEM "xml/version.xml">
<!ENTITY glib-Compiling SYSTEM "compiling.sgml"> <!ENTITY glib-Compiling SYSTEM "compiling.sgml">
@ -69,6 +70,7 @@
<!ENTITY glib-Running SYSTEM "running.sgml"> <!ENTITY glib-Running SYSTEM "running.sgml">
<!ENTITY glib-Resources SYSTEM "resources.sgml"> <!ENTITY glib-Resources SYSTEM "resources.sgml">
<!ENTITY glib-Changes SYSTEM "changes.sgml"> <!ENTITY glib-Changes SYSTEM "changes.sgml">
<!ENTITY glib-RegexSyntax SYSTEM "regex-syntax.sgml">
<!ENTITY glib-gettextize SYSTEM "glib-gettextize.xml"> <!ENTITY glib-gettextize SYSTEM "glib-gettextize.xml">
@ -101,6 +103,7 @@ synchronize their operation.
&glib-Compiling; &glib-Compiling;
&glib-Running; &glib-Running;
&glib-Changes; &glib-Changes;
&glib-RegexSyntax;
&glib-Resources; &glib-Resources;
</chapter> </chapter>
@ -151,6 +154,7 @@ synchronize their operation.
&glib-Shell; &glib-Shell;
&glib-Option; &glib-Option;
&glib-Pattern-Matching; &glib-Pattern-Matching;
&glib-Regex;
&glib-Markup; &glib-Markup;
&glib-Keyfile; &glib-Keyfile;
&glib-Bookmarkfile; &glib-Bookmarkfile;

View File

@ -863,6 +863,50 @@ g_pattern_match_string
g_pattern_match_simple g_pattern_match_simple
</SECTION> </SECTION>
<SECTION>
<TITLE>Perl-compatible regular expressions</TITLE>
<FILE>gregex</FILE>
GRegexError
G_REGEX_ERROR
GRegexCompileFlags
GRegexMatchFlags
GRegex
GRegexEvalCallback
g_regex_new
g_regex_free
g_regex_optimize
g_regex_copy
g_regex_get_pattern
g_regex_clear
g_regex_match_simple
g_regex_match
g_regex_match_full
g_regex_match_next
g_regex_match_next_full
g_regex_match_all
g_regex_match_all_full
g_regex_get_match_count
g_regex_is_partial_match
g_regex_fetch
g_regex_fetch_pos
g_regex_fetch_named
g_regex_fetch_named_pos
g_regex_fetch_all
g_regex_get_string_number
g_regex_split_simple
g_regex_split
g_regex_split_full
g_regex_split_next
g_regex_split_next_full
g_regex_expand_references
g_regex_replace
g_regex_replace_literal
g_regex_replace_eval
g_regex_escape_string
<SUBSECTION Private>
g_regex_error_quark
</SECTION>
<SECTION> <SECTION>
<TITLE>Message Logging</TITLE> <TITLE>Message Logging</TITLE>
<FILE>messages</FILE> <FILE>messages</FILE>

File diff suppressed because it is too large Load Diff

View File

@ -712,6 +712,13 @@ To use this function you must configure glib with the flag
@mem: the memory to check. @mem: the memory to check.
<!-- ##### FUNCTION g_regex_error_quark ##### -->
<para>
</para>
@Returns:
<!-- ##### FUNCTION g_scanner_stat_mode ##### --> <!-- ##### FUNCTION g_scanner_stat_mode ##### -->
<para> <para>
Gets the file attributes. Gets the file attributes.

View File

@ -0,0 +1,578 @@
<!-- ##### SECTION Title ##### -->
Perl-compatible regular expressions
<!-- ##### SECTION Short_Description ##### -->
matches strings against regular expressions.
<!-- ##### SECTION Long_Description ##### -->
<para>
The <function>g_regex_*()</function> functions implement regular
expression pattern matching using syntax and semantics similar to
Perl regular expressions.
</para>
<para>
Some functions accept a <parameter>start_position</parameter> argument;
setting it differs from just passing over a shortened string and setting
#G_REGEX_MATCH_NOTBOL in the case of a pattern that begins with any kind
of lookbehind assertion.
For example, consider the pattern "\Biss\B" which finds occurrences of "iss"
in the middle of words. ("\B" matches only if the current position in the
subject is not a word boundary.) When applied to the string "Mississippi"
from the fourth byte, namely "issippi", it does not match, because "\B" is
always false at the start of the subject, which is deemed to be a word
boundary. However, if the entire string is passed, but with
<parameter>start_position</parameter> set to 4, it finds the second
occurrence of "iss" because it is able to look behind the starting point
to discover that it is preceded by a letter.
</para>
<para>
Note that, unless you set the #G_REGEX_RAW flag, all the strings passed
to these functions must be encoded in UTF-8. The lengths and the positions
inside the strings are in bytes and not in characters, so, for instance,
"\xc3\xa0" (i.e. "&agrave;") is two bytes long but it is treated as a single
character. If you set #G_REGEX_RAW the strings can be non-valid UTF-8
strings and a byte is treated as a character, so "\xc3\xa0" is two bytes
and two characters long.
</para>
<para>
When matching a pattern, "\n" matches only against a "\n" character in the
string, and "\r" matches only a "\r" character. To match any newline sequence
use "\R". This particular group matches either the two-character sequence
CR + LF ("\r\n"), or one of the single characters LF (linefeed, U+000A, "\n"), VT
(vertical tab, U+000B, "\v"), FF (formfeed, U+000C, "\f"), CR (carriage return,
U+000D, "\r"), NEL (next line, U+0085), LS (line separator, U+2028), or PS
(paragraph separator, U+2029).
</para>
<para>
The behaviour of the dot, circumflex, and dollar metacharacters is affected by
newline characters; the default is to recognize any newline character (the same
characters recognized by "\R"). This can be changed with #G_REGEX_NEWLINE_CR,
#G_REGEX_NEWLINE_LF and #G_REGEX_NEWLINE_CRLF compile options,
and with #G_REGEX_MATCH_NEWLINE_ANY, #G_REGEX_MATCH_NEWLINE_CR,
#G_REGEX_MATCH_NEWLINE_LF and #G_REGEX_MATCH_NEWLINE_CRLF match options.
These settings are also relevant when compiling a pattern if
#G_REGEX_EXTENDED is set, and an unescaped "#" outside a character class is
encountered. This indicates a comment that lasts until after the next
newline.
</para>
<para>
If you have two threads manipulating the same #GRegex, they must use a
lock to synchronize their operation, as these functions are not threadsafe.
Creating and manipulating different #GRegex structures from different
threads is not a problem.
</para>
<para>
The low-level regular expression functionality is provided by
the excellent <ulink url="http://www.pcre.org/">PCRE</ulink> library
written by Philip Hazel.
</para>
<!-- ##### SECTION See_Also ##### -->
<para>
</para>
<!-- ##### SECTION Stability_Level ##### -->
<!-- ##### ENUM GRegexError ##### -->
<para>
Error codes returned by regular expression functions.
</para>
@G_REGEX_ERROR_COMPILE: Compilation of the regular expression in <function>g_regex_new()</function> failed.
@G_REGEX_ERROR_OPTIMIZE: Optimization of the regular expression in <function>g_regex_optimize()</function> failed.
@G_REGEX_ERROR_REPLACE: Replacement failed due to an ill-formed replacement string.
@G_REGEX_ERROR_MATCH: The match process failed.
@Since: 2.14
<!-- ##### MACRO G_REGEX_ERROR ##### -->
<para>
Error domain for regular expressions. Errors in this domain will be from the #GRegexError enumeration. See #GError for information on error domains.
</para>
@Since: 2.14
<!-- ##### ENUM GRegexCompileFlags ##### -->
<para>
Flags specifying compile-time options.
</para>
@G_REGEX_CASELESS: Letters in the pattern match both upper and lower case
letters. It can be changed within a pattern by a "(?i)" option setting.
@G_REGEX_MULTILINE: By default, GRegex treats the strings as consisting
of a single line of characters (even if it actually contains newlines).
The "start of line" metacharacter ("^") matches only at the start of the
string, while the "end of line" metacharacter ("$") matches only at the
end of the string, or before a terminating newline (unless
#G_REGEX_DOLLAR_ENDONLY is set). When #G_REGEX_MULTILINE is set,
the "start of line" and "end of line" constructs match immediately following
or immediately before any newline in the string, respectively, as well
as at the very start and end. This can be changed within a pattern by a
"(?m)" option setting.
@G_REGEX_DOTALL: A dot metacharacter (".") in the pattern matches all
characters, including newlines. Without it, newlines are excluded. This
option can be changed within a pattern by a "(?s)" option setting.
@G_REGEX_EXTENDED: Whitespace data characters in the pattern are
totally ignored except when escaped or inside a character class.
Whitespace does not include the VT character (code 11). In addition,
characters between an unescaped "#" outside a character class and
the next newline character, inclusive, are also ignored. This can be
changed within a pattern by a "(?x)" option setting.
@G_REGEX_ANCHORED: The pattern is forced to be "anchored", that is,
it is constrained to match only at the first matching point in the string
that is being searched. This effect can also be achieved by appropriate
constructs in the pattern itself such as the "^" metacharacter.
@G_REGEX_DOLLAR_ENDONLY: A dollar metacharacter ("$") in the pattern
matches only at the end of the string. Without this option, a dollar also
matches immediately before the final character if it is a newline (but
not before any other newlines). This option is ignored if
#G_REGEX_MULTILINE is set.
@G_REGEX_UNGREEDY: Inverts the "greediness" of the
quantifiers so that they are not greedy by default, but become greedy
if followed by "?". It can also be set by a "(?U)" option setting within
the pattern.
@G_REGEX_RAW: Usually strings must be valid UTF-8 strings, using this
flag they are considered as a raw sequence of bytes.
@G_REGEX_NO_AUTO_CAPTURE: Disables the use of numbered capturing
parentheses in the pattern. Any opening parenthesis that is not followed
by "?" behaves as if it were followed by "?:" but named parentheses can
still be used for capturing (and they acquire numbers in the usual way).
@G_REGEX_DUPNAMES: Names used to identify capturing subpatterns need not
be unique. This can be helpful for certain types of pattern when it is known
that only one instance of the named subpattern can ever be matched.
@G_REGEX_NEWLINE_CR: Usually any newline character is recognized, if this
option is set, the only recognized newline character is '\r'.
@G_REGEX_NEWLINE_LF: Usually any newline character is recognized, if this
option is set, the only recognized newline character is '\n'.
@G_REGEX_NEWLINE_CRLF: Usually any newline character is recognized, if this
option is set, the only recognized newline character sequence is '\r\n'.
@Since: 2.14
<!-- ##### ENUM GRegexMatchFlags ##### -->
<para>
Flags specifying match-time options.
</para>
@G_REGEX_MATCH_ANCHORED: The pattern is forced to be "anchored", that is,
it is constrained to match only at the first matching point in the string
that is being searched. This effect can also be achieved by appropriate
constructs in the pattern itself such as the "^" metacharacter.
@G_REGEX_MATCH_NOTBOL: Specifies that the first character of the string is
not the beginning of a line, so the circumflex metacharacter should not
match before it. Setting this without G_REGEX_MULTILINE (at compile time)
causes circumflex never to match. This option affects only the behaviour of
the circumflex metacharacter, it does not affect "\A".
@G_REGEX_MATCH_NOTEOL: Specifies that the end of the subject string is
not the end of a line, so the dollar metacharacter should not match it nor
(except in multiline mode) a newline immediately before it. Setting this
without G_REGEX_MULTILINE (at compile time) causes dollar never to match.
This option affects only the behaviour of the dollar metacharacter, it does
not affect "\Z" or "\z".
@G_REGEX_MATCH_NOTEMPTY: An empty string is not considered to be a valid
match if this option is set. If there are alternatives in the pattern, they
are tried. If all the alternatives match the empty string, the entire match
fails. For example, if the pattern "a?b?" is applied to a string not beginning
with "a" or "b", it matches the empty string at the start of the string.
With this flag set, this match is not valid, so GRegex searches further
into the string for occurrences of "a" or "b".
@G_REGEX_MATCH_PARTIAL: Turns on the partial matching feature, for more
documentation on partial matching see g_regex_is_partial_match().
@G_REGEX_MATCH_NEWLINE_CR: Overrides the newline definition set when creating
a new #GRegex, setting the '\r' character as line terminator.
@G_REGEX_MATCH_NEWLINE_LF: Overrides the newline definition set when creating
a new #GRegex, setting the '\n' character as line terminator.
@G_REGEX_MATCH_NEWLINE_CRLF: Overrides the newline definition set when creating
a new #GRegex, setting the '\r\n' characters as line terminator.
@G_REGEX_MATCH_NEWLINE_ANY: Overrides the newline definition set when creating
a new #GRegex, any newline character or character sequence is recognized.
@Since: 2.14
<!-- ##### STRUCT GRegex ##### -->
<para>
A GRegex is the "compiled" form of a regular expression pattern. This
structure is opaque and its fields cannot be accessed directly.
</para>
@Since: 2.14
<!-- ##### USER_FUNCTION GRegexEvalCallback ##### -->
<para>
Specifies the type of the function passed to g_regex_replace_eval().
It is called for each occurrence of the pattern @regex in @string, and it
should append the replacement to @result.
</para>
<para>
Do not call on @regex functions that modify its internal state, such as
g_regex_match(); if you need it you can create a temporary copy of
@regex using g_regex_copy().
</para>
@Param1: a #GRegex.
@Param2: the string used to perform matches against.
@Param3: a #GString containing the new string.
@Param4: user data passed to g_regex_replace_eval().
@Returns: %FALSE to continue the replacement process, %TRUE to stop it.
@Since: 2.14
<!-- ##### FUNCTION g_regex_new ##### -->
<para>
</para>
@pattern:
@compile_options:
@match_options:
@error:
@Returns:
<!-- ##### FUNCTION g_regex_free ##### -->
<para>
</para>
@regex:
<!-- ##### FUNCTION g_regex_optimize ##### -->
<para>
</para>
@regex:
@error:
@Returns:
<!-- ##### FUNCTION g_regex_copy ##### -->
<para>
</para>
@regex:
@Returns:
<!-- ##### FUNCTION g_regex_get_pattern ##### -->
<para>
</para>
@regex:
@Returns:
<!-- ##### FUNCTION g_regex_clear ##### -->
<para>
</para>
@regex:
<!-- ##### FUNCTION g_regex_match_simple ##### -->
<para>
</para>
@pattern:
@string:
@compile_options:
@match_options:
@Returns:
<!-- ##### FUNCTION g_regex_match ##### -->
<para>
</para>
@regex:
@string:
@match_options:
@Returns:
<!-- ##### FUNCTION g_regex_match_full ##### -->
<para>
</para>
@regex:
@string:
@string_len:
@start_position:
@match_options:
@error:
@Returns:
<!-- ##### FUNCTION g_regex_match_next ##### -->
<para>
</para>
@regex:
@string:
@match_options:
@Returns:
<!-- ##### FUNCTION g_regex_match_next_full ##### -->
<para>
</para>
@regex:
@string:
@string_len:
@start_position:
@match_options:
@error:
@Returns:
<!-- ##### FUNCTION g_regex_match_all ##### -->
<para>
</para>
@regex:
@string:
@match_options:
@Returns:
<!-- ##### FUNCTION g_regex_match_all_full ##### -->
<para>
</para>
@regex:
@string:
@string_len:
@start_position:
@match_options:
@error:
@Returns:
<!-- ##### FUNCTION g_regex_get_match_count ##### -->
<para>
</para>
@regex:
@Returns:
<!-- ##### FUNCTION g_regex_is_partial_match ##### -->
<para>
</para>
@regex:
@Returns:
<!-- ##### FUNCTION g_regex_fetch ##### -->
<para>
</para>
@regex:
@match_num:
@string:
@Returns:
<!-- ##### FUNCTION g_regex_fetch_pos ##### -->
<para>
</para>
@regex:
@match_num:
@start_pos:
@end_pos:
@Returns:
<!-- ##### FUNCTION g_regex_fetch_named ##### -->
<para>
</para>
@regex:
@name:
@string:
@Returns:
<!-- ##### FUNCTION g_regex_fetch_named_pos ##### -->
<para>
</para>
@regex:
@name:
@start_pos:
@end_pos:
@Returns:
<!-- ##### FUNCTION g_regex_fetch_all ##### -->
<para>
</para>
@regex:
@string:
@Returns:
<!-- ##### FUNCTION g_regex_get_string_number ##### -->
<para>
</para>
@regex:
@name:
@Returns:
<!-- ##### FUNCTION g_regex_split_simple ##### -->
<para>
</para>
@pattern:
@string:
@compile_options:
@match_options:
@Returns:
<!-- ##### FUNCTION g_regex_split ##### -->
<para>
</para>
@regex:
@string:
@match_options:
@Returns:
<!-- ##### FUNCTION g_regex_split_full ##### -->
<para>
</para>
@regex:
@string:
@string_len:
@start_position:
@match_options:
@max_tokens:
@error:
@Returns:
<!-- ##### FUNCTION g_regex_split_next ##### -->
<para>
</para>
@regex:
@string:
@match_options:
@Returns:
<!-- ##### FUNCTION g_regex_split_next_full ##### -->
<para>
</para>
@regex:
@string:
@string_len:
@start_position:
@match_options:
@error:
@Returns:
<!-- ##### FUNCTION g_regex_expand_references ##### -->
<para>
</para>
@regex:
@string:
@string_to_expand:
@error:
@Returns:
<!-- ##### FUNCTION g_regex_replace ##### -->
<para>
</para>
@regex:
@string:
@string_len:
@start_position:
@replacement:
@match_options:
@error:
@Returns:
<!-- ##### FUNCTION g_regex_replace_literal ##### -->
<para>
</para>
@regex:
@string:
@string_len:
@start_position:
@replacement:
@match_options:
@error:
@Returns:
<!-- ##### FUNCTION g_regex_replace_eval ##### -->
<para>
</para>
@regex:
@string:
@string_len:
@start_position:
@match_options:
@eval:
@user_data:
@error:
@Returns:
<!-- ##### FUNCTION g_regex_escape_string ##### -->
<para>
</para>
@string:
@length:
@Returns:

View File

@ -6,9 +6,21 @@ PRINTF_SUBDIR = gnulib
printf_la = gnulib/libgnulib.la printf_la = gnulib/libgnulib.la
endif endif
SUBDIRS = libcharset $(PRINTF_SUBDIR) if ENABLE_REGEX
if USE_SYSTEM_PCRE
else
MAYBE_PCRE = pcre
endif
gregex_c = gregex.c
gregex_h = gregex.h
else
gregex_c =
gregex_h =
endif
DIST_SUBDIRS = libcharset gnulib SUBDIRS = libcharset $(PRINTF_SUBDIR) $(MAYBE_PCRE) update-pcre
DIST_SUBDIRS = libcharset gnulib pcre update-pcre
INCLUDES = -I$(top_srcdir) -DG_LOG_DOMAIN=\"GLib\" \ INCLUDES = -I$(top_srcdir) -DG_LOG_DOMAIN=\"GLib\" \
$(GLIB_DEBUG_FLAGS) -DG_DISABLE_DEPRECATED -DGLIB_COMPILATION $(GLIB_DEBUG_FLAGS) -DG_DISABLE_DEPRECATED -DGLIB_COMPILATION
@ -36,6 +48,8 @@ MIRRORING_TAB_SOURCES = \
glib-mirroring-tab/packtab.h \ glib-mirroring-tab/packtab.h \
glib-mirroring-tab/packtab.c glib-mirroring-tab/packtab.c
# The compilation of GRegex can be disabled, but the source files must
# be distributed.
EXTRA_DIST = \ EXTRA_DIST = \
makefile.msc.in \ makefile.msc.in \
glib.rc.in \ glib.rc.in \
@ -45,6 +59,8 @@ EXTRA_DIST = \
abicheck.sh \ abicheck.sh \
pltcheck.sh \ pltcheck.sh \
glib.symbols \ glib.symbols \
gregex.c \
gregex.h \
$(MIRRORING_TAB_SOURCES) $(MIRRORING_TAB_SOURCES)
# These may be in the builddir too # These may be in the builddir too
@ -106,6 +122,7 @@ libglib_2_0_la_SOURCES = \
gqueue.c \ gqueue.c \
grel.c \ grel.c \
grand.c \ grand.c \
$(gregex_c) \
gscanner.c \ gscanner.c \
gscripttable.h \ gscripttable.h \
gsequence.c \ gsequence.c \
@ -185,6 +202,7 @@ glibsubinclude_HEADERS = \
gquark.h \ gquark.h \
gqueue.h \ gqueue.h \
grand.h \ grand.h \
$(gregex_h) \
grel.h \ grel.h \
gscanner.h \ gscanner.h \
gsequence.h \ gsequence.h \
@ -239,7 +257,17 @@ glib_win32_res = glib-win32-res.o
glib_win32_res_ldflag = -Wl,$(glib_win32_res) glib_win32_res_ldflag = -Wl,$(glib_win32_res)
endif endif
libglib_2_0_la_LIBADD = libcharset/libcharset.la $(printf_la) @GIO@ @GSPAWN@ @PLATFORMDEP@ @ICONV_LIBS@ @G_LIBS_EXTRA@ if ENABLE_REGEX
if USE_SYSTEM_PCRE
pcre_lib = $(PCRE_LIBS)
else
pcre_lib = pcre/libpcre.la
endif
else
pcre_lib =
endif
libglib_2_0_la_LIBADD = libcharset/libcharset.la $(printf_la) @GIO@ @GSPAWN@ @PLATFORMDEP@ @ICONV_LIBS@ @G_LIBS_EXTRA@ $(pcre_lib)
libglib_2_0_la_DEPENDENCIES = libcharset/libcharset.la $(printf_la) @GIO@ @GSPAWN@ @PLATFORMDEP@ $(glib_win32_res) @GLIB_DEF@ libglib_2_0_la_DEPENDENCIES = libcharset/libcharset.la $(printf_la) @GIO@ @GSPAWN@ @PLATFORMDEP@ $(glib_win32_res) @GLIB_DEF@
libglib_2_0_la_LDFLAGS = \ libglib_2_0_la_LDFLAGS = \

View File

@ -62,6 +62,7 @@
#include <glib/gqueue.h> #include <glib/gqueue.h>
#include <glib/grand.h> #include <glib/grand.h>
#include <glib/grel.h> #include <glib/grel.h>
#include <glib/gregex.h>
#include <glib/gscanner.h> #include <glib/gscanner.h>
#include <glib/gsequence.h> #include <glib/gsequence.h>
#include <glib/gshell.h> #include <glib/gshell.h>

View File

@ -1416,6 +1416,43 @@ g_get_codeset
#endif #endif
#endif #endif
#if IN_HEADER(__G_REGEX_H__)
#if IN_FILE(__G_REGEX_C__)
g_regex_error_quark
g_regex_new
g_regex_free
g_regex_optimize
g_regex_copy
g_regex_get_pattern
g_regex_clear
g_regex_match_simple
g_regex_match
g_regex_match_full
g_regex_match_next
g_regex_match_next_full
g_regex_match_all
g_regex_match_all_full
g_regex_get_match_count
g_regex_is_partial_match
g_regex_fetch
g_regex_fetch_pos
g_regex_fetch_named
g_regex_fetch_named_pos
g_regex_fetch_all
g_regex_get_string_number
g_regex_split_simple
g_regex_split
g_regex_split_full
g_regex_split_next
g_regex_split_next_full
g_regex_expand_references
g_regex_replace
g_regex_replace_literal
g_regex_replace_eval
g_regex_escape_string
#endif
#endif
#if IN_HEADER(__G_WIN32_H__) #if IN_HEADER(__G_WIN32_H__)
#if IN_FILE(__G_WIN32_H__) #if IN_FILE(__G_WIN32_H__)
#ifdef G_OS_WIN32 #ifdef G_OS_WIN32

2448
glib/gregex.c Normal file

File diff suppressed because it is too large Load Diff

197
glib/gregex.h Normal file
View File

@ -0,0 +1,197 @@
/* GRegex -- regular expression API wrapper around PCRE.
*
* Copyright (C) 1999, 2000 Scott Wimer
* Copyright (C) 2004, Matthias Clasen <mclasen@redhat.com>
* Copyright (C) 2005 - 2006, Marco Barisione <marco@barisione.org>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef __G_REGEX_H__
#define __G_REGEX_H__
#include <glib.h>
G_BEGIN_DECLS
typedef enum
{
G_REGEX_ERROR_COMPILE,
G_REGEX_ERROR_OPTIMIZE,
G_REGEX_ERROR_REPLACE,
G_REGEX_ERROR_MATCH
} GRegexError;
#define G_REGEX_ERROR g_regex_error_quark ()
GQuark g_regex_error_quark (void);
/* Remember to update G_REGEX_COMPILE_MASK in gregex.c after
* adding a new flag. */
typedef enum
{
G_REGEX_CASELESS = 1 << 0,
G_REGEX_MULTILINE = 1 << 1,
G_REGEX_DOTALL = 1 << 2,
G_REGEX_EXTENDED = 1 << 3,
G_REGEX_ANCHORED = 1 << 4,
G_REGEX_DOLLAR_ENDONLY = 1 << 5,
G_REGEX_UNGREEDY = 1 << 9,
G_REGEX_RAW = 1 << 11,
G_REGEX_NO_AUTO_CAPTURE = 1 << 12,
G_REGEX_DUPNAMES = 1 << 19,
G_REGEX_NEWLINE_CR = 1 << 20,
G_REGEX_NEWLINE_LF = 1 << 21,
G_REGEX_NEWLINE_CRLF = G_REGEX_NEWLINE_CR | G_REGEX_NEWLINE_LF
} GRegexCompileFlags;
/* Match-time option flags for GRegex.
 *
 * Each flag occupies its own bit, except G_REGEX_MATCH_NEWLINE_CRLF,
 * which is defined as the combination of the CR and LF bits.  The
 * values are part of the public API, so existing ones must not be
 * renumbered.
 *
 * NOTE(review): the sparse bit positions presumably mirror the option
 * bits of the underlying PCRE library -- confirm in gregex.c.
 *
 * Remember to update G_REGEX_MATCH_MASK in gregex.c after
 * adding a new flag.
 *
 * No comma follows the last enumerator: a trailing comma is rejected
 * by C89 and C++98 compilers in pedantic mode, and this is a public
 * header.
 */
typedef enum
{
  G_REGEX_MATCH_ANCHORED = 1 << 4,
  G_REGEX_MATCH_NOTBOL = 1 << 7,
  G_REGEX_MATCH_NOTEOL = 1 << 8,
  G_REGEX_MATCH_NOTEMPTY = 1 << 10,
  G_REGEX_MATCH_PARTIAL = 1 << 15,
  G_REGEX_MATCH_NEWLINE_CR = 1 << 20,
  G_REGEX_MATCH_NEWLINE_LF = 1 << 21,
  G_REGEX_MATCH_NEWLINE_CRLF = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_LF,
  G_REGEX_MATCH_NEWLINE_ANY = 1 << 22
} GRegexMatchFlags;
typedef struct _GRegex GRegex;
typedef gboolean (*GRegexEvalCallback) (const GRegex*, const gchar*, GString*, gpointer);
GRegex *g_regex_new (const gchar *pattern,
GRegexCompileFlags compile_options,
GRegexMatchFlags match_options,
GError **error);
void g_regex_free (GRegex *regex);
gboolean g_regex_optimize (GRegex *regex,
GError **error);
GRegex *g_regex_copy (const GRegex *regex);
const gchar *g_regex_get_pattern (const GRegex *regex);
void g_regex_clear (GRegex *regex);
gboolean g_regex_match_simple (const gchar *pattern,
const gchar *string,
GRegexCompileFlags compile_options,
GRegexMatchFlags match_options);
gboolean g_regex_match (GRegex *regex,
const gchar *string,
GRegexMatchFlags match_options);
gboolean g_regex_match_full (GRegex *regex,
const gchar *string,
gssize string_len,
gint start_position,
GRegexMatchFlags match_options,
GError **error);
gboolean g_regex_match_next (GRegex *regex,
const gchar *string,
GRegexMatchFlags match_options);
gboolean g_regex_match_next_full (GRegex *regex,
const gchar *string,
gssize string_len,
gint start_position,
GRegexMatchFlags match_options,
GError **error);
gboolean g_regex_match_all (GRegex *regex,
const gchar *string,
GRegexMatchFlags match_options);
gboolean g_regex_match_all_full (GRegex *regex,
const gchar *string,
gssize string_len,
gint start_position,
GRegexMatchFlags match_options,
GError **error);
gint g_regex_get_match_count (const GRegex *regex);
gboolean g_regex_is_partial_match (const GRegex *regex);
gchar *g_regex_fetch (const GRegex *regex,
gint match_num,
const gchar *string);
gboolean g_regex_fetch_pos (const GRegex *regex,
gint match_num,
gint *start_pos,
gint *end_pos);
gchar *g_regex_fetch_named (const GRegex *regex,
const gchar *name,
const gchar *string);
gboolean g_regex_fetch_named_pos (const GRegex *regex,
const gchar *name,
gint *start_pos,
gint *end_pos);
gchar **g_regex_fetch_all (const GRegex *regex,
const gchar *string);
gint g_regex_get_string_number (const GRegex *regex,
const gchar *name);
gchar **g_regex_split_simple (const gchar *pattern,
const gchar *string,
GRegexCompileFlags compile_options,
GRegexMatchFlags match_options);
gchar **g_regex_split (GRegex *regex,
const gchar *string,
GRegexMatchFlags match_options);
gchar **g_regex_split_full (GRegex *regex,
const gchar *string,
gssize string_len,
gint start_position,
GRegexMatchFlags match_options,
gint max_tokens,
GError **error);
gchar *g_regex_split_next (GRegex *regex,
const gchar *string,
GRegexMatchFlags match_options);
gchar *g_regex_split_next_full (GRegex *regex,
const gchar *string,
gssize string_len,
gint start_position,
GRegexMatchFlags match_options,
GError **error);
gchar *g_regex_expand_references (GRegex *regex,
const gchar *string,
const gchar *string_to_expand,
GError **error);
gchar *g_regex_replace (GRegex *regex,
const gchar *string,
gssize string_len,
gint start_position,
const gchar *replacement,
GRegexMatchFlags match_options,
GError **error);
gchar *g_regex_replace_literal (GRegex *regex,
const gchar *string,
gssize string_len,
gint start_position,
const gchar *replacement,
GRegexMatchFlags match_options,
GError **error);
gchar *g_regex_replace_eval (GRegex *regex,
const gchar *string,
gssize string_len,
gint start_position,
GRegexMatchFlags match_options,
GRegexEvalCallback eval,
gpointer user_data,
GError **error);
gchar *g_regex_escape_string (const gchar *string,
gint length);
G_END_DECLS
#endif /* __G_REGEX_H__ */

View File

@ -17,6 +17,7 @@ all : \
galias.h \ galias.h \
galiasdef.c \ galiasdef.c \
gnulib\gnulib.lib \ gnulib\gnulib.lib \
pcre\pcre.lib \
libglib-2.0-0.dll \ libglib-2.0-0.dll \
glib-@GLIB_MAJOR_VERSION@.@GLIB_MINOR_VERSION@s.lib \ glib-@GLIB_MAJOR_VERSION@.@GLIB_MINOR_VERSION@s.lib \
gspawn-win32-helper.exe \ gspawn-win32-helper.exe \
@ -27,6 +28,11 @@ gnulib\gnulib.lib :
nmake -f makefile.msc nmake -f makefile.msc
cd .. cd ..
pcre\pcre.lib :
cd pcre
nmake -f makefile.msc
cd ..
glib_OBJECTS = \ glib_OBJECTS = \
garray.obj \ garray.obj \
gasyncqueue.obj \ gasyncqueue.obj \
@ -61,6 +67,7 @@ glib_OBJECTS = \
gpattern.obj \ gpattern.obj \
gprintf.obj \ gprintf.obj \
grand.obj \ grand.obj \
gregex.obj \
grel.obj \ grel.obj \
gscanner.obj \ gscanner.obj \
gsequence.obj \ gsequence.obj \
@ -112,12 +119,12 @@ glib.res : glib.rc
# create a static library # create a static library
# static library can well have the real version number in the name # static library can well have the real version number in the name
glib-@GLIB_MAJOR_VERSION@.@GLIB_MINOR_VERSION@s.lib : $(glib_OBJECTS) gnulib\gnulib.lib glib-@GLIB_MAJOR_VERSION@.@GLIB_MINOR_VERSION@s.lib : $(glib_OBJECTS) gnulib\gnulib.lib pcre\pcre.lib
lib /out:glib-@GLIB_MAJOR_VERSION@.@GLIB_MINOR_VERSION@s.lib $(glib_OBJECTS) gnulib\gnulib.lib lib /out:glib-@GLIB_MAJOR_VERSION@.@GLIB_MINOR_VERSION@s.lib $(glib_OBJECTS) gnulib\gnulib.lib pcre\pcre.lib
libglib-2.0-0.dll : $(glib_OBJECTS) gnulib\gnulib.lib glib.def glib.res libglib-2.0-0.dll : $(glib_OBJECTS) gnulib\gnulib.lib pcre\pcre.lib glib.def glib.res
$(CC) $(CFLAGS) -LD -Fe$@ $(glib_OBJECTS) glib.res $(LIBICONV_LIBS) $(INTL_LIBS) \ $(CC) $(CFLAGS) -LD -Fe$@ $(glib_OBJECTS) glib.res $(LIBICONV_LIBS) $(INTL_LIBS) \
gnulib\gnulib.lib $(DIRENT_LIBS) user32.lib advapi32.lib shell32.lib wsock32.lib ole32.lib ws2_32.lib \ gnulib\gnulib.lib pcre\pcre.lib $(DIRENT_LIBS) user32.lib advapi32.lib shell32.lib wsock32.lib ole32.lib ws2_32.lib \
$(LDFLAGS) /implib:glib-2.0.lib /def:glib.def $(LDFLAGS) /implib:glib-2.0.lib /def:glib.def
gspawn-win32-helper.exe : gspawn-win32-helper.c libglib-2.0-@LT_CURRENT_MINUS_AGE@.dll gspawn-win32-helper.exe : gspawn-win32-helper.c libglib-2.0-@LT_CURRENT_MINUS_AGE@.dll

68
glib/pcre/COPYING Normal file
View File

@ -0,0 +1,68 @@
PCRE LICENCE
------------
PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Release 7 of PCRE is distributed under the terms of the "BSD" licence, as
specified below. The documentation for PCRE, supplied in the "doc"
directory, is distributed under the same terms as the software itself.
The basic library functions are written in C and are freestanding. Also
included in the distribution is a set of C++ wrapper functions.
THE BASIC LIBRARY FUNCTIONS
---------------------------
Written by: Philip Hazel
Email local part: ph10
Email domain: cam.ac.uk
University of Cambridge Computing Service,
Cambridge, England. Phone: +44 1223 334714.
Copyright (c) 1997-2006 University of Cambridge
All rights reserved.
THE C++ WRAPPER FUNCTIONS
-------------------------
Contributed by: Google Inc.
Copyright (c) 2006, Google Inc.
All rights reserved.
THE "BSD" LICENCE
-----------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the name of Google
Inc. nor the names of their contributors may be used to endorse or
promote products derived from this software without specific prior
written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
End

62
glib/pcre/Makefile.am Normal file
View File

@ -0,0 +1,62 @@
INCLUDES = \
-DG_LOG_DOMAIN=\"GLib-GRegex\" \
-DSUPPORT_UCP \
-DSUPPORT_UTF8 \
-DNEWLINE=-1 \
-DMATCH_LIMIT=10000000 \
-DMATCH_LIMIT_RECURSION=10000000 \
-DMAX_NAME_SIZE=32 \
-DMAX_NAME_COUNT=10000 \
-DMAX_DUPLENGTH=30000 \
-DLINK_SIZE=2 \
-DEBCDIC=0 \
-DPOSIX_MALLOC_THRESHOLD=10 \
-I$(top_srcdir) \
-I$(srcdir) \
-I$(top_srcdir)/glib \
@GLIB_DEBUG_FLAGS@ \
-DG_DISABLE_DEPRECATED \
$(DEPRECATED_FLAGS)\
$(WARN_CFLAGS) \
$(PCRE_WARN_CFLAGS) \
$(DEP_CFLAGS)
noinst_LTLIBRARIES = libpcre.la
libpcre_headers =
libpcre_la_SOURCES = \
pcre_chartables.c \
pcre_compile.c \
pcre_config.c \
pcre_dfa_exec.c \
pcre_exec.c \
pcre_fullinfo.c \
pcre_get.c \
pcre_globals.c \
pcre_info.c \
pcre_maketables.c \
pcre_newline.c \
pcre_ord2utf8.c \
pcre_refcount.c \
pcre_study.c \
pcre_tables.c \
pcre_try_flipped.c \
pcre_ucp_searchfuncs.c \
pcre_valid_utf8.c \
pcre_version.c \
pcre_xclass.c \
pcre.h \
pcre_internal.h \
ucp.h \
ucpinternal.h \
$(libpcre_headers)
libpcre_la_LIBADD = $(DEP_LIBS)
libpcre_la_LDFLAGS = -no-undefined
EXTRA_DIST = \
COPYING \
makefile.msc

49
glib/pcre/makefile.msc Normal file
View File

@ -0,0 +1,49 @@
!IFDEF DEBUG
CRT=-MDd
!ELSE
CRT=-MD
!ENDIF
# Compile-time configuration of the bundled PCRE for the MSVC (nmake) build.
# The -D settings from SUPPORT_UCP through POSIX_MALLOC_THRESHOLD must stay
# identical to the ones in INCLUDES in Makefile.am, otherwise the Windows and
# autotools builds of GLib configure PCRE differently.
#
# NEWLINE is set to -1 to match Makefile.am.  pcre_config() hands this value
# back for PCRE_CONFIG_NEWLINE, so a mismatch is visible to callers.
# NOTE(review): in PCRE 7.0 a negative NEWLINE is understood to select
# "any newline sequence" as the default -- confirm in pcre_compile.c.
CFLAGS = \
	-I ..\.. \
	-DHAVE_CONFIG_H \
	-DHAVE_LONG_LONG_FORMAT \
	-DSUPPORT_UCP \
	-DSUPPORT_UTF8 \
	-DNEWLINE=-1 \
	-DMATCH_LIMIT=10000000 \
	-DMATCH_LIMIT_RECURSION=10000000 \
	-DMAX_NAME_SIZE=32 \
	-DMAX_NAME_COUNT=10000 \
	-DMAX_DUPLENGTH=30000 \
	-DLINK_SIZE=2 \
	-DEBCDIC=0 \
	-DPOSIX_MALLOC_THRESHOLD=10
OBJECTS = \
pcre_chartables.obj \
pcre_compile.obj \
pcre_config.obj \
pcre_dfa_exec.obj \
pcre_exec.obj \
pcre_fullinfo.obj \
pcre_get.obj \
pcre_globals.obj \
pcre_info.obj \
pcre_maketables.obj \
pcre_newline.obj \
pcre_ord2utf8.obj \
pcre_refcount.obj \
pcre_study.obj \
pcre_tables.obj \
pcre_try_flipped.obj \
pcre_ucp_searchfuncs.obj \
pcre_valid_utf8.obj \
pcre_version.obj \
pcre_xclass.obj \
pcre.lib : $(OBJECTS)
lib -out:pcre.lib $(OBJECTS)
.c.obj:
$(CC) $(CRT) $(CFLAGS) -Ox -GD -c $<

283
glib/pcre/pcre.h Normal file
View File

@ -0,0 +1,283 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* This is the public header file for the PCRE library, to be #included by
applications that call the PCRE functions.
Copyright (c) 1997-2006 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
#ifndef _PCRE_H
#define _PCRE_H
/* The current PCRE version information. */
/* NOTES FOR FUTURE MAINTAINERS: Do not use numbers with leading zeros, because
they may be treated as octal constants. The PCRE_PRERELEASE feature is for
identifying release candidates. It might be defined as -RC2, for example. In
real releases, it should be defined empty. Do not change the alignment of these
statements. The code in ./configure greps out the version numbers by using "cut"
to get values from column 29 onwards. These are substituted into pcre-config
and libpcre.pc. The values are not put into configure.ac and substituted here
(which would simplify this issue) because that makes life harder for those who
cannot run ./configure. As it now stands, this file need not be edited in that
circumstance. */
#define PCRE_MAJOR 7
#define PCRE_MINOR 0
#define PCRE_PRERELEASE
#define PCRE_DATE 18-Dec-2006
/* Win32 uses DLL by default; it needs special stuff for exported functions
when building PCRE. */
#ifdef _WIN32
# ifdef PCRE_DEFINITION
# ifdef DLL_EXPORT
# define PCRE_DATA_SCOPE __declspec(dllexport)
# endif
# else
# ifndef PCRE_STATIC
# define PCRE_DATA_SCOPE extern __declspec(dllimport)
# endif
# endif
#endif
/* Otherwise, we use the standard "extern". */
#ifndef PCRE_DATA_SCOPE
# ifdef __cplusplus
# define PCRE_DATA_SCOPE extern "C"
# else
# define PCRE_DATA_SCOPE extern
# endif
#endif
/* Have to include stdlib.h in order to ensure that size_t is defined;
it is needed here for malloc. */
#include <stdlib.h>
/* Allow for C++ users */
#ifdef __cplusplus
extern "C" {
#endif
/* Options */
#define PCRE_CASELESS 0x00000001
#define PCRE_MULTILINE 0x00000002
#define PCRE_DOTALL 0x00000004
#define PCRE_EXTENDED 0x00000008
#define PCRE_ANCHORED 0x00000010
#define PCRE_DOLLAR_ENDONLY 0x00000020
#define PCRE_EXTRA 0x00000040
#define PCRE_NOTBOL 0x00000080
#define PCRE_NOTEOL 0x00000100
#define PCRE_UNGREEDY 0x00000200
#define PCRE_NOTEMPTY 0x00000400
#define PCRE_UTF8 0x00000800
#define PCRE_NO_AUTO_CAPTURE 0x00001000
#define PCRE_NO_UTF8_CHECK 0x00002000
#define PCRE_AUTO_CALLOUT 0x00004000
#define PCRE_PARTIAL 0x00008000
#define PCRE_DFA_SHORTEST 0x00010000
#define PCRE_DFA_RESTART 0x00020000
#define PCRE_FIRSTLINE 0x00040000
#define PCRE_DUPNAMES 0x00080000
#define PCRE_NEWLINE_CR 0x00100000
#define PCRE_NEWLINE_LF 0x00200000
#define PCRE_NEWLINE_CRLF 0x00300000
#define PCRE_NEWLINE_ANY 0x00400000
/* Exec-time and get/set-time error codes */
#define PCRE_ERROR_NOMATCH (-1)
#define PCRE_ERROR_NULL (-2)
#define PCRE_ERROR_BADOPTION (-3)
#define PCRE_ERROR_BADMAGIC (-4)
#define PCRE_ERROR_UNKNOWN_OPCODE (-5)
#define PCRE_ERROR_UNKNOWN_NODE (-5) /* For backward compatibility */
#define PCRE_ERROR_NOMEMORY (-6)
#define PCRE_ERROR_NOSUBSTRING (-7)
#define PCRE_ERROR_MATCHLIMIT (-8)
#define PCRE_ERROR_CALLOUT (-9) /* Never used by PCRE itself */
#define PCRE_ERROR_BADUTF8 (-10)
#define PCRE_ERROR_BADUTF8_OFFSET (-11)
#define PCRE_ERROR_PARTIAL (-12)
#define PCRE_ERROR_BADPARTIAL (-13)
#define PCRE_ERROR_INTERNAL (-14)
#define PCRE_ERROR_BADCOUNT (-15)
#define PCRE_ERROR_DFA_UITEM (-16)
#define PCRE_ERROR_DFA_UCOND (-17)
#define PCRE_ERROR_DFA_UMLIMIT (-18)
#define PCRE_ERROR_DFA_WSSIZE (-19)
#define PCRE_ERROR_DFA_RECURSE (-20)
#define PCRE_ERROR_RECURSIONLIMIT (-21)
#define PCRE_ERROR_NULLWSLIMIT (-22)
#define PCRE_ERROR_BADNEWLINE (-23)
/* Request types for pcre_fullinfo() */
#define PCRE_INFO_OPTIONS 0
#define PCRE_INFO_SIZE 1
#define PCRE_INFO_CAPTURECOUNT 2
#define PCRE_INFO_BACKREFMAX 3
#define PCRE_INFO_FIRSTBYTE 4
#define PCRE_INFO_FIRSTCHAR 4 /* For backwards compatibility */
#define PCRE_INFO_FIRSTTABLE 5
#define PCRE_INFO_LASTLITERAL 6
#define PCRE_INFO_NAMEENTRYSIZE 7
#define PCRE_INFO_NAMECOUNT 8
#define PCRE_INFO_NAMETABLE 9
#define PCRE_INFO_STUDYSIZE 10
#define PCRE_INFO_DEFAULT_TABLES 11
/* Request types for pcre_config(). Do not re-arrange, in order to remain
compatible. */
#define PCRE_CONFIG_UTF8 0
#define PCRE_CONFIG_NEWLINE 1
#define PCRE_CONFIG_LINK_SIZE 2
#define PCRE_CONFIG_POSIX_MALLOC_THRESHOLD 3
#define PCRE_CONFIG_MATCH_LIMIT 4
#define PCRE_CONFIG_STACKRECURSE 5
#define PCRE_CONFIG_UNICODE_PROPERTIES 6
#define PCRE_CONFIG_MATCH_LIMIT_RECURSION 7
/* Bit flags for the pcre_extra structure. Do not re-arrange or redefine
these bits, just add new ones on the end, in order to remain compatible. */
#define PCRE_EXTRA_STUDY_DATA 0x0001
#define PCRE_EXTRA_MATCH_LIMIT 0x0002
#define PCRE_EXTRA_CALLOUT_DATA 0x0004
#define PCRE_EXTRA_TABLES 0x0008
#define PCRE_EXTRA_MATCH_LIMIT_RECURSION 0x0010
/* Types */
struct real_pcre; /* declaration; the definition is private */
typedef struct real_pcre pcre;
/* When PCRE is compiled as a C++ library, the subject pointer type can be
replaced with a custom type. For conventional use, the public interface is a
const char *. */
#ifndef PCRE_SPTR
#define PCRE_SPTR const char *
#endif
/* The structure for passing additional data to pcre_exec(). This is defined in
such a way as to be extensible. Always add new fields at the end, in order to
remain compatible. */
typedef struct pcre_extra {
unsigned long int flags; /* Bits for which fields are set */
void *study_data; /* Opaque data from pcre_study() */
unsigned long int match_limit; /* Maximum number of calls to match() */
void *callout_data; /* Data passed back in callouts */
const unsigned char *tables; /* Pointer to character tables */
unsigned long int match_limit_recursion; /* Max recursive calls to match() */
} pcre_extra;
/* The structure for passing out data via the pcre_callout_function. We use a
structure so that new fields can be added on the end in future versions,
without changing the API of the function, thereby allowing old clients to work
without modification. */
typedef struct pcre_callout_block {
int version; /* Identifies version of block */
/* ------------------------ Version 0 ------------------------------- */
int callout_number; /* Number compiled into pattern */
int *offset_vector; /* The offset vector */
PCRE_SPTR subject; /* The subject being matched */
int subject_length; /* The length of the subject */
int start_match; /* Offset to start of this match attempt */
int current_position; /* Where we currently are in the subject */
int capture_top; /* Max current capture */
int capture_last; /* Most recently closed capture */
void *callout_data; /* Data passed in with the call */
/* ------------------- Added for Version 1 -------------------------- */
int pattern_position; /* Offset to next item in the pattern */
int next_item_length; /* Length of next item in the pattern */
/* ------------------------------------------------------------------ */
} pcre_callout_block;
#include "glib.h"
#include "galias.h"
#define pcre_malloc g_try_malloc
#define pcre_free g_free
#define pcre_stack_malloc g_try_malloc
PCRE_DATA_SCOPE int (*pcre_callout)(pcre_callout_block *);
/* Exported PCRE functions */
PCRE_DATA_SCOPE pcre *pcre_compile(const char *, int, const char **, int *,
const unsigned char *);
PCRE_DATA_SCOPE pcre *pcre_compile2(const char *, int, int *, const char **,
int *, const unsigned char *);
PCRE_DATA_SCOPE int pcre_config(int, void *);
PCRE_DATA_SCOPE int pcre_copy_named_substring(const pcre *, const char *,
int *, int, const char *, char *, int);
PCRE_DATA_SCOPE int pcre_copy_substring(const char *, int *, int, int, char *,
int);
PCRE_DATA_SCOPE int pcre_dfa_exec(const pcre *, const pcre_extra *,
const char *, int, int, int, int *, int , int *, int);
PCRE_DATA_SCOPE int pcre_exec(const pcre *, const pcre_extra *, PCRE_SPTR,
int, int, int, int *, int);
PCRE_DATA_SCOPE void pcre_free_substring(const char *);
PCRE_DATA_SCOPE void pcre_free_substring_list(const char **);
PCRE_DATA_SCOPE int pcre_fullinfo(const pcre *, const pcre_extra *, int,
void *);
PCRE_DATA_SCOPE int pcre_get_named_substring(const pcre *, const char *,
int *, int, const char *, const char **);
PCRE_DATA_SCOPE int pcre_get_stringnumber(const pcre *, const char *);
PCRE_DATA_SCOPE int pcre_get_stringtable_entries(const pcre *, const char *,
char **, char **);
PCRE_DATA_SCOPE int pcre_get_substring(const char *, int *, int, int,
const char **);
PCRE_DATA_SCOPE int pcre_get_substring_list(const char *, int *, int,
const char ***);
PCRE_DATA_SCOPE int pcre_info(const pcre *, int *, int *);
PCRE_DATA_SCOPE const unsigned char *pcre_maketables(void);
PCRE_DATA_SCOPE int pcre_refcount(pcre *, int);
PCRE_DATA_SCOPE pcre_extra *pcre_study(const pcre *, int, const char **);
PCRE_DATA_SCOPE const char *pcre_version(void);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* End of pcre.h */

195
glib/pcre/pcre_chartables.c Normal file
View File

@ -0,0 +1,195 @@
/* This file is autogenerated by ../update-pcre/update.sh during
* the update of the local copy of PCRE.
*/
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* This file is automatically written by the dftables auxiliary
program. If you edit it by hand, you might like to edit the Makefile to
prevent its ever being regenerated.
This file contains the default tables for characters with codes less than
128 (ASCII characters). These tables are used when no external tables are
passed to PCRE.
The following #include is present because without it gcc 4.x may remove
the array definition from the final binary if PCRE is built into a static
library and dead code stripping is activated. This leads to link errors.
Pulling in the header ensures that the array gets flagged as "someone
outside this compilation unit might reference this" and so it will always
be supplied to the linker. */
#include "pcre_internal.h"
const unsigned char _pcre_default_tables[] = {
/* This table is a lower casing table. */
0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39,
40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55,
56, 57, 58, 59, 60, 61, 62, 63,
64, 97, 98, 99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122, 91, 92, 93, 94, 95,
96, 97, 98, 99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
192,193,194,195,196,197,198,199,
200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,215,
216,217,218,219,220,221,222,223,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,
248,249,250,251,252,253,254,255,
/* This table is a case flipping table. */
0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39,
40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55,
56, 57, 58, 59, 60, 61, 62, 63,
64, 97, 98, 99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122, 91, 92, 93, 94, 95,
96, 65, 66, 67, 68, 69, 70, 71,
72, 73, 74, 75, 76, 77, 78, 79,
80, 81, 82, 83, 84, 85, 86, 87,
88, 89, 90,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
192,193,194,195,196,197,198,199,
200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,215,
216,217,218,219,220,221,222,223,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,
248,249,250,251,252,253,254,255,
/* This table contains bit maps for various character classes.
Each map is 32 bytes long and the bits run from the least
significant end of each byte. The classes that have their own
maps are: space, xdigit, digit, upper, lower, word, graph,
print, punct, and cntrl. Other classes are built from combinations. */
0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
/* This table identifies various classes of character by individual bits:
0x01 white space character
0x02 letter
0x04 decimal digit
0x08 hexadecimal digit
0x10 alphanumeric or '_'
0x80 regular expression metacharacter or binary zero
*/
0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
/* End of chartables.c */

5385
glib/pcre/pcre_compile.c Normal file

File diff suppressed because it is too large Load Diff

116
glib/pcre/pcre_config.c Normal file
View File

@ -0,0 +1,116 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2006 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains the external function pcre_config(). */
#include "pcre_internal.h"
/*************************************************
* Return info about what features are configured *
*************************************************/
/* This function has an extensible interface so that additional items can be
added compatibly.
Arguments:
what what information is required
where where to put the information
Returns: 0 if data returned, negative on error
*/
PCRE_DATA_SCOPE int
pcre_config(int what, void *where)
{
/* The caller's buffer is written through one of these two views: an int for
most items, an unsigned int for the two match limits. */
int *int_slot = (int *)where;
unsigned int *uint_slot = (unsigned int *)where;

/* Resolve the build-time feature switches once, up front, so that the
dispatch below is free of preprocessor conditionals. */
#ifdef SUPPORT_UTF8
const int utf8_enabled = 1;
#else
const int utf8_enabled = 0;
#endif

#ifdef SUPPORT_UCP
const int ucp_enabled = 1;
#else
const int ucp_enabled = 0;
#endif

/* Reported as 0 when NO_RECURSE is defined, 1 otherwise. */
#ifdef NO_RECURSE
const int stack_recursion = 0;
#else
const int stack_recursion = 1;
#endif

if (what == PCRE_CONFIG_UTF8)
  *int_slot = utf8_enabled;
else if (what == PCRE_CONFIG_UNICODE_PROPERTIES)
  *int_slot = ucp_enabled;
else if (what == PCRE_CONFIG_NEWLINE)
  *int_slot = NEWLINE;
else if (what == PCRE_CONFIG_LINK_SIZE)
  *int_slot = LINK_SIZE;
else if (what == PCRE_CONFIG_POSIX_MALLOC_THRESHOLD)
  *int_slot = POSIX_MALLOC_THRESHOLD;
else if (what == PCRE_CONFIG_MATCH_LIMIT)
  *uint_slot = MATCH_LIMIT;
else if (what == PCRE_CONFIG_MATCH_LIMIT_RECURSION)
  *uint_slot = MATCH_LIMIT_RECURSION;
else if (what == PCRE_CONFIG_STACKRECURSE)
  *int_slot = stack_recursion;
else
  return PCRE_ERROR_BADOPTION;   /* unknown request: buffer is left untouched */

return 0;
}
/* End of pcre_config.c */

2433
glib/pcre/pcre_dfa_exec.c Normal file

File diff suppressed because it is too large Load Diff

4199
glib/pcre/pcre_exec.c Normal file

File diff suppressed because it is too large Load Diff

149
glib/pcre/pcre_fullinfo.c Normal file
View File

@ -0,0 +1,149 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2006 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains the external function pcre_fullinfo(), which returns
information about a compiled pattern. */
#include "pcre_internal.h"
/*************************************************
* Return info about compiled pattern *
*************************************************/
/* This is a newer "info" function which has an extensible interface so
that additional items can be added compatibly.
Arguments:
argument_re points to compiled code
extra_data points to extra data, or NULL
what what information is required
where where to put the information
Returns: 0 if data returned, negative on error
*/
PCRE_DATA_SCOPE int
pcre_fullinfo(const pcre *argument_re, const pcre_extra *extra_data, int what,
  void *where)
{
  real_pcre flipped_re;
  pcre_study_data flipped_study;
  const real_pcre *re = (const real_pcre *)argument_re;
  const pcre_study_data *study = NULL;

  if (re == NULL || where == NULL)
    return PCRE_ERROR_NULL;

  /* Study data is only consulted when the caller's extra block flags it as
  present. */
  if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_STUDY_DATA) != 0)
    study = (const pcre_study_data *)extra_data->study_data;

  /* An unexpected magic number is handed to _pcre_try_flipped(); if that
  succeeds, the local copies are read from here on instead of the caller's
  blocks. */
  if (re->magic_number != MAGIC_NUMBER)
    {
      re = _pcre_try_flipped(re, &flipped_re, study, &flipped_study);
      if (re == NULL)
        return PCRE_ERROR_BADMAGIC;
      if (study != NULL)
        study = &flipped_study;
    }

  switch (what)
    {
    case PCRE_INFO_OPTIONS:
      *((unsigned long int *)where) = re->options & PUBLIC_OPTIONS;
      return 0;

    case PCRE_INFO_SIZE:
      *((size_t *)where) = re->size;
      return 0;

    case PCRE_INFO_STUDYSIZE:
      if (study == NULL)
        *((size_t *)where) = 0;
      else
        *((size_t *)where) = study->size;
      return 0;

    case PCRE_INFO_CAPTURECOUNT:
      *((int *)where) = re->top_bracket;
      return 0;

    case PCRE_INFO_BACKREFMAX:
      *((int *)where) = re->top_backref;
      return 0;

    case PCRE_INFO_FIRSTBYTE:
      /* The first byte if one is recorded, else -1 when PCRE_STARTLINE is
      set, else -2. */
      if ((re->options & PCRE_FIRSTSET) != 0)
        *((int *)where) = re->first_byte;
      else if ((re->options & PCRE_STARTLINE) != 0)
        *((int *)where) = -1;
      else
        *((int *)where) = -2;
      return 0;

    case PCRE_INFO_FIRSTTABLE:
      /* The pointer handed back must refer to the bit vector inside the
      caller's own study block, never to the local flipped copy, which does
      not outlive this call. */
      if (study != NULL && (study->options & PCRE_STUDY_MAPPED) != 0)
        *((const uschar **)where) =
          ((const pcre_study_data *)extra_data->study_data)->start_bits;
      else
        *((const uschar **)where) = NULL;
      return 0;

    case PCRE_INFO_LASTLITERAL:
      if ((re->options & PCRE_REQCHSET) != 0)
        *((int *)where) = re->req_byte;
      else
        *((int *)where) = -1;
      return 0;

    case PCRE_INFO_NAMEENTRYSIZE:
      *((int *)where) = re->name_entry_size;
      return 0;

    case PCRE_INFO_NAMECOUNT:
      *((int *)where) = re->name_count;
      return 0;

    case PCRE_INFO_NAMETABLE:
      /* The name table lives inside the compiled block at a byte offset. */
      *((const uschar **)where) = (const uschar *)re + re->name_table_offset;
      return 0;

    case PCRE_INFO_DEFAULT_TABLES:
      *((const uschar **)where) = (const uschar *)(_pcre_default_tables);
      return 0;

    default:
      break;
    }

  return PCRE_ERROR_BADOPTION;
}
/* End of pcre_fullinfo.c */

461
glib/pcre/pcre_get.c Normal file
View File

@ -0,0 +1,461 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2006 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains some convenience functions for extracting substrings
from the subject string after a regex match has succeeded. The original idea
for these functions came from Scott Wimer. */
#include "pcre_internal.h"
/*************************************************
* Find number for named string *
*************************************************/
/* This function is used by the get_first_set() function below, as well
as being generally available. It assumes that names are unique.
Arguments:
code the compiled regex
stringname the name whose number is required
Returns: the number of the named parentheses, or a negative number
(PCRE_ERROR_NOSUBSTRING) if not found
*/
int
pcre_get_stringnumber(const pcre *code, const char *stringname)
{
  int status;
  int entry_len;
  int hi, lo;
  uschar *table;

  /* Fetch the three facts that describe the name table, giving up as soon as
  any query fails or the table turns out to be empty. */
  status = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &hi);
  if (status != 0)
    return status;
  if (hi <= 0)
    return PCRE_ERROR_NOSUBSTRING;

  status = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entry_len);
  if (status != 0)
    return status;

  status = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &table);
  if (status != 0)
    return status;

  /* Binary search over the half-open range [lo, hi). Each entry is a
  two-byte, most-significant-first group number followed by the name. */
  for (lo = 0; lo < hi; )
    {
      int probe = (hi + lo) / 2;
      uschar *slot = table + entry_len*probe;
      int order = strcmp(stringname, (char *)(slot + 2));

      if (order == 0)
        return (slot[0] << 8) + slot[1];

      if (order > 0)
        lo = probe + 1;
      else
        hi = probe;
    }

  return PCRE_ERROR_NOSUBSTRING;
}
/*************************************************
* Find (multiple) entries for named string *
*************************************************/
/* This is used by the get_first_set() function below, as well as being
generally available. It is used when duplicated names are permitted.
Arguments:
code the compiled regex
stringname the name whose entries are required
firstptr where to put the pointer to the first entry
lastptr where to put the pointer to the last entry
Returns: the length of each entry, or a negative number
(PCRE_ERROR_NOSUBSTRING) if not found
*/
int
pcre_get_stringtable_entries(const pcre *code, const char *stringname,
  char **firstptr, char **lastptr)
{
  int status;
  int entry_len;
  int hi, lo;
  uschar *table, *final_entry;

  status = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &hi);
  if (status != 0)
    return status;
  if (hi <= 0)
    return PCRE_ERROR_NOSUBSTRING;

  status = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entry_len);
  if (status != 0)
    return status;

  status = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &table);
  if (status != 0)
    return status;

  /* Remember where the last table entry starts before "hi" is reused as the
  upper bound of the search. */
  final_entry = table + entry_len * (hi - 1);

  for (lo = 0; lo < hi; )
    {
      int probe = (hi + lo) / 2;
      uschar *hit = table + entry_len*probe;
      int order = strcmp(stringname, (char *)(hit + 2));

      if (order > 0)
        lo = probe + 1;
      else if (order < 0)
        hi = probe;
      else
        {
          /* Found one entry with this name; widen in both directions to
          cover every neighbouring entry that carries the same name. */
          uschar *run_start = hit;
          uschar *run_end = hit;

          while (run_start > table &&
                 strcmp(stringname, (char *)(run_start - entry_len + 2)) == 0)
            run_start -= entry_len;

          while (run_end < final_entry &&
                 strcmp(stringname, (char *)(run_end + entry_len + 2)) == 0)
            run_end += entry_len;

          *firstptr = (char *)run_start;
          *lastptr = (char *)run_end;
          return entry_len;
        }
    }

  return PCRE_ERROR_NOSUBSTRING;
}
/*************************************************
* Find first set of multiple named strings *
*************************************************/
/* This function allows for duplicate names in the table of named substrings.
It returns the number of the first one that was set in a pattern match.
Arguments:
code the compiled regex
stringname the name of the capturing substring
ovector the vector of matched substrings
Returns: the number of the first that is set,
or the number of the last one if none are set,
or a negative number on error
*/
static int
get_first_set(const pcre *code, const char *stringname, int *ovector)
{
  const real_pcre *re = (const real_pcre *)code;
  int entrysize;
  char *first, *last;
  uschar *entry;

  /* When neither PCRE_DUPNAMES nor PCRE_JCHANGED is set, a plain lookup by
  name is sufficient. */
  if ((re->options & (PCRE_DUPNAMES | PCRE_JCHANGED)) == 0)
    return pcre_get_stringnumber(code, stringname);

  /* Otherwise fetch the whole run of table entries that share this name; a
  non-positive result is an error code and is passed straight back. */
  entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
  if (entrysize <= 0)
    return entrysize;

  /* Each entry starts with a two-byte, most-significant-first group number.
  Return the first group whose start offset in ovector is non-negative. */
  for (entry = (uschar *)first; entry <= (uschar *)last; entry += entrysize)
    {
      int n = (entry[0] << 8) + entry[1];
      if (ovector[n*2] >= 0)
        return n;
    }

  /* No group in the run was set: fall back to the number stored in the first
  entry of the run. The bytes must be read as unsigned, exactly as in the loop
  above. Decoding them through the plain "char *" pointer sign-extends any
  byte >= 0x80 where char is signed, and left-shifts a negative value, which
  yields a wrong (possibly negative) group number. */
  entry = (uschar *)first;
  return (entry[0] << 8) + entry[1];
}
/*************************************************
* Copy captured string to given buffer *
*************************************************/
/* This function copies a single captured substring into a given buffer.
Note that we use memcpy() rather than strncpy() in case there are binary zeros
in the string.
Arguments:
subject the subject string that was matched
ovector pointer to the offsets table
stringcount the number of substrings that were captured
(i.e. the yield of the pcre_exec call, unless
that was zero, in which case it should be 1/3
of the offset table size)
stringnumber the number of the required substring
buffer where to put the substring
size the size of the buffer
Returns: if successful:
the length of the copied string, not including the zero
that is put on the end; can be zero
if not successful:
PCRE_ERROR_NOMEMORY (-6) buffer too small
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
*/
int
pcre_copy_substring(const char *subject, int *ovector, int stringcount,
  int stringnumber, char *buffer, int size)
{
  int slot;
  int length;

  /* Only substrings numbered 0 .. stringcount-1 exist. */
  if (!(stringnumber >= 0 && stringnumber < stringcount))
    return PCRE_ERROR_NOSUBSTRING;

  /* Offsets are stored as (start, end) pairs, so substring k occupies
  ovector[2k] and ovector[2k+1]. */
  slot = 2 * stringnumber;
  length = ovector[slot + 1] - ovector[slot];

  /* The buffer has to hold the text plus the terminating zero. */
  if (length + 1 > size)
    return PCRE_ERROR_NOMEMORY;

  memcpy(buffer, subject + ovector[slot], length);
  buffer[length] = 0;
  return length;
}
/*************************************************
* Copy named captured string to given buffer *
*************************************************/
/* This function copies a single captured substring into a given buffer,
identifying it by name. If the regex permits duplicate names, the first
substring that is set is chosen.
Arguments:
code the compiled regex
subject the subject string that was matched
ovector pointer to the offsets table
stringcount the number of substrings that were captured
(i.e. the yield of the pcre_exec call, unless
that was zero, in which case it should be 1/3
of the offset table size)
stringname the name of the required substring
buffer where to put the substring
size the size of the buffer
Returns: if successful:
the length of the copied string, not including the zero
that is put on the end; can be zero
if not successful:
PCRE_ERROR_NOMEMORY (-6) buffer too small
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
*/
int
pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
  int stringcount, const char *stringname, char *buffer, int size)
{
  /* Translate the name into a group number; a non-positive result is handed
  back to the caller unchanged, otherwise the copy is delegated to the
  by-number variant. */
  int number = get_first_set(code, stringname, ovector);

  if (number > 0)
    return pcre_copy_substring(subject, ovector, stringcount, number, buffer,
      size);

  return number;
}
/*************************************************
* Copy all captured strings to new store *
*************************************************/
/* This function gets one chunk of store and builds a list of pointers and all
of the captured substrings in it. A NULL pointer is put on the end of the list.
Arguments:
subject the subject string that was matched
ovector pointer to the offsets table
stringcount the number of substrings that were captured
(i.e. the yield of the pcre_exec call, unless
that was zero, in which case it should be 1/3
of the offset table size)
listptr set to point to the list of pointers
Returns: if successful: 0
if not successful:
PCRE_ERROR_NOMEMORY (-6) failed to get store
*/
int
pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
  const char ***listptr)
{
  int k;
  int total = sizeof(char *);   /* room for the terminating NULL pointer */
  char **slots;
  char **next_slot;
  char *text;

  /* First pass: add up, per substring, one pointer plus the characters plus
  a terminating zero. */
  for (k = 0; k < stringcount; k++)
    {
      int *pair = ovector + 2*k;
      total += sizeof(char *) + pair[1] - pair[0] + 1;
    }

  slots = (char **)(pcre_malloc)(total);
  if (slots == NULL)
    return PCRE_ERROR_NOMEMORY;

  *listptr = (const char **)slots;

  /* The string data starts right after the pointer array, which has
  stringcount entries plus the NULL terminator. */
  text = (char *)(slots + stringcount + 1);
  next_slot = slots;

  /* Second pass: copy each substring, zero-terminate it and record where it
  starts. */
  for (k = 0; k < stringcount; k++)
    {
      int *pair = ovector + 2*k;
      int len = pair[1] - pair[0];

      memcpy(text, subject + pair[0], len);
      *next_slot++ = text;
      text[len] = 0;
      text += len + 1;
    }

  *next_slot = NULL;
  return 0;
}
/*************************************************
* Free store obtained by get_substring_list *
*************************************************/
/* This function exists for the benefit of people calling PCRE from non-C
programs that can call its functions, but not free() or (pcre_free)() directly.
Argument: the result of a previous pcre_get_substring_list()
Returns: nothing
*/
void
pcre_free_substring_list(const char **pointer)
{
  /* Drop the const qualification and hand the block to pcre_free. */
  void *block = (void *)pointer;

  (pcre_free)(block);
}
/*************************************************
* Copy captured string to new store *
*************************************************/
/* This function copies a single captured substring into a piece of new
store
Arguments:
subject the subject string that was matched
ovector pointer to the offsets table
stringcount the number of substrings that were captured
(i.e. the yield of the pcre_exec call, unless
that was zero, in which case it should be 1/3
of the offset table size)
stringnumber the number of the required substring
stringptr where to put a pointer to the substring
Returns: if successful:
the length of the string, not including the zero that
is put on the end; can be zero
if not successful:
PCRE_ERROR_NOMEMORY (-6) failed to get store
PCRE_ERROR_NOSUBSTRING (-7) substring not present
*/
int
pcre_get_substring(const char *subject, int *ovector, int stringcount,
  int stringnumber, const char **stringptr)
{
  int slot;
  int length;
  char *copy;

  /* Only substrings numbered 0 .. stringcount-1 exist. */
  if (!(stringnumber >= 0 && stringnumber < stringcount))
    return PCRE_ERROR_NOSUBSTRING;

  /* Substring k is described by the pair ovector[2k], ovector[2k+1]. */
  slot = 2 * stringnumber;
  length = ovector[slot + 1] - ovector[slot];

  /* One extra byte for the terminating zero. */
  copy = (char *)(pcre_malloc)(length + 1);
  if (copy == NULL)
    return PCRE_ERROR_NOMEMORY;

  memcpy(copy, subject + ovector[slot], length);
  copy[length] = 0;
  *stringptr = copy;
  return length;
}
/*************************************************
* Copy named captured string to new store *
*************************************************/
/* This function copies a single captured substring, identified by name, into
new store. If the regex permits duplicate names, the first substring that is
set is chosen.
Arguments:
code the compiled regex
subject the subject string that was matched
ovector pointer to the offsets table
stringcount the number of substrings that were captured
(i.e. the yield of the pcre_exec call, unless
that was zero, in which case it should be 1/3
of the offset table size)
stringname the name of the required substring
stringptr where to put the pointer
Returns: if successful:
the length of the copied string, not including the zero
that is put on the end; can be zero
if not successful:
PCRE_ERROR_NOMEMORY (-6) couldn't get memory
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
*/
int
pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
  int stringcount, const char *stringname, const char **stringptr)
{
  /* Translate the name into a group number; a non-positive result is handed
  back to the caller unchanged, otherwise the extraction is delegated to the
  by-number variant. */
  int number = get_first_set(code, stringname, ovector);

  if (number > 0)
    return pcre_get_substring(subject, ovector, stringcount, number,
      stringptr);

  return number;
}
/*************************************************
* Free store obtained by get_substring *
*************************************************/
/* This function exists for the benefit of people calling PCRE from non-C
programs that can call its functions, but not free() or (pcre_free)() directly.
Argument: the result of a previous pcre_get_substring()
Returns: nothing
*/
void
pcre_free_substring(const char *pointer)
{
  /* Drop the const qualification and hand the block to pcre_free. */
  void *block = (void *)pointer;

  (pcre_free)(block);
}
/* End of pcre_get.c */

59
glib/pcre/pcre_globals.c Normal file
View File

@ -0,0 +1,59 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2006 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains global variables that are exported by the PCRE library.
PCRE is thread-clean and doesn't use any global variables in the normal sense.
However, it calls memory allocation and freeing functions via the four
indirections below, and it can optionally do callouts, using the fifth
indirection. These values can be changed by the caller, but are shared between
all threads. However, when compiling for Virtual Pascal, things are done
differently, and global variables are not used (see pcre.in). */
#include "pcre_internal.h"
/* The callout hook: a pointer to a function that takes a pcre_callout_block
pointer and returns int. It is initialised to NULL here. This is the only
variable defined in this file. The two branches define the same object; the
C++ branch merely adds C linkage. */
#ifdef __cplusplus
extern "C" int (*pcre_callout)(pcre_callout_block *) = NULL;
#else
int (*pcre_callout)(pcre_callout_block *) = NULL;
#endif
/* End of pcre_globals.c */

89
glib/pcre/pcre_info.c Normal file
View File

@ -0,0 +1,89 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2006 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains the external function pcre_info(), which gives some
information about a compiled pattern. However, use of this function is now
deprecated, as it has been superseded by pcre_fullinfo(). */
#include "pcre_internal.h"
/*************************************************
* (Obsolete) Return info about compiled pattern *
*************************************************/
/* This is the original "info" function. It picks potentially useful data out
of the private structure, but its interface was too rigid. It remains for
backwards compatibility. The public options are passed back in an int - though
the re->options field has been expanded to a long int, all the public options
are at the low end of it, and so even on 16-bit systems this will still be OK.
Therefore, I haven't changed the API for pcre_info().
Arguments:
argument_re points to compiled code
optptr where to pass back the options
first_byte where to pass back the first character,
or -1 if multiline and all branches start ^,
or -2 otherwise
Returns: number of capturing subpatterns
or negative values on error
*/
PCRE_DATA_SCOPE int
pcre_info(const pcre *argument_re, int *optptr, int *first_byte)
{
  real_pcre flipped_re;
  const real_pcre *re = (const real_pcre *)argument_re;

  if (re == NULL)
    return PCRE_ERROR_NULL;

  /* An unexpected magic number is handed to _pcre_try_flipped(); if that
  succeeds, the local copy is read from here on. */
  if (re->magic_number != MAGIC_NUMBER)
    {
      re = _pcre_try_flipped(re, &flipped_re, NULL, NULL);
      if (re == NULL)
        return PCRE_ERROR_BADMAGIC;
    }

  /* Both output pointers are optional. */
  if (optptr != NULL)
    *optptr = (int)(re->options & PUBLIC_OPTIONS);

  if (first_byte != NULL)
    {
      /* The first byte if one is recorded, else -1 when PCRE_STARTLINE is
      set, else -2. */
      if ((re->options & PCRE_FIRSTSET) != 0)
        *first_byte = re->first_byte;
      else if ((re->options & PCRE_STARTLINE) != 0)
        *first_byte = -1;
      else
        *first_byte = -2;
    }

  return re->top_bracket;
}
/* End of pcre_info.c */

1041
glib/pcre/pcre_internal.h Normal file

File diff suppressed because it is too large Load Diff

140
glib/pcre/pcre_maketables.c Normal file
View File

@ -0,0 +1,140 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2006 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains the external function pcre_maketables(), which builds
character tables for PCRE in the current locale. The file is compiled on its
own as part of the PCRE library. However, it is also included in the
compilation of dftables.c, in which case the macro DFTABLES is defined. */
#ifndef DFTABLES
#include "pcre_internal.h"
#endif
/*************************************************
* Create PCRE character tables *
*************************************************/
/* This function builds a set of character tables for use by PCRE and returns
a pointer to them. They are built using the ctype functions, and consequently
their contents will depend upon the current locale setting. When compiled as
part of the library, the store is obtained via pcre_malloc(), but when compiled
inside dftables, use malloc().
Arguments: none
Returns: pointer to the contiguous block of data
*/
const unsigned char *
pcre_maketables(void)
{
  unsigned char *tables;
  unsigned char *cursor;
  int ch;

  /* One contiguous block holds all four tables; the allocator depends on
  whether this file is being built as part of dftables. */
#ifndef DFTABLES
  tables = (unsigned char*)(pcre_malloc)(tables_length);
#else
  tables = (unsigned char*)malloc(tables_length);
#endif
  if (tables == NULL)
    return NULL;
  cursor = tables;

  /* Table 1: lower-casing. */
  for (ch = 0; ch < 256; ch++)
    *cursor++ = tolower(ch);

  /* Table 2: case flipping. */
  for (ch = 0; ch < 256; ch++)
    {
      if (islower(ch))
        *cursor++ = toupper(ch);
      else
        *cursor++ = tolower(ch);
    }

  /* Table 3: character-class bit maps. Every class is tested independently
  rather than derived from another one, because locales can differ; note that
  the "space" class keeps everything isspace() reports, and that "word" is
  alnum plus underscore. */
  memset(cursor, 0, cbit_length);
  for (ch = 0; ch < 256; ch++)
    {
      const int byte_index = ch/8;
      const int bit_mask = 1 << (ch&7);

      if (isdigit(ch))  cursor[cbit_digit  + byte_index] |= bit_mask;
      if (isupper(ch))  cursor[cbit_upper  + byte_index] |= bit_mask;
      if (islower(ch))  cursor[cbit_lower  + byte_index] |= bit_mask;
      if (isalnum(ch) || ch == '_')
                        cursor[cbit_word   + byte_index] |= bit_mask;
      if (isspace(ch))  cursor[cbit_space  + byte_index] |= bit_mask;
      if (isxdigit(ch)) cursor[cbit_xdigit + byte_index] |= bit_mask;
      if (isgraph(ch))  cursor[cbit_graph  + byte_index] |= bit_mask;
      if (isprint(ch))  cursor[cbit_print  + byte_index] |= bit_mask;
      if (ispunct(ch))  cursor[cbit_punct  + byte_index] |= bit_mask;
      if (iscntrl(ch))  cursor[cbit_cntrl  + byte_index] |= bit_mask;
    }
  cursor += cbit_length;

  /* Table 4: character types. VT (0x0b) is deliberately left out of the
  white-space type here. */
  for (ch = 0; ch < 256; ch++)
    {
      int flags = 0;

      if (ch != 0x0b && isspace(ch))
        flags += ctype_space;
      if (isalpha(ch))
        flags += ctype_letter;
      if (isdigit(ch))
        flags += ctype_digit;
      if (isxdigit(ch))
        flags += ctype_xdigit;
      if (isalnum(ch) || ch == '_')
        flags += ctype_word;

      /* strchr() also matches the terminating zero of its first argument,
      so binary zero is flagged as a meta-character too; that is wanted,
      because it ends a run of data characters. */
      if (strchr("\\*+?{^.$|()[", ch) != NULL)
        flags += ctype_meta;

      *cursor++ = flags;
    }

  return tables;
}
/* End of pcre_maketables.c */

135
glib/pcre/pcre_newline.c Normal file
View File

@ -0,0 +1,135 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2006 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains internal functions for testing newlines when more than
one kind of newline is to be recognized. When a newline is found, its length is
returned. In principle, we could implement several newline "types", each
referring to a different set of newline characters. At present, PCRE supports
only NLTYPE_FIXED, which gets handled without these functions, and NLTYPE_ALL,
so for now the type isn't passed into the functions. It can easily be added
later if required. The full list of Unicode newline characters is taken from
http://unicode.org/unicode/reports/tr18/. */
#include "pcre_internal.h"
/*************************************************
* Check for newline at given position *
*************************************************/
/* Test whether the character at ptr begins a newline sequence.

The caller guarantees that ptr is below endptr on entry. In UTF-8 mode the
character is decoded with GETCHAR; otherwise the single byte at ptr is used.

Arguments:
  ptr          pointer to possible newline
  endptr       pointer to the end of the string
  lenptr       where to return the length (in bytes) of the newline
  utf8         TRUE if in utf8 mode

Returns:       TRUE if a newline was recognized (*lenptr is set),
               FALSE otherwise (*lenptr is left untouched)
*/

BOOL
_pcre_is_newline(const uschar *ptr, const uschar *endptr, int *lenptr,
  BOOL utf8)
{
int ch;

if (utf8) { GETCHAR(ch, ptr); }
else ch = *ptr;

if (ch >= 0x000a && ch <= 0x000c)            /* LF, VT, FF */
  {
  *lenptr = 1;
  }
else if (ch == 0x000d)                       /* CR, or CR followed by LF */
  {
  if (ptr < endptr - 1 && ptr[1] == 0x0a) *lenptr = 2;
  else *lenptr = 1;
  }
else if (ch == 0x0085)                       /* NEL: two bytes in UTF-8 */
  {
  *lenptr = utf8? 2 : 1;
  }
else if (ch == 0x2028 || ch == 0x2029)       /* LS, PS */
  {
  *lenptr = 3;
  }
else
  {
  return FALSE;
  }

return TRUE;
}
/*************************************************
* Check for newline at previous position *
*************************************************/
/* Test whether the character immediately before ptr ends a newline sequence.

The caller guarantees that ptr is above startptr on entry. The function steps
back one byte and, in UTF-8 mode, keeps stepping back with BACKCHAR to the
first byte of the character before decoding it with GETCHAR.

Arguments:
  ptr          pointer just past the possible newline
  startptr     pointer to the start of the string
  lenptr       where to return the length (in bytes) of the newline
  utf8         TRUE if in utf8 mode

Returns:       TRUE if a newline was recognized (*lenptr is set),
               FALSE otherwise (*lenptr is left untouched)
*/

BOOL
_pcre_was_newline(const uschar *ptr, const uschar *startptr, int *lenptr,
  BOOL utf8)
{
const uschar *prev = ptr - 1;
int ch;

if (utf8)
  {
  BACKCHAR(prev);
  GETCHAR(ch, prev);
  }
else ch = *prev;

if (ch == 0x000a)                            /* LF, possibly preceded by CR */
  {
  if (prev > startptr && prev[-1] == 0x0d) *lenptr = 2;
  else *lenptr = 1;
  }
else if (ch >= 0x000b && ch <= 0x000d)       /* VT, FF, CR */
  {
  *lenptr = 1;
  }
else if (ch == 0x0085)                       /* NEL: two bytes in UTF-8 */
  {
  *lenptr = utf8? 2 : 1;
  }
else if (ch == 0x2028 || ch == 0x2029)       /* LS, PS */
  {
  *lenptr = 3;
  }
else
  {
  return FALSE;
  }

return TRUE;
}
/* End of pcre_newline.c */

78
glib/pcre/pcre_ord2utf8.c Normal file
View File

@ -0,0 +1,78 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2006 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This file contains a private PCRE function that converts an ordinal
character value into a UTF8 string. */
#include "pcre_internal.h"
/*************************************************
* Convert character value to UTF-8 *
*************************************************/
/* Encode an ordinal character value (intended range 0 - 0x7fffffff) as a
UTF-8 byte sequence. The number of continuation bytes is found by comparing
the value against the breakpoints in _pcre_utf8_table1; the lead byte gets its
marker bits from _pcre_utf8_table2.

Arguments:
  cvalue     the character value
  buffer     pointer to buffer for result - at least 6 bytes long

Returns:     number of bytes placed in the buffer (1 to 6)
*/

int
_pcre_ord2utf8(int cvalue, uschar *buffer)
{
int extra = 0;      /* Number of continuation bytes needed */
int pos;

while (extra < _pcre_utf8_table1_size && cvalue > _pcre_utf8_table1[extra])
  extra++;

/* Fill in the continuation bytes from the last one backwards, six bits of the
value at a time. */

for (pos = extra; pos > 0; pos--)
  {
  buffer[pos] = 0x80 | (cvalue & 0x3f);
  cvalue >>= 6;
  }

/* Whatever is left of the value goes into the lead byte. */

buffer[0] = _pcre_utf8_table2[extra] | cvalue;
return extra + 1;
}
/* End of pcre_ord2utf8.c */

77
glib/pcre/pcre_refcount.c Normal file
View File

@ -0,0 +1,77 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2006 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains the external function pcre_refcount(), which is an
auxiliary function that can be used to maintain a reference count in a compiled
pattern data block. This might be helpful in applications where the block is
shared by different users. */
#include "pcre_internal.h"
/*************************************************
* Maintain reference count *
*************************************************/
/* Adjust and return the reference count stored in a compiled pattern. The
count is kept within 0..65535: an adjustment that would take it below zero
leaves it at 0, and one that would take it above 65535 leaves it at 65535. As
the original author notes, a non-zero count cannot be carried over to a host
of different byte order.

Arguments:
  argument_re   points to compiled code
  adjust        value to add to the count

Returns:        the (possibly updated) count value (a non-negative number), or
                PCRE_ERROR_NULL if argument_re is NULL
*/

PCRE_DATA_SCOPE int
pcre_refcount(pcre *argument_re, int adjust)
{
real_pcre *re = (real_pcre *)argument_re;
int current;
int updated;

if (re == NULL) return PCRE_ERROR_NULL;

current = re->ref_count;
if (-adjust > current)
  updated = 0;                     /* Would go negative: floor at zero */
else if (adjust + current > 65535)
  updated = 65535;                 /* Would overflow the field: saturate */
else
  updated = current + adjust;

re->ref_count = updated;
return re->ref_count;
}
/* End of pcre_refcount.c */

570
glib/pcre/pcre_study.c Normal file
View File

@ -0,0 +1,570 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2006 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains the external function pcre_study(), along with local
supporting functions. */
#include "pcre_internal.h"
/* Returns from set_start_bits():
  SSB_FAIL      failed to find any starting bytes
  SSB_DONE      found mandatory starting bytes
  SSB_CONTINUE  found only optional starting bytes; the caller must keep
                scanning (at the outermost level this counts as failure) */
enum { SSB_FAIL, SSB_DONE, SSB_CONTINUE };
/*************************************************
* Set a bit and maybe its alternate case *
*************************************************/
/* Mark a character as a possible starting byte in the bit map. When matching
caselessly and the character is flagged as a letter in the ctypes table, the
bit for its case-flipped counterpart (from the fcc table) is marked as well.

Arguments:
  start_bits    points to the bit map
  c             is the character
  caseless      the caseless flag
  cd            the block with char table pointers

Returns:        nothing
*/

static void
set_bit(uschar *start_bits, unsigned int c, BOOL caseless, compile_data *cd)
{
unsigned int other;

start_bits[c >> 3] |= (1 << (c & 7));

if (!caseless) return;
if ((cd->ctypes[c] & ctype_letter) == 0) return;

other = cd->fcc[c];
start_bits[other >> 3] |= (1 << (other & 7));
}
/*************************************************
* Create bitmap of starting bytes *
*************************************************/
/* This function scans a compiled unanchored expression recursively and
attempts to build a bitmap of the set of possible starting bytes. As time goes
by, we may be able to get more clever at doing this. The SSB_CONTINUE return is
useful for parenthesized groups in patterns such as (a*)b where the group
provides some optional starting bytes but scanning must continue at the outer
level to find at least one mandatory byte. At the outermost level, this
function fails unless the result is SSB_DONE.

Each alternative of the group at "code" is scanned in turn. Within one
alternative, items are examined until one is found that must consume a
character (the branch is then finished) or until something is met that this
function does not understand (the whole scan then fails).

Arguments:
  code         points to an expression
  start_bits   points to a 32-byte table, initialized to 0
  caseless     the current state of the caseless flag
  utf8         TRUE if in UTF-8 mode
  cd           the block with char table pointers

Returns:       SSB_FAIL     => Failed to find any starting bytes
               SSB_DONE     => Found mandatory starting bytes
               SSB_CONTINUE => Found optional starting bytes
*/

static int
set_start_bits(const uschar *code, uschar *start_bits, BOOL caseless,
  BOOL utf8, compile_data *cd)
{
register int c;
int yield = SSB_DONE;

#if 0
/* ========================================================================= */
/* The following comment and code was inserted in January 1999. In May 2006,
when it was observed to cause compiler warnings about unused values, I took it
out again. If anybody is still using OS/2, they will have to put it back
manually. */

/* This next statement and the later reference to dummy are here in order to
trick the optimizer of the IBM C compiler for OS/2 into generating correct
code. Apparently IBM isn't going to fix the problem, and we would rather not
disable optimization (in this module it actually makes a big difference, and
the pcre module can use all the optimization it can get). */

volatile int dummy;
/* ========================================================================= */
#endif

do
  {
  const uschar *tcode = code + (((int)*code == OP_CBRA)? 3:1) + LINK_SIZE;
  BOOL try_next = TRUE;

  while (try_next)    /* Loop for items in this branch */
    {
    int rc;
    switch(*tcode)
      {
      /* Fail if we reach something we don't understand */

      default:
      return SSB_FAIL;

      /* If we hit a bracket or a positive lookahead assertion, recurse to set
      bits from within the subpattern. If it can't find anything, we have to
      give up. If it finds some mandatory character(s), we are done for this
      branch. Otherwise, carry on scanning after the subpattern. */

      case OP_BRA:
      case OP_SBRA:
      case OP_CBRA:
      case OP_SCBRA:
      case OP_ONCE:
      case OP_ASSERT:
      rc = set_start_bits(tcode, start_bits, caseless, utf8, cd);
      if (rc == SSB_FAIL) return SSB_FAIL;
      if (rc == SSB_DONE) try_next = FALSE; else
        {
        do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
        tcode += 1 + LINK_SIZE;
        }
      break;

      /* If we hit ALT or KET, it means we haven't found anything mandatory in
      this branch, though we might have found something optional. For ALT, we
      continue with the next alternative, but we have to arrange that the final
      result from subpattern is SSB_CONTINUE rather than SSB_DONE. For KET,
      return SSB_CONTINUE: if this is the top level, that indicates failure,
      but after a nested subpattern, it causes scanning to continue. */

      case OP_ALT:
      yield = SSB_CONTINUE;
      try_next = FALSE;
      break;

      case OP_KET:
      case OP_KETRMAX:
      case OP_KETRMIN:
      return SSB_CONTINUE;

      /* Skip over callout */

      case OP_CALLOUT:
      tcode += 2 + 2*LINK_SIZE;
      break;

      /* Skip over lookbehind and negative lookahead assertions */

      case OP_ASSERT_NOT:
      case OP_ASSERTBACK:
      case OP_ASSERTBACK_NOT:
      do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
      tcode += 1 + LINK_SIZE;
      break;

      /* Skip over an option setting, changing the caseless flag */

      case OP_OPT:
      caseless = (tcode[1] & PCRE_CASELESS) != 0;
      tcode += 2;
      break;

      /* BRAZERO does the bracket, but carries on. */

      case OP_BRAZERO:
      case OP_BRAMINZERO:
      if (set_start_bits(++tcode, start_bits, caseless, utf8, cd) == SSB_FAIL)
        return SSB_FAIL;
/* =========================================================================
      See the comment at the head of this function concerning the next line,
      which was an old fudge for the benefit of OS/2.
      dummy = 1;
  ========================================================================= */
      do tcode += GET(tcode,1); while (*tcode == OP_ALT);
      tcode += 1 + LINK_SIZE;
      break;

      /* Single-char * or ? sets the bit and tries the next item */

      case OP_STAR:
      case OP_MINSTAR:
      case OP_POSSTAR:
      case OP_QUERY:
      case OP_MINQUERY:
      case OP_POSQUERY:
      set_bit(start_bits, tcode[1], caseless, cd);
      tcode += 2;
#ifdef SUPPORT_UTF8
      if (utf8 && tcode[-1] >= 0xc0)
        tcode += _pcre_utf8_table4[tcode[-1] & 0x3f];
#endif
      break;

      /* Single-char upto sets the bit and tries the next */

      case OP_UPTO:
      case OP_MINUPTO:
      case OP_POSUPTO:
      set_bit(start_bits, tcode[3], caseless, cd);
      tcode += 4;
#ifdef SUPPORT_UTF8
      if (utf8 && tcode[-1] >= 0xc0)
        tcode += _pcre_utf8_table4[tcode[-1] & 0x3f];
#endif
      break;

      /* At least one single char sets the bit and stops */

      case OP_EXACT:       /* Fall through */
      tcode += 2;

      case OP_CHAR:
      case OP_CHARNC:
      case OP_PLUS:
      case OP_MINPLUS:
      case OP_POSPLUS:
      set_bit(start_bits, tcode[1], caseless, cd);
      try_next = FALSE;
      break;

      /* Single character type sets the bits and stops */

      case OP_NOT_DIGIT:
      for (c = 0; c < 32; c++)
        start_bits[c] |= ~cd->cbits[c+cbit_digit];
      try_next = FALSE;
      break;

      case OP_DIGIT:
      for (c = 0; c < 32; c++)
        start_bits[c] |= cd->cbits[c+cbit_digit];
      try_next = FALSE;
      break;

      /* The cbit_space table has vertical tab as whitespace; we have to
      discard it. */

      case OP_NOT_WHITESPACE:
      for (c = 0; c < 32; c++)
        {
        int d = cd->cbits[c+cbit_space];
        if (c == 1) d &= ~0x08;
        start_bits[c] |= ~d;
        }
      try_next = FALSE;
      break;

      /* The cbit_space table has vertical tab as whitespace; we have to
      discard it. */

      case OP_WHITESPACE:
      for (c = 0; c < 32; c++)
        {
        int d = cd->cbits[c+cbit_space];
        if (c == 1) d &= ~0x08;
        start_bits[c] |= d;
        }
      try_next = FALSE;
      break;

      case OP_NOT_WORDCHAR:
      for (c = 0; c < 32; c++)
        start_bits[c] |= ~cd->cbits[c+cbit_word];
      try_next = FALSE;
      break;

      case OP_WORDCHAR:
      for (c = 0; c < 32; c++)
        start_bits[c] |= cd->cbits[c+cbit_word];
      try_next = FALSE;
      break;

      /* One or more character type fudges the pointer and restarts, knowing
      it will hit a single character type and stop there. */

      case OP_TYPEPLUS:
      case OP_TYPEMINPLUS:
      tcode++;
      break;

      case OP_TYPEEXACT:
      tcode += 3;
      break;

      /* Zero or more repeats of character types set the bits and then
      try again. */

      case OP_TYPEUPTO:
      case OP_TYPEMINUPTO:
      case OP_TYPEPOSUPTO:
      tcode += 2;               /* Fall through */

      case OP_TYPESTAR:
      case OP_TYPEMINSTAR:
      case OP_TYPEPOSSTAR:
      case OP_TYPEQUERY:
      case OP_TYPEMINQUERY:
      case OP_TYPEPOSQUERY:
      switch(tcode[1])
        {
        /* Give up on any type for which no bits can be set here. Carrying on
        would leave the optional item's starting bytes out of the map, and the
        fixed two-byte skip below is only known to be right for the types that
        are handled explicitly. */

        default:
        case OP_ANY:
        return SSB_FAIL;

        case OP_NOT_DIGIT:
        for (c = 0; c < 32; c++)
          start_bits[c] |= ~cd->cbits[c+cbit_digit];
        break;

        case OP_DIGIT:
        for (c = 0; c < 32; c++)
          start_bits[c] |= cd->cbits[c+cbit_digit];
        break;

        /* The cbit_space table has vertical tab as whitespace; we have to
        discard it. */

        case OP_NOT_WHITESPACE:
        for (c = 0; c < 32; c++)
          {
          int d = cd->cbits[c+cbit_space];
          if (c == 1) d &= ~0x08;
          start_bits[c] |= ~d;
          }
        break;

        /* The cbit_space table has vertical tab as whitespace; we have to
        discard it. */

        case OP_WHITESPACE:
        for (c = 0; c < 32; c++)
          {
          int d = cd->cbits[c+cbit_space];
          if (c == 1) d &= ~0x08;
          start_bits[c] |= d;
          }
        break;

        case OP_NOT_WORDCHAR:
        for (c = 0; c < 32; c++)
          start_bits[c] |= ~cd->cbits[c+cbit_word];
        break;

        case OP_WORDCHAR:
        for (c = 0; c < 32; c++)
          start_bits[c] |= cd->cbits[c+cbit_word];
        break;
        }

      tcode += 2;
      break;

      /* Character class where all the information is in a bit map: set the
      bits and either carry on or not, according to the repeat count. If it was
      a negative class, and we are operating with UTF-8 characters, any byte
      with a value >= 0xc4 is a potentially valid starter because it starts a
      character with a value > 255. */

      case OP_NCLASS:
      if (utf8)
        {
        start_bits[24] |= 0xf0;              /* Bits for 0xc4 - 0xc7 */
        memset(start_bits+25, 0xff, 7);      /* Bits for 0xc8 - 0xff */
        }
      /* Fall through */

      case OP_CLASS:
        {
        tcode++;

        /* In UTF-8 mode, the bits in a bit map correspond to character
        values, not to byte values. However, the bit map we are constructing is
        for byte values. So we have to do a conversion for characters whose
        value is > 127. In fact, there are only two possible starting bytes for
        characters in the range 128 - 255. */

        if (utf8)
          {
          for (c = 0; c < 16; c++) start_bits[c] |= tcode[c];
          for (c = 128; c < 256; c++)
            {
            /* Test the individual bit for character c. This must be a bitwise
            AND: a logical AND would be true whenever the containing byte of
            the class map is non-zero, marking starters for characters that
            are not in the class. */

            if ((tcode[c/8] & (1 << (c&7))) != 0)
              {
              int d = (c >> 6) | 0xc0;            /* Set bit for this starter */
              start_bits[d/8] |= (1 << (d&7));    /* and then skip on to the */
              c = (c & 0xc0) + 0x40 - 1;          /* next relevant character. */
              }
            }
          }

        /* In non-UTF-8 mode, the two bit maps are completely compatible. */

        else
          {
          for (c = 0; c < 32; c++) start_bits[c] |= tcode[c];
          }

        /* Advance past the bit map, and act on what follows */

        tcode += 32;
        switch (*tcode)
          {
          case OP_CRSTAR:
          case OP_CRMINSTAR:
          case OP_CRQUERY:
          case OP_CRMINQUERY:
          tcode++;
          break;

          /* A range whose minimum count is zero makes the class optional, so
          carry on; otherwise the class is mandatory and ends the branch. */

          case OP_CRRANGE:
          case OP_CRMINRANGE:
          if (((tcode[1] << 8) + tcode[2]) == 0) tcode += 5;
            else try_next = FALSE;
          break;

          default:
          try_next = FALSE;
          break;
          }
        }
      break; /* End of bitmap class handling */

      }      /* End of switch */
    }        /* End of try_next loop */

  code += GET(code, 1);   /* Advance to next branch */
  }
while (*code == OP_ALT);
return yield;
}
/*************************************************
* Study a compiled expression *
*************************************************/
/* Study a compiled expression and, where possible, produce extra data that
speeds up matching. The only optimization attempted is a 256-bit map of the
bytes with which a match can start, built by set_start_bits(). The result is
a single pcre_malloc'd block holding a pcre_extra immediately followed by its
pcre_study_data, which the caller later hands to pcre_exec().

Arguments:
  re        points to the compiled expression
  options   contains option bits
  errorptr  points to where to place error messages;
            set NULL unless error

Returns:    pointer to a pcre_extra block, with study_data filled in and the
              appropriate flag set;
            NULL on error or if no optimization possible
*/

PCRE_DATA_SCOPE pcre_extra *
pcre_study(const pcre *external_re, int options, const char **errorptr)
{
const real_pcre *re = (const real_pcre *)external_re;
const uschar *tables;
uschar *code;
uschar start_bits[32];
compile_data compile_block;
pcre_extra *extra;
pcre_study_data *study;
int caseless;
int utf8;

*errorptr = NULL;

/* Validate the arguments before touching anything else. */

if (re == NULL || re->magic_number != MAGIC_NUMBER)
  {
  *errorptr = "argument is not a compiled regular expression";
  return NULL;
  }

if ((options & ~PUBLIC_STUDY_OPTIONS) != 0)
  {
  *errorptr = "unknown or incorrect option bit(s) set";
  return NULL;
  }

/* Nothing is done for an anchored pattern, an unanchored pattern that already
has a known first char, or a multiline pattern that matches only at "line
starts". */

if ((re->options & (PCRE_ANCHORED|PCRE_FIRSTSET|PCRE_STARTLINE)) != 0)
  return NULL;

/* The compiled code follows the name table inside the pattern block. */

code = (uschar *)re + re->name_table_offset +
  (re->name_count * re->name_entry_size);

/* Use the pattern's own character tables if it has them, otherwise ask for
the default ones, and set up the block that is passed around. */

tables = re->tables;
if (tables == NULL)
  (void)pcre_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,
  (void *)(&tables));

compile_block.lcc = tables + lcc_offset;
compile_block.fcc = tables + fcc_offset;
compile_block.cbits = tables + cbits_offset;
compile_block.ctypes = tables + ctypes_offset;

/* Try to find a fixed set of initial bytes; anything short of SSB_DONE means
there is nothing useful to return. */

caseless = (re->options & PCRE_CASELESS) != 0;
utf8 = (re->options & PCRE_UTF8) != 0;

memset(start_bits, 0, sizeof(start_bits));
if (set_start_bits(code, start_bits, caseless, utf8, &compile_block) !=
    SSB_DONE)
  return NULL;

/* Get one block for both the pcre_extra and the pcre_study_data. The size of
pcre_study_data is fixed at present, but it is recorded in the block anyway so
that pcre_fullinfo() need not change if it ever becomes variable. */

extra = (pcre_extra *)(pcre_malloc)
  (sizeof(pcre_extra) + sizeof(pcre_study_data));

if (extra == NULL)
  {
  *errorptr = "failed to get memory";
  return NULL;
  }

study = (pcre_study_data *)(extra + 1);   /* Directly after the pcre_extra */

study->size = sizeof(pcre_study_data);
study->options = PCRE_STUDY_MAPPED;
memcpy(study->start_bits, start_bits, sizeof(start_bits));

extra->study_data = study;
extra->flags = PCRE_EXTRA_STUDY_DATA;

return extra;
}
/* End of pcre_study.c */

304
glib/pcre/pcre_tables.c Normal file
View File

@ -0,0 +1,304 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2006 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains some fixed tables that are used by more than one of the
PCRE code modules. The tables are also #included by the pcretest program, which
uses macros to change their names from _pcre_xxx to xxxx, thereby avoiding name
clashes with the library. */
#include "pcre_internal.h"
/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
the definition is next to the definition of the opcodes in pcre_internal.h.
The table contents therefore come entirely from the OP_LENGTHS macro; to
change an entry, edit the macro rather than this file. */
const uschar _pcre_OP_lengths[] = { OP_LENGTHS };
/*************************************************
* Tables for UTF-8 support *
*************************************************/
/* These are the breakpoints for different numbers of bytes in a UTF-8
character. Entry i is the largest character value that can be encoded with i
additional bytes after the first; _pcre_ord2utf8() scans the table to choose
the encoding length. */
const int _pcre_utf8_table1[] =
{ 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};
/* Number of entries in _pcre_utf8_table1, exported so that other modules can
bound their scan of the table. */
const int _pcre_utf8_table1_size = sizeof(_pcre_utf8_table1)/sizeof(int);
/* These are the indicator bits and the mask for the data bits to set in the
first byte of a character, indexed by the number of additional bytes.
_pcre_utf8_table2 holds the indicator bits that are ORed into the first byte
when encoding; _pcre_utf8_table3 holds the mask that extracts the data bits
from the first byte when decoding. */
const int _pcre_utf8_table2[] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
const int _pcre_utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
/* Table of the number of extra bytes, indexed by the first byte masked with
0x3f. The highest number for a valid UTF-8 first byte is in fact 0x3d. The
table has 64 entries, one for every possible masked value, so indexing with
(byte & 0x3f) never goes out of bounds. */
const uschar _pcre_utf8_table4[] = {
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
/* This table translates Unicode property names into type and code values. It
is searched by binary chop, so must be in collating sequence of name.

The names are stored here as one long string, each name terminated by a zero
byte. The first field of each _pcre_utt entry is the byte offset of that
entry's name within this string, so the two tables must be kept in step: if a
name is added, removed, or changed in length, every later offset in _pcre_utt
has to be recalculated. */
const char _pcre_ucp_names[] =
"Any\0"
"Arabic\0"
"Armenian\0"
"Balinese\0"
"Bengali\0"
"Bopomofo\0"
"Braille\0"
"Buginese\0"
"Buhid\0"
"C\0"
"Canadian_Aboriginal\0"
"Cc\0"
"Cf\0"
"Cherokee\0"
"Cn\0"
"Co\0"
"Common\0"
"Coptic\0"
"Cs\0"
"Cuneiform\0"
"Cypriot\0"
"Cyrillic\0"
"Deseret\0"
"Devanagari\0"
"Ethiopic\0"
"Georgian\0"
"Glagolitic\0"
"Gothic\0"
"Greek\0"
"Gujarati\0"
"Gurmukhi\0"
"Han\0"
"Hangul\0"
"Hanunoo\0"
"Hebrew\0"
"Hiragana\0"
"Inherited\0"
"Kannada\0"
"Katakana\0"
"Kharoshthi\0"
"Khmer\0"
"L\0"
"L&\0"
"Lao\0"
"Latin\0"
"Limbu\0"
"Linear_B\0"
"Ll\0"
"Lm\0"
"Lo\0"
"Lt\0"
"Lu\0"
"M\0"
"Malayalam\0"
"Mc\0"
"Me\0"
"Mn\0"
"Mongolian\0"
"Myanmar\0"
"N\0"
"Nd\0"
"New_Tai_Lue\0"
"Nko\0"
"Nl\0"
"No\0"
"Ogham\0"
"Old_Italic\0"
"Old_Persian\0"
"Oriya\0"
"Osmanya\0"
"P\0"
"Pc\0"
"Pd\0"
"Pe\0"
"Pf\0"
"Phags_Pa\0"
"Phoenician\0"
"Pi\0"
"Po\0"
"Ps\0"
"Runic\0"
"S\0"
"Sc\0"
"Shavian\0"
"Sinhala\0"
"Sk\0"
"Sm\0"
"So\0"
"Syloti_Nagri\0"
"Syriac\0"
"Tagalog\0"
"Tagbanwa\0"
"Tai_Le\0"
"Tamil\0"
"Telugu\0"
"Thaana\0"
"Thai\0"
"Tibetan\0"
"Tifinagh\0"
"Ugaritic\0"
"Yi\0"
"Z\0"
"Zl\0"
"Zp\0"
"Zs\0";
/* Unicode property lookup table, one entry per name in _pcre_ucp_names and in
the same (sorted) order. Each entry is { offset, type, value }: the offset is
the byte position of the entry's name within _pcre_ucp_names, the type is one
of the PT_xxx codes, and the value is the ucp_xxx code for that name (0 where
the type alone is enough, as for "Any" and "L&"). As the names show, PT_SC
entries are script names, PT_GC entries are the one-letter categories, and
PT_PC entries are the two-letter categories. The offsets are hard-wired, so
they must be recalculated whenever _pcre_ucp_names changes. */
const ucp_type_table _pcre_utt[] = {
{ 0, PT_ANY, 0 },
{ 4, PT_SC, ucp_Arabic },
{ 11, PT_SC, ucp_Armenian },
{ 20, PT_SC, ucp_Balinese },
{ 29, PT_SC, ucp_Bengali },
{ 37, PT_SC, ucp_Bopomofo },
{ 46, PT_SC, ucp_Braille },
{ 54, PT_SC, ucp_Buginese },
{ 63, PT_SC, ucp_Buhid },
{ 69, PT_GC, ucp_C },
{ 71, PT_SC, ucp_Canadian_Aboriginal },
{ 91, PT_PC, ucp_Cc },
{ 94, PT_PC, ucp_Cf },
{ 97, PT_SC, ucp_Cherokee },
{ 106, PT_PC, ucp_Cn },
{ 109, PT_PC, ucp_Co },
{ 112, PT_SC, ucp_Common },
{ 119, PT_SC, ucp_Coptic },
{ 126, PT_PC, ucp_Cs },
{ 129, PT_SC, ucp_Cuneiform },
{ 139, PT_SC, ucp_Cypriot },
{ 147, PT_SC, ucp_Cyrillic },
{ 156, PT_SC, ucp_Deseret },
{ 164, PT_SC, ucp_Devanagari },
{ 175, PT_SC, ucp_Ethiopic },
{ 184, PT_SC, ucp_Georgian },
{ 193, PT_SC, ucp_Glagolitic },
{ 204, PT_SC, ucp_Gothic },
{ 211, PT_SC, ucp_Greek },
{ 217, PT_SC, ucp_Gujarati },
{ 226, PT_SC, ucp_Gurmukhi },
{ 235, PT_SC, ucp_Han },
{ 239, PT_SC, ucp_Hangul },
{ 246, PT_SC, ucp_Hanunoo },
{ 254, PT_SC, ucp_Hebrew },
{ 261, PT_SC, ucp_Hiragana },
{ 270, PT_SC, ucp_Inherited },
{ 280, PT_SC, ucp_Kannada },
{ 288, PT_SC, ucp_Katakana },
{ 297, PT_SC, ucp_Kharoshthi },
{ 308, PT_SC, ucp_Khmer },
{ 314, PT_GC, ucp_L },
{ 316, PT_LAMP, 0 },
{ 319, PT_SC, ucp_Lao },
{ 323, PT_SC, ucp_Latin },
{ 329, PT_SC, ucp_Limbu },
{ 335, PT_SC, ucp_Linear_B },
{ 344, PT_PC, ucp_Ll },
{ 347, PT_PC, ucp_Lm },
{ 350, PT_PC, ucp_Lo },
{ 353, PT_PC, ucp_Lt },
{ 356, PT_PC, ucp_Lu },
{ 359, PT_GC, ucp_M },
{ 361, PT_SC, ucp_Malayalam },
{ 371, PT_PC, ucp_Mc },
{ 374, PT_PC, ucp_Me },
{ 377, PT_PC, ucp_Mn },
{ 380, PT_SC, ucp_Mongolian },
{ 390, PT_SC, ucp_Myanmar },
{ 398, PT_GC, ucp_N },
{ 400, PT_PC, ucp_Nd },
{ 403, PT_SC, ucp_New_Tai_Lue },
{ 415, PT_SC, ucp_Nko },
{ 419, PT_PC, ucp_Nl },
{ 422, PT_PC, ucp_No },
{ 425, PT_SC, ucp_Ogham },
{ 431, PT_SC, ucp_Old_Italic },
{ 442, PT_SC, ucp_Old_Persian },
{ 454, PT_SC, ucp_Oriya },
{ 460, PT_SC, ucp_Osmanya },
{ 468, PT_GC, ucp_P },
{ 470, PT_PC, ucp_Pc },
{ 473, PT_PC, ucp_Pd },
{ 476, PT_PC, ucp_Pe },
{ 479, PT_PC, ucp_Pf },
{ 482, PT_SC, ucp_Phags_Pa },
{ 491, PT_SC, ucp_Phoenician },
{ 502, PT_PC, ucp_Pi },
{ 505, PT_PC, ucp_Po },
{ 508, PT_PC, ucp_Ps },
{ 511, PT_SC, ucp_Runic },
{ 517, PT_GC, ucp_S },
{ 519, PT_PC, ucp_Sc },
{ 522, PT_SC, ucp_Shavian },
{ 530, PT_SC, ucp_Sinhala },
{ 538, PT_PC, ucp_Sk },
{ 541, PT_PC, ucp_Sm },
{ 544, PT_PC, ucp_So },
{ 547, PT_SC, ucp_Syloti_Nagri },
{ 560, PT_SC, ucp_Syriac },
{ 567, PT_SC, ucp_Tagalog },
{ 575, PT_SC, ucp_Tagbanwa },
{ 584, PT_SC, ucp_Tai_Le },
{ 591, PT_SC, ucp_Tamil },
{ 597, PT_SC, ucp_Telugu },
{ 604, PT_SC, ucp_Thaana },
{ 611, PT_SC, ucp_Thai },
{ 616, PT_SC, ucp_Tibetan },
{ 624, PT_SC, ucp_Tifinagh },
{ 633, PT_SC, ucp_Ugaritic },
{ 642, PT_SC, ucp_Yi },
{ 645, PT_GC, ucp_Z },
{ 647, PT_PC, ucp_Zl },
{ 650, PT_PC, ucp_Zp },
{ 653, PT_PC, ucp_Zs }
};
/* Number of entries in _pcre_utt, for bounding the search of the table. */
const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table);
/* End of pcre_tables.c */

View File

@ -0,0 +1,132 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2006 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains an internal function that tests a compiled pattern to
see if it was compiled with the opposite endianness. If so, it uses an
auxiliary local function to flip the appropriate bytes. */
#include "pcre_internal.h"
/*************************************************
* Flip bytes in an integer *
*************************************************/
/* Reverse the byte order of a 2-byte or 4-byte quantity. It is used when the
magic number of a compiled regex does not match: first to test whether the
pattern was compiled on a host of the opposite endianness, and then to convert
the other multi-byte fields.

Arguments:
  value     the number whose bytes are to be reversed
  n         the width of the quantity in bytes; 2 selects the 16-bit swap, any
            other value is treated as 4

Returns:    the byte-reversed value; bits above the selected width are dropped
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
  /* Pull out the individual bytes, least significant first. */
  const unsigned long int byte0 = value & 0xff;
  const unsigned long int byte1 = (value >> 8) & 0xff;
  unsigned long int byte2, byte3;

  if (n == 2)
    return (byte0 << 8) | byte1;

  byte2 = (value >> 16) & 0xff;
  byte3 = (value >> 24) & 0xff;

  /* Reassemble in the opposite order. */
  return (byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3;
}
/*************************************************
* Test for a byte-flipped compiled regex *
*************************************************/
/* This function is called from pcre_exec(), pcre_dfa_exec(), and
pcre_fullinfo(), and only after the native MAGIC_NUMBER test has failed. It
checks whether the regex is byte-flipped, that is, whether it was compiled on
a system of the opposite endianness. If it is, converted copies of the regex
block (and of the study block, when there is one) are written into the
caller-supplied blocks; the originals are never modified.

Arguments:
  re              points to the regex
  internal_re     points to a new regex block
  study           points to study data, or NULL
  internal_study  points to a new study block

Returns:          the new block if it is indeed a byte-flipped regex
                  NULL if it is not
*/

real_pcre *
_pcre_try_flipped(const real_pcre *re, real_pcre *internal_re,
  const pcre_study_data *study, pcre_study_data *internal_study)
{
/* Local shorthands: store the byte-swapped value of src->field in dst->field.
The _16 form narrows the result with a pcre_uint16 cast. Both are undefined
again at the end of the function. */
#define TRYFLIP_WIDE(dst, src, field) \
  ((dst)->field = byteflip((src)->field, sizeof((src)->field)))
#define TRYFLIP_16(dst, src, field) \
  ((dst)->field = \
    (pcre_uint16)byteflip((src)->field, sizeof((src)->field)))

  const unsigned long int swapped_magic =
    byteflip(re->magic_number, sizeof(re->magic_number));

  /* Not a flipped regex either: let the caller report the bad magic number. */
  if (swapped_magic != MAGIC_NUMBER)
    return NULL;

  /* Start from a straight copy so that every field not listed below is
  carried over unchanged, then overwrite the multi-byte fields. */
  *internal_re = *re;

  TRYFLIP_WIDE(internal_re, re, size);
  TRYFLIP_WIDE(internal_re, re, options);

  TRYFLIP_16(internal_re, re, top_bracket);
  TRYFLIP_16(internal_re, re, top_backref);
  TRYFLIP_16(internal_re, re, first_byte);
  TRYFLIP_16(internal_re, re, req_byte);
  TRYFLIP_16(internal_re, re, name_table_offset);
  TRYFLIP_16(internal_re, re, name_entry_size);
  TRYFLIP_16(internal_re, re, name_count);

  /* The study block is optional; convert it the same way when present. */
  if (study != NULL)
    {
    *internal_study = *study;
    TRYFLIP_WIDE(internal_study, study, size);
    TRYFLIP_WIDE(internal_study, study, options);
    }

  return internal_re;

#undef TRYFLIP_WIDE
#undef TRYFLIP_16
}
/* End of pcre_tryflipped.c */

View File

@ -0,0 +1,126 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2006 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This file has been modified to use glib instead of the internal table
* in ucptable.c -- Marco Barisione */
/* This module contains code for searching the table of Unicode character
properties. */
#include "pcre_internal.h"
#include "ucp.h" /* Category definitions */
#include "ucpinternal.h" /* Internal table details */
/* Table to translate from a particular type value to the general value. It is
indexed by the particular type (ucp_Cc ... ucp_Zs), so its entries must stay
in the same order as the particular-type enum in ucp.h; the comment beside
each row names the slots in that order. The table is only ever read, hence
const. */

static const int ucp_gentype[] = {
  ucp_C, ucp_C, ucp_C, ucp_C, ucp_C,  /* Cc, Cf, Cn, Co, Cs */
  ucp_L, ucp_L, ucp_L, ucp_L, ucp_L,  /* Ll, Lm, Lo, Lt, Lu */
  ucp_M, ucp_M, ucp_M,                /* Mc, Me, Mn */
  ucp_N, ucp_N, ucp_N,                /* Nd, Nl, No */
  ucp_P, ucp_P, ucp_P, ucp_P, ucp_P,  /* Pc, Pd, Pe, Pf, Pi */
  ucp_P, ucp_P,                       /* Po, Ps */
  ucp_S, ucp_S, ucp_S, ucp_S,         /* Sc, Sk, Sm, So */
  ucp_Z, ucp_Z, ucp_Z                 /* Zl, Zp, Zs */
};
/*************************************************
* Search table and return type *
*************************************************/
/* Look up the Unicode properties of a character using glib. Three values are
produced: the general category (ucp_C, ucp_L, etc.) is the return value, the
detailed character type (ucp_Lu, ucp_Nd, etc.) and the script (ucp_Latin,
etc.) are stored through the two pointers.

Arguments:
  c           the character value
  type_ptr    the detailed character type is returned here
  script_ptr  the script is returned here

Returns:      the character type category
*/

int
_pcre_ucp_findprop(const unsigned int c, int *type_ptr, int *script_ptr)
{
  /* glib and PCRE number the Unicode types identically
   * (ucp_Ll == G_UNICODE_LOWERCASE_LETTER,
   * ucp_Zs == G_UNICODE_SPACE_SEPARATOR, and so on), so the glib results
   * can be handed back without any translation. */
  const int detailed_type = g_unichar_type(c);
  const int script = g_unichar_get_script(c);

  *type_ptr = detailed_type;
  *script_ptr = script;

  /* The detailed type doubles as the index into the category table. */
  return ucp_gentype[*type_ptr];
}
/*************************************************
* Search table and return other case *
*************************************************/
/* If the given character is a letter, and there is another case for the
letter, return the other case. Otherwise, return NOTACHAR.

Arguments:
  c           the character value

Returns:      the other case or NOTACHAR if none
*/

unsigned int
_pcre_ucp_othercase(const unsigned int c)
{
  /* Kept unsigned, like both the parameter and the return type, so that the
  NOTACHAR sentinel and the comparison with c never pass through a signed
  int. */
  unsigned int other_case = NOTACHAR;

  if (g_unichar_islower(c))
    other_case = g_unichar_toupper(c);
  else if (g_unichar_isupper(c))
    other_case = g_unichar_tolower(c);

  /* A conversion that hands back the same character means that there is no
  distinct other case. */
  if (other_case == c)
    other_case = NOTACHAR;

  return other_case;
}
/* End of pcre_ucp_searchfuncs.c */

View File

@ -0,0 +1,13 @@
#include "pcre_internal.h"
/*
 * GRegex does not need this function, so it only logs a warning and
 * always returns -1, which tells the caller that the string is valid
 * UTF-8. Neither argument is examined.
 */
int
_pcre_valid_utf8(const uschar *string, int length)
{
g_warning ("%s: this function should not be called", G_STRLOC);
return -1;
}

86
glib/pcre/pcre_version.c Normal file
View File

@ -0,0 +1,86 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2006 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains the external function pcre_version(), which returns a
string that identifies the PCRE version that is in use. */
#include "pcre_internal.h"
/*************************************************
* Return version string *
*************************************************/
/* These macros are the standard way of turning unquoted text into C strings.
They allow macros like PCRE_MAJOR to be defined without quotes, which is
convenient for user programs that want to test its value. STRING turns its
argument into a string literal exactly as written; XSTRING adds one level of
indirection so that a macro given as the argument is expanded to its value
before it is turned into a string. */
#define STRING(a) # a
#define XSTRING(s) STRING(s)
/* A problem turned up with PCRE_PRERELEASE, which is defined empty for
production releases. Originally, it was used naively in this code:
return XSTRING(PCRE_MAJOR)
"." XSTRING(PCRE_MINOR)
XSTRING(PCRE_PRERELEASE)
" " XSTRING(PCRE_DATE);
However, when PCRE_PRERELEASE is empty, this leads to an attempted expansion of
STRING(). The C standard states: "If (before argument substitution) any
argument consists of no preprocessing tokens, the behavior is undefined." It
turns out that gcc treats this case as a single empty string - which is what we
really want - but Visual C grumbles about the lack of an argument for the
macro. Unfortunately, both are within their rights. To cope with both ways of
handling this, I had to resort to some messy hackery that does a test at run time.
I could find no way of detecting that a macro is defined as an empty string at
pre-processor time. This hack uses a standard trick for avoiding calling
the STRING macro with an empty argument when doing the test. */
PCRE_DATA_SCOPE const char *
pcre_version(void)
{
  /* "Z" followed by whatever PCRE_PRERELEASE expands to. The leading Z keeps
  the argument of the stringifying macro non-empty; if nothing follows it, the
  second character of the literal is the terminating zero and this is a
  production release. */
  const char *prerelease_probe = XSTRING(Z PCRE_PRERELEASE);

  if (prerelease_probe[1] == 0)
    return XSTRING(PCRE_MAJOR.PCRE_MINOR PCRE_DATE);

  /* Pre-release: the pre-release tag is stringified together with the date,
  in a second literal that is concatenated to the version number. */
  return XSTRING(PCRE_MAJOR.PCRE_MINOR) XSTRING(PCRE_PRERELEASE PCRE_DATE);
}
/* End of pcre_version.c */

144
glib/pcre/pcre_xclass.c Normal file
View File

@ -0,0 +1,144 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2006 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains an internal function that is used to match an extended
class (one that contains characters whose values are > 255). It is used by both
pcre_exec() and pcre_dfa_exec(). */
#include "pcre_internal.h"
/*************************************************
* Match character against an XCLASS *
*************************************************/
/* This function is called to match a character against an extended class that
might contain values > 255.
Arguments:
c the character
data points to the flag byte of the XCLASS data
Returns: TRUE if character matches, else FALSE
*/
BOOL
_pcre_xclass(int c, const uschar *data)
{
int t;
/* The first byte of the XCLASS data is a flag byte: XCL_NOT marks a negated
class, XCL_MAP says that a bit map follows the flag byte. */
BOOL negated = (*data & XCL_NOT) != 0;
/* Character values < 256 are matched against a bitmap, if one is present. If
not, we still carry on, because there may be ranges that start below 256 in the
additional data. */
if (c < 256)
{
/* The map starts right after the flag byte, one bit per character value. */
if ((*data & XCL_MAP) != 0 && (data[1 + c/8] & (1 << (c&7))) != 0)
return !negated; /* char found */
}
/* First skip the bit map if present. Then match against the list of Unicode
properties or large chars or ranges that end with a large char. We won't ever
encounter XCL_PROP or XCL_NOTPROP when UCP support is not compiled. */
/* The flag byte is consumed here; the map, when present, is 32 bytes long. */
if ((*data++ & XCL_MAP) != 0) data += 32;
/* Each item starts with a type byte; XCL_END terminates the list. */
while ((t = *data++) != XCL_END)
{
int x, y;
if (t == XCL_SINGLE)
{
/* GETCHARINC is defined elsewhere; presumably it reads one character into its
first argument and advances data past it - confirm in pcre_internal.h. */
GETCHARINC(x, data);
if (c == x) return !negated;
}
else if (t == XCL_RANGE)
{
/* A range item carries its two end points, both inclusive. */
GETCHARINC(x, data);
GETCHARINC(y, data);
if (c >= x && c <= y) return !negated;
}
#ifdef SUPPORT_UCP
else /* XCL_PROP & XCL_NOTPROP */
{
int chartype, script;
int category = _pcre_ucp_findprop(c, &chartype, &script);
/* data[0] is the kind of property test, data[1] the value to test against.
In the comparisons below the left-hand side says whether the character has the
property and the right-hand side whether the item is the positive form, so the
item matches when XCL_PROP finds the property or XCL_NOTPROP does not. */
switch(*data)
{
case PT_ANY:
/* Only the positive form can match here; XCL_NOTPROP falls through to the
next item. */
if (t == XCL_PROP) return !negated;
break;
case PT_LAMP:
if ((chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt) ==
(t == XCL_PROP)) return !negated;
break;
case PT_GC:
if ((data[1] == category) == (t == XCL_PROP)) return !negated;
break;
case PT_PC:
if ((data[1] == chartype) == (t == XCL_PROP)) return !negated;
break;
case PT_SC:
if ((data[1] == script) == (t == XCL_PROP)) return !negated;
break;
/* This should never occur, but compilers may mutter if there is no
default. */
default:
return FALSE;
}
/* No match for this item: step over its two data bytes. */
data += 2;
}
#endif /* SUPPORT_UCP */
}
return negated; /* char did not match */
}
/* End of pcre_xclass.c */

133
glib/pcre/ucp.h Normal file
View File

@ -0,0 +1,133 @@
/*************************************************
* Unicode Property Table handler *
*************************************************/
#ifndef _UCP_H
#define _UCP_H
/* This file contains definitions of the property values that are returned by
the function _pcre_ucp_findprop(). New values that are added for new releases
of Unicode should always be at the end of each enum, for backwards
compatibility. */
/* These are the general character categories. They are the values returned by
_pcre_ucp_findprop(), which obtains them from the ucp_gentype table in
pcre_ucp_searchfuncs.c. */
enum {
ucp_C, /* Other */
ucp_L, /* Letter */
ucp_M, /* Mark */
ucp_N, /* Number */
ucp_P, /* Punctuation */
ucp_S, /* Symbol */
ucp_Z /* Separator */
};
/* These are the particular character types. _pcre_ucp_findprop() stores the
result of g_unichar_type() directly as one of these values and also uses it as
the index into the ucp_gentype table, so the order of this enum has to stay in
step with both glib's numbering of the Unicode types and that table. */
enum {
ucp_Cc, /* Control */
ucp_Cf, /* Format */
ucp_Cn, /* Unassigned */
ucp_Co, /* Private use */
ucp_Cs, /* Surrogate */
ucp_Ll, /* Lower case letter */
ucp_Lm, /* Modifier letter */
ucp_Lo, /* Other letter */
ucp_Lt, /* Title case letter */
ucp_Lu, /* Upper case letter */
ucp_Mc, /* Spacing mark */
ucp_Me, /* Enclosing mark */
ucp_Mn, /* Non-spacing mark */
ucp_Nd, /* Decimal number */
ucp_Nl, /* Letter number */
ucp_No, /* Other number */
ucp_Pc, /* Connector punctuation */
ucp_Pd, /* Dash punctuation */
ucp_Pe, /* Close punctuation */
ucp_Pf, /* Final punctuation */
ucp_Pi, /* Initial punctuation */
ucp_Po, /* Other punctuation */
ucp_Ps, /* Open punctuation */
ucp_Sc, /* Currency symbol */
ucp_Sk, /* Modifier symbol */
ucp_Sm, /* Mathematical symbol */
ucp_So, /* Other symbol */
ucp_Zl, /* Line separator */
ucp_Zp, /* Paragraph separator */
ucp_Zs /* Space separator */
};
/* These are the script identifications. Each one is defined as the matching
G_UNICODE_SCRIPT_* constant, so that the value of g_unichar_get_script(), which
_pcre_ucp_findprop() returns as the script, can be compared with them directly.
NOTE(review): this header does not include a glib header itself, so the
G_UNICODE_SCRIPT_* names presumably have to be visible before it is included -
confirm against pcre_internal.h. */
enum {
ucp_Arabic = G_UNICODE_SCRIPT_ARABIC,
ucp_Armenian = G_UNICODE_SCRIPT_ARMENIAN,
ucp_Bengali = G_UNICODE_SCRIPT_BENGALI,
ucp_Bopomofo = G_UNICODE_SCRIPT_BOPOMOFO,
ucp_Braille = G_UNICODE_SCRIPT_BRAILLE,
ucp_Buginese = G_UNICODE_SCRIPT_BUGINESE,
ucp_Buhid = G_UNICODE_SCRIPT_BUHID,
ucp_Canadian_Aboriginal = G_UNICODE_SCRIPT_CANADIAN_ABORIGINAL,
ucp_Cherokee = G_UNICODE_SCRIPT_CHEROKEE,
ucp_Common = G_UNICODE_SCRIPT_COMMON,
ucp_Coptic = G_UNICODE_SCRIPT_COPTIC,
ucp_Cypriot = G_UNICODE_SCRIPT_CYPRIOT,
ucp_Cyrillic = G_UNICODE_SCRIPT_CYRILLIC,
ucp_Deseret = G_UNICODE_SCRIPT_DESERET,
ucp_Devanagari = G_UNICODE_SCRIPT_DEVANAGARI,
ucp_Ethiopic = G_UNICODE_SCRIPT_ETHIOPIC,
ucp_Georgian = G_UNICODE_SCRIPT_GEORGIAN,
ucp_Glagolitic = G_UNICODE_SCRIPT_GLAGOLITIC,
ucp_Gothic = G_UNICODE_SCRIPT_GOTHIC,
ucp_Greek = G_UNICODE_SCRIPT_GREEK,
ucp_Gujarati = G_UNICODE_SCRIPT_GUJARATI,
ucp_Gurmukhi = G_UNICODE_SCRIPT_GURMUKHI,
ucp_Han = G_UNICODE_SCRIPT_HAN,
ucp_Hangul = G_UNICODE_SCRIPT_HANGUL,
ucp_Hanunoo = G_UNICODE_SCRIPT_HANUNOO,
ucp_Hebrew = G_UNICODE_SCRIPT_HEBREW,
ucp_Hiragana = G_UNICODE_SCRIPT_HIRAGANA,
ucp_Inherited = G_UNICODE_SCRIPT_INHERITED,
ucp_Kannada = G_UNICODE_SCRIPT_KANNADA,
ucp_Katakana = G_UNICODE_SCRIPT_KATAKANA,
ucp_Kharoshthi = G_UNICODE_SCRIPT_KHAROSHTHI,
ucp_Khmer = G_UNICODE_SCRIPT_KHMER,
ucp_Lao = G_UNICODE_SCRIPT_LAO,
ucp_Latin = G_UNICODE_SCRIPT_LATIN,
ucp_Limbu = G_UNICODE_SCRIPT_LIMBU,
ucp_Linear_B = G_UNICODE_SCRIPT_LINEAR_B,
ucp_Malayalam = G_UNICODE_SCRIPT_MALAYALAM,
ucp_Mongolian = G_UNICODE_SCRIPT_MONGOLIAN,
ucp_Myanmar = G_UNICODE_SCRIPT_MYANMAR,
ucp_New_Tai_Lue = G_UNICODE_SCRIPT_NEW_TAI_LUE,
ucp_Ogham = G_UNICODE_SCRIPT_OGHAM,
ucp_Old_Italic = G_UNICODE_SCRIPT_OLD_ITALIC,
ucp_Old_Persian = G_UNICODE_SCRIPT_OLD_PERSIAN,
ucp_Oriya = G_UNICODE_SCRIPT_ORIYA,
ucp_Osmanya = G_UNICODE_SCRIPT_OSMANYA,
ucp_Runic = G_UNICODE_SCRIPT_RUNIC,
ucp_Shavian = G_UNICODE_SCRIPT_SHAVIAN,
ucp_Sinhala = G_UNICODE_SCRIPT_SINHALA,
ucp_Syloti_Nagri = G_UNICODE_SCRIPT_SYLOTI_NAGRI,
ucp_Syriac = G_UNICODE_SCRIPT_SYRIAC,
ucp_Tagalog = G_UNICODE_SCRIPT_TAGALOG,
ucp_Tagbanwa = G_UNICODE_SCRIPT_TAGBANWA,
ucp_Tai_Le = G_UNICODE_SCRIPT_TAI_LE,
ucp_Tamil = G_UNICODE_SCRIPT_TAMIL,
ucp_Telugu = G_UNICODE_SCRIPT_TELUGU,
ucp_Thaana = G_UNICODE_SCRIPT_THAANA,
ucp_Thai = G_UNICODE_SCRIPT_THAI,
ucp_Tibetan = G_UNICODE_SCRIPT_TIBETAN,
ucp_Tifinagh = G_UNICODE_SCRIPT_TIFINAGH,
ucp_Ugaritic = G_UNICODE_SCRIPT_UGARITIC,
ucp_Yi = G_UNICODE_SCRIPT_YI,
ucp_Balinese = G_UNICODE_SCRIPT_BALINESE, /* New for Unicode 5.0.0 */
ucp_Cuneiform = G_UNICODE_SCRIPT_CUNEIFORM, /* New for Unicode 5.0.0 */
ucp_Nko = G_UNICODE_SCRIPT_NKO, /* New for Unicode 5.0.0 */
ucp_Phags_Pa = G_UNICODE_SCRIPT_PHAGS_PA, /* New for Unicode 5.0.0 */
ucp_Phoenician = G_UNICODE_SCRIPT_PHOENICIAN /* New for Unicode 5.0.0 */
};
#endif
/* End of ucp.h */

92
glib/pcre/ucpinternal.h Normal file
View File

@ -0,0 +1,92 @@
/*************************************************
* Unicode Property Table handler *
*************************************************/
#ifndef _UCPINTERNAL_H
#define _UCPINTERNAL_H
/* Internal header file defining the layout of the bits in each pair of 32-bit
words that form a data item in the table. The bit assignments within f0 and f1
are given by the f0_* and f1_* macros and the description that follow. */
typedef struct cnode {
pcre_uint32 f0; /* script number, range flag and code point */
pcre_uint32 f1; /* character type, and a range or other-case offset */
} cnode;
/* Things for the f0 field. The range flag is the single 0x00800000 bit: the
layout description below reserves 0x00600000 as spare bits and 0x001fffff for
the code point, so the flag mask must not reach into either of them. */
#define f0_scriptmask 0xff000000 /* Mask for script field */
#define f0_scriptshift 24 /* Shift for script value */
#define f0_rangeflag 0x00800000 /* Flag for a range item */
#define f0_charmask 0x001fffff /* Mask for code point value */
/* Things for the f1 field. The range offset and the case offset share the
same 16 bits; which of the two is meant depends on the range flag in f0. */
#define f1_typemask 0xfc000000 /* Mask for char type field */
#define f1_typeshift 26 /* Shift for the type field */
#define f1_rangemask 0x0000ffff /* Mask for a range offset */
#define f1_casemask 0x0000ffff /* Mask for a case offset */
#define f1_caseneg 0xffff8000 /* Bits for negation */
/* The data consists of a vector of structures of type cnode. The two unsigned
32-bit integers are used as follows:
(f0) (1) The most significant byte holds the script number. The numbers are
defined by the enum in ucp.h.
(2) The 0x00800000 bit is set if this entry defines a range of characters.
It is not set if this entry defines a single character
(3) The 0x00600000 bits are spare.
(4) The 0x001fffff bits contain the code point. No Unicode code point will
ever be greater than 0x0010ffff, so this should be OK for ever.
(f1) (1) The 0xfc000000 bits contain the character type number. The numbers are
defined by an enum in ucp.h.
(2) The 0x03ff0000 bits are spare.
(3) The 0x0000ffff bits contain EITHER the unsigned offset to the top of
range if this entry defines a range, OR the *signed* offset to the
character's "other case" partner if this entry defines a single
character. There is no partner if the value is zero.
-------------------------------------------------------------------------------
| script (8) |.|.|.| codepoint (21) || type (6) |.|.| spare (8) | offset (16) |
-------------------------------------------------------------------------------
| | | | |
| | |-> spare | |-> spare
| | |
| |-> spare |-> spare
|
|-> range flag
The upper/lower casing information is set only for characters that come in
pairs. The non-one-to-one mappings in the Unicode data are ignored.
When searching the data, proceed as follows:
(1) Set up for a binary chop search.
(2) If the top is not greater than the bottom, the character is not in the
table. Its type must therefore be "Cn" ("Unassigned").
(3) Find the middle vector element.
(4) Extract the code point and compare. If equal, we are done.
(5) If the test character is smaller, set the top to the current point, and
goto (2).
(6) If the current entry defines a range, compute the last character by adding
the offset, and see if the test character is within the range. If it is,
we are done.
(7) Otherwise, set the bottom to one element past the current point and goto
(2).
*/
#endif /* _UCPINTERNAL_H */
/* End of ucpinternal.h */

View File

@ -0,0 +1,8 @@
# Files that are shipped in the tarball but never built: the update script,
# the two Makefile.am fragments, the patches, and the replacement sources.
# (Per the ChangeLog these are used to update the internal PCRE copy.)
EXTRA_DIST = \
update.sh \
Makefile.am-1 \
Makefile.am-2 \
digitab.patch \
memory.patch \
pcre_ucp_searchfuncs.c \
pcre_valid_utf8.c

View File

@ -0,0 +1,28 @@
# Preprocessor flags for the internal PCRE copy: the log domain, the PCRE
# build-time settings passed as -D defines (among them SUPPORT_UCP and
# SUPPORT_UTF8), the include paths, and the usual GLib debug/warning flags.
INCLUDES = \
-DG_LOG_DOMAIN=\"GLib-GRegex\" \
-DSUPPORT_UCP \
-DSUPPORT_UTF8 \
-DNEWLINE=-1 \
-DMATCH_LIMIT=10000000 \
-DMATCH_LIMIT_RECURSION=10000000 \
-DMAX_NAME_SIZE=32 \
-DMAX_NAME_COUNT=10000 \
-DMAX_DUPLENGTH=30000 \
-DLINK_SIZE=2 \
-DEBCDIC=0 \
-DPOSIX_MALLOC_THRESHOLD=10 \
-I$(top_srcdir) \
-I$(srcdir) \
-I$(top_srcdir)/glib \
@GLIB_DEBUG_FLAGS@ \
-DG_DISABLE_DEPRECATED \
$(DEPRECATED_FLAGS)\
$(WARN_CFLAGS) \
$(PCRE_WARN_CFLAGS) \
$(DEP_CFLAGS)
# PCRE is built as a libtool library that is not installed (noinst).
noinst_LTLIBRARIES = libpcre.la
libpcre_headers =
# NOTE(review): the source list is deliberately left open here; presumably
# update.sh appends the file names and then Makefile.am-2 - confirm.
libpcre_la_SOURCES = \

View File

@ -0,0 +1,10 @@
$(libpcre_headers)
# NOTE(review): the line above has no assignment of its own; presumably it is
# the last entry of the libpcre_la_SOURCES list opened in Makefile.am-1, so
# nothing may be inserted before it - confirm against update.sh.
libpcre_la_LIBADD = $(DEP_LIBS)
libpcre_la_LDFLAGS = -no-undefined
EXTRA_DIST = \
COPYING \
makefile.msc

View File

@ -0,0 +1,133 @@
--- pcre_compile.c 2006-10-10 12:00:00.000000000 +0200
+++ pcre_compile.c 2006-10-10 12:00:00.000000000 +0200
@@ -246,130 +246,6 @@ static const char *error_texts[] = {
};
-/* Table to identify digits and hex digits. This is used when compiling
-patterns. Note that the tables in chartables are dependent on the locale, and
-may mark arbitrary characters as digits - but the PCRE compiling code expects
-to handle only 0-9, a-z, and A-Z as digits when compiling. That is why we have
-a private table here. It costs 256 bytes, but it is a lot faster than doing
-character value tests (at least in some simple cases I timed), and in some
-applications one wants PCRE to compile efficiently as well as match
-efficiently.
-
-For convenience, we use the same bit definitions as in chartables:
-
- 0x04 decimal digit
- 0x08 hexadecimal digit
-
-Then we can use ctype_digit and ctype_xdigit in the code. */
-
-#if !EBCDIC /* This is the "normal" case, for ASCII systems */
-static const unsigned char digitab[] =
- {
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 8- 15 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - ' */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* ( - / */
- 0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c, /* 0 - 7 */
- 0x0c,0x0c,0x00,0x00,0x00,0x00,0x00,0x00, /* 8 - ? */
- 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* @ - G */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* H - O */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* P - W */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* X - _ */
- 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* ` - g */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* h - o */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* p - w */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* x -127 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
-
-#else /* This is the "abnormal" case, for EBCDIC systems */
-static const unsigned char digitab[] =
- {
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 0 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 8- 15 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 10 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 32- 39 20 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 40- 47 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 48- 55 30 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 56- 63 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - 71 40 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 72- | */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* & - 87 50 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 88- ¬ */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - -103 60 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 104- ? */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 112-119 70 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 120- " */
- 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* 128- g 80 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* h -143 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144- p 90 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* q -159 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160- x A0 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* y -175 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* ^ -183 B0 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
- 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* { - G C0 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* H -207 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* } - P D0 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* Q -223 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* \ - X E0 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* Y -239 */
- 0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c, /* 0 - 7 F0 */
- 0x0c,0x0c,0x00,0x00,0x00,0x00,0x00,0x00};/* 8 -255 */
-
-static const unsigned char ebcdic_chartab[] = { /* chartable partial dup */
- 0x80,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /* 0- 7 */
- 0x00,0x00,0x00,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
- 0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /* 16- 23 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
- 0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /* 32- 39 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 40- 47 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 48- 55 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 56- 63 */
- 0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - 71 */
- 0x00,0x00,0x00,0x80,0x00,0x80,0x80,0x80, /* 72- | */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* & - 87 */
- 0x00,0x00,0x00,0x80,0x80,0x80,0x00,0x00, /* 88- ¬ */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - -103 */
- 0x00,0x00,0x00,0x00,0x00,0x10,0x00,0x80, /* 104- ? */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 112-119 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 120- " */
- 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* 128- g */
- 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* h -143 */
- 0x00,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* 144- p */
- 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* q -159 */
- 0x00,0x00,0x12,0x12,0x12,0x12,0x12,0x12, /* 160- x */
- 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* y -175 */
- 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* ^ -183 */
- 0x00,0x00,0x80,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
- 0x80,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* { - G */
- 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* H -207 */
- 0x00,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* } - P */
- 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* Q -223 */
- 0x00,0x00,0x12,0x12,0x12,0x12,0x12,0x12, /* \ - X */
- 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* Y -239 */
- 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
- 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x00};/* 8 -255 */
-#endif
-
-
/* Definition to allow mutual recursion */
static BOOL

View File

@ -0,0 +1,87 @@
diff -r 0f4042339eb5 pcre/pcre.h
--- pcre/pcre.h Tue Jul 25 22:39:16 2006 +0200
+++ pcre/pcre.h Tue Jul 25 22:52:10 2006 +0200
@@ -233,25 +233,14 @@ typedef struct pcre_callout_block {
/* ------------------------------------------------------------------ */
} pcre_callout_block;
-/* Indirection for store get and free functions. These can be set to
-alternative malloc/free functions if required. Special ones are used in the
-non-recursive case for "frames". There is also an optional callout function
-that is triggered by the (?) regex item. For Virtual Pascal, these definitions
-have to take another form. */
-
-#ifndef VPCOMPAT
-PCRE_DATA_SCOPE void *(*pcre_malloc)(size_t);
-PCRE_DATA_SCOPE void (*pcre_free)(void *);
-PCRE_DATA_SCOPE void *(*pcre_stack_malloc)(size_t);
-PCRE_DATA_SCOPE void (*pcre_stack_free)(void *);
+#include "glib.h"
+#include "galias.h"
+
+#define pcre_malloc g_try_malloc
+#define pcre_free g_free
+#define pcre_stack_malloc g_try_malloc
+
PCRE_DATA_SCOPE int (*pcre_callout)(pcre_callout_block *);
-#else /* VPCOMPAT */
-PCRE_DATA_SCOPE void *pcre_malloc(size_t);
-PCRE_DATA_SCOPE void pcre_free(void *);
-PCRE_DATA_SCOPE void *pcre_stack_malloc(size_t);
-PCRE_DATA_SCOPE void pcre_stack_free(void *);
-PCRE_DATA_SCOPE int pcre_callout(pcre_callout_block *);
-#endif /* VPCOMPAT */
/* Exported PCRE functions */
diff -r 0f4042339eb5 pcre/pcre_globals.c
--- pcre/pcre_globals.c Tue Jul 25 22:39:16 2006 +0200
+++ pcre/pcre_globals.c Tue Jul 25 22:52:10 2006 +0200
@@ -50,32 +50,9 @@ differently, and global variables are no
#include "pcre_internal.h"
-#ifndef VPCOMPAT
-
-/**************************************************************************
-This code used to be here for use when compiling as a C++ library. However,
-according to Dair Grant it is not needed: "
-
- Including 'extern "C"' in the declaration generates an "initialized and
- declared `extern'" warning from gcc 4.0.1. Since we include pcre_internal.h,
- which includes pcre.h, which declares these prototypes within an extern "C" {}
- block, we shouldn't need the prefix here.
-
-So, from Release 7.0 I have cut this out.
-
#ifdef __cplusplus
-extern "C" void *(*pcre_malloc)(size_t) = malloc;
-extern "C" void (*pcre_free)(void *) = free;
-extern "C" void *(*pcre_stack_malloc)(size_t) = malloc;
-extern "C" void (*pcre_stack_free)(void *) = free;
extern "C" int (*pcre_callout)(pcre_callout_block *) = NULL;
#else
-**************************************************************************/
-
-void *(*pcre_malloc)(size_t) = malloc;
-void (*pcre_free)(void *) = free;
-void *(*pcre_stack_malloc)(size_t) = malloc;
-void (*pcre_stack_free)(void *) = free;
int (*pcre_callout)(pcre_callout_block *) = NULL;
#endif
diff -r 0f4042339eb5 pcre/pcre_internal.h
--- pcre/pcre_internal.h Tue Jul 25 22:39:16 2006 +0200
+++ pcre/pcre_internal.h Tue Jul 25 22:52:10 2006 +0200
@@ -480,10 +480,7 @@ variable-length repeat, or a anything ot
/* Miscellaneous definitions */
-typedef int BOOL;
-
-#define FALSE 0
-#define TRUE 1
+typedef gboolean BOOL;
/* Escape items that are just an encoding of a particular data value. */

View File

@ -0,0 +1,126 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2006 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This file has been modified to use glib instead of the internal table
* in ucptable.c -- Marco Barisione */
/* This module contains code for searching the table of Unicode character
properties. */
#include "pcre_internal.h"
#include "ucp.h" /* Category definitions */
#include "ucpinternal.h" /* Internal table details */
/* Table to translate from particular type value to the general value.
The table is indexed by the detailed type that _pcre_ucp_findprop() stores
through type_ptr, and each entry is the general category (ucp_C, ucp_L, ...)
for that detailed type. It is only ever read, so it is declared const. */
static const int ucp_gentype[] = {
  ucp_C, ucp_C, ucp_C, ucp_C, ucp_C, /* Cc, Cf, Cn, Co, Cs */
  ucp_L, ucp_L, ucp_L, ucp_L, ucp_L, /* Ll, Lu, Lm, Lo, Lt */
  ucp_M, ucp_M, ucp_M, /* Mc, Me, Mn */
  ucp_N, ucp_N, ucp_N, /* Nd, Nl, No */
  ucp_P, ucp_P, ucp_P, ucp_P, ucp_P, /* Pc, Pd, Pe, Pf, Pi */
  ucp_P, ucp_P, /* Ps, Po */
  ucp_S, ucp_S, ucp_S, ucp_S, /* Sc, Sk, Sm, So */
  ucp_Z, ucp_Z, ucp_Z /* Zl, Zp, Zs */
};
/*************************************************
* Look up the Unicode properties of a char *
*************************************************/
/* Reports three pieces of information about a character: its general
category (ucp_C, ucp_L, etc.) as the return value, its detailed type
(ucp_Lu, ucp_Nd, etc.) through type_ptr, and its script (ucp_Latin, etc.)
through script_ptr. Both lookups are delegated to glib.
Arguments:
c the character value
type_ptr receives the detailed character type
script_ptr receives the script
Returns: the general category of the character
*/
int
_pcre_ucp_findprop(const unsigned int c, int *type_ptr, int *script_ptr)
{
  /* This relies on the Unicode type values being identical in glib and
   * in PCRE (ucp_Ll == G_UNICODE_LOWERCASE_LETTER,
   * ucp_Zs == G_UNICODE_SPACE_SEPARATOR, and so on), so the glib results
   * can be handed back unchanged. */
  const int detailed_type = g_unichar_type(c);
  const int script = g_unichar_get_script(c);

  *type_ptr = detailed_type;
  *script_ptr = script;

  /* NOTE(review): the detailed type is used unchecked as a table index. */
  return ucp_gentype[*type_ptr];
}
/*************************************************
* Search table and return other case *
*************************************************/
/* If the given character is a letter, and there is another case for the
letter, return the other case. Otherwise, return NOTACHAR.
Arguments:
c the character value
Returns: the other case or NOTACHAR if none
*/
unsigned int
_pcre_ucp_othercase(const unsigned int c)
{
  /* Kept unsigned to match both the parameter and the return type, so the
   * sentinel and the comparison with c never go through a signed value. */
  unsigned int other_case = NOTACHAR;

  if (g_unichar_islower(c))
    other_case = g_unichar_toupper(c);
  else if (g_unichar_isupper(c))
    other_case = g_unichar_tolower(c);

  /* A conversion that maps the character to itself means there is no
   * distinct other case. */
  if (other_case == c)
    other_case = NOTACHAR;

  return other_case;
}
/* End of pcre_ucp_searchfuncs.c */

View File

@ -0,0 +1,13 @@
#include "pcre_internal.h"
/*
 * Stub replacement for PCRE's UTF-8 validator. GRegex does not need this
 * function, so it only emits a warning naming this source location and
 * then always returns -1, the value that reports the string as a valid
 * UTF-8 encoded string.
 */
int
_pcre_valid_utf8(const uschar *string, int length)
{
  /* Neither argument is inspected. */
  (void) string;
  (void) length;

  g_warning ("%s: this function should not be called", G_STRLOC);
  return -1;
}

141
glib/update-pcre/ucp.patch Normal file
View File

@ -0,0 +1,141 @@
--- pcre/ucp.h 2006-07-05 13:28:01.000000000 +0200
+++ pcre/ucp.h 2006-10-09 16:27:19.000000000 +0200
@@ -60,72 +60,72 @@ enum {
/* These are the script identifications. */
enum {
- ucp_Arabic,
- ucp_Armenian,
- ucp_Bengali,
- ucp_Bopomofo,
- ucp_Braille,
- ucp_Buginese,
- ucp_Buhid,
- ucp_Canadian_Aboriginal,
- ucp_Cherokee,
- ucp_Common,
- ucp_Coptic,
- ucp_Cypriot,
- ucp_Cyrillic,
- ucp_Deseret,
- ucp_Devanagari,
- ucp_Ethiopic,
- ucp_Georgian,
- ucp_Glagolitic,
- ucp_Gothic,
- ucp_Greek,
- ucp_Gujarati,
- ucp_Gurmukhi,
- ucp_Han,
- ucp_Hangul,
- ucp_Hanunoo,
- ucp_Hebrew,
- ucp_Hiragana,
- ucp_Inherited,
- ucp_Kannada,
- ucp_Katakana,
- ucp_Kharoshthi,
- ucp_Khmer,
- ucp_Lao,
- ucp_Latin,
- ucp_Limbu,
- ucp_Linear_B,
- ucp_Malayalam,
- ucp_Mongolian,
- ucp_Myanmar,
- ucp_New_Tai_Lue,
- ucp_Ogham,
- ucp_Old_Italic,
- ucp_Old_Persian,
- ucp_Oriya,
- ucp_Osmanya,
- ucp_Runic,
- ucp_Shavian,
- ucp_Sinhala,
- ucp_Syloti_Nagri,
- ucp_Syriac,
- ucp_Tagalog,
- ucp_Tagbanwa,
- ucp_Tai_Le,
- ucp_Tamil,
- ucp_Telugu,
- ucp_Thaana,
- ucp_Thai,
- ucp_Tibetan,
- ucp_Tifinagh,
- ucp_Ugaritic,
- ucp_Yi,
- ucp_Balinese, /* New for Unicode 5.0.0 */
- ucp_Cuneiform, /* New for Unicode 5.0.0 */
- ucp_Nko, /* New for Unicode 5.0.0 */
- ucp_Phags_Pa, /* New for Unicode 5.0.0 */
- ucp_Phoenician /* New for Unicode 5.0.0 */
+ ucp_Arabic = G_UNICODE_SCRIPT_ARABIC,
+ ucp_Armenian = G_UNICODE_SCRIPT_ARMENIAN,
+ ucp_Bengali = G_UNICODE_SCRIPT_BENGALI,
+ ucp_Bopomofo = G_UNICODE_SCRIPT_BOPOMOFO,
+ ucp_Braille = G_UNICODE_SCRIPT_BRAILLE,
+ ucp_Buginese = G_UNICODE_SCRIPT_BUGINESE,
+ ucp_Buhid = G_UNICODE_SCRIPT_BUHID,
+ ucp_Canadian_Aboriginal = G_UNICODE_SCRIPT_CANADIAN_ABORIGINAL,
+ ucp_Cherokee = G_UNICODE_SCRIPT_CHEROKEE,
+ ucp_Common = G_UNICODE_SCRIPT_COMMON,
+ ucp_Coptic = G_UNICODE_SCRIPT_COPTIC,
+ ucp_Cypriot = G_UNICODE_SCRIPT_CYPRIOT,
+ ucp_Cyrillic = G_UNICODE_SCRIPT_CYRILLIC,
+ ucp_Deseret = G_UNICODE_SCRIPT_DESERET,
+ ucp_Devanagari = G_UNICODE_SCRIPT_DEVANAGARI,
+ ucp_Ethiopic = G_UNICODE_SCRIPT_ETHIOPIC,
+ ucp_Georgian = G_UNICODE_SCRIPT_GEORGIAN,
+ ucp_Glagolitic = G_UNICODE_SCRIPT_GLAGOLITIC,
+ ucp_Gothic = G_UNICODE_SCRIPT_GOTHIC,
+ ucp_Greek = G_UNICODE_SCRIPT_GREEK,
+ ucp_Gujarati = G_UNICODE_SCRIPT_GUJARATI,
+ ucp_Gurmukhi = G_UNICODE_SCRIPT_GURMUKHI,
+ ucp_Han = G_UNICODE_SCRIPT_HAN,
+ ucp_Hangul = G_UNICODE_SCRIPT_HANGUL,
+ ucp_Hanunoo = G_UNICODE_SCRIPT_HANUNOO,
+ ucp_Hebrew = G_UNICODE_SCRIPT_HEBREW,
+ ucp_Hiragana = G_UNICODE_SCRIPT_HIRAGANA,
+ ucp_Inherited = G_UNICODE_SCRIPT_INHERITED,
+ ucp_Kannada = G_UNICODE_SCRIPT_KANNADA,
+ ucp_Katakana = G_UNICODE_SCRIPT_KATAKANA,
+ ucp_Kharoshthi = G_UNICODE_SCRIPT_KHAROSHTHI,
+ ucp_Khmer = G_UNICODE_SCRIPT_KHMER,
+ ucp_Lao = G_UNICODE_SCRIPT_LAO,
+ ucp_Latin = G_UNICODE_SCRIPT_LATIN,
+ ucp_Limbu = G_UNICODE_SCRIPT_LIMBU,
+ ucp_Linear_B = G_UNICODE_SCRIPT_LINEAR_B,
+ ucp_Malayalam = G_UNICODE_SCRIPT_MALAYALAM,
+ ucp_Mongolian = G_UNICODE_SCRIPT_MONGOLIAN,
+ ucp_Myanmar = G_UNICODE_SCRIPT_MYANMAR,
+ ucp_New_Tai_Lue = G_UNICODE_SCRIPT_NEW_TAI_LUE,
+ ucp_Ogham = G_UNICODE_SCRIPT_OGHAM,
+ ucp_Old_Italic = G_UNICODE_SCRIPT_OLD_ITALIC,
+ ucp_Old_Persian = G_UNICODE_SCRIPT_OLD_PERSIAN,
+ ucp_Oriya = G_UNICODE_SCRIPT_ORIYA,
+ ucp_Osmanya = G_UNICODE_SCRIPT_OSMANYA,
+ ucp_Runic = G_UNICODE_SCRIPT_RUNIC,
+ ucp_Shavian = G_UNICODE_SCRIPT_SHAVIAN,
+ ucp_Sinhala = G_UNICODE_SCRIPT_SINHALA,
+ ucp_Syloti_Nagri = G_UNICODE_SCRIPT_SYLOTI_NAGRI,
+ ucp_Syriac = G_UNICODE_SCRIPT_SYRIAC,
+ ucp_Tagalog = G_UNICODE_SCRIPT_TAGALOG,
+ ucp_Tagbanwa = G_UNICODE_SCRIPT_TAGBANWA,
+ ucp_Tai_Le = G_UNICODE_SCRIPT_TAI_LE,
+ ucp_Tamil = G_UNICODE_SCRIPT_TAMIL,
+ ucp_Telugu = G_UNICODE_SCRIPT_TELUGU,
+ ucp_Thaana = G_UNICODE_SCRIPT_THAANA,
+ ucp_Thai = G_UNICODE_SCRIPT_THAI,
+ ucp_Tibetan = G_UNICODE_SCRIPT_TIBETAN,
+ ucp_Tifinagh = G_UNICODE_SCRIPT_TIFINAGH,
+ ucp_Ugaritic = G_UNICODE_SCRIPT_UGARITIC,
+ ucp_Yi = G_UNICODE_SCRIPT_YI,
+ ucp_Balinese = G_UNICODE_SCRIPT_BALINESE, /* New for Unicode 5.0.0 */
+ ucp_Cuneiform = G_UNICODE_SCRIPT_CUNEIFORM, /* New for Unicode 5.0.0 */
+ ucp_Nko = G_UNICODE_SCRIPT_NKO, /* New for Unicode 5.0.0 */
+ ucp_Phags_Pa = G_UNICODE_SCRIPT_PHAGS_PA, /* New for Unicode 5.0.0 */
+ ucp_Phoenician = G_UNICODE_SCRIPT_PHOENICIAN /* New for Unicode 5.0.0 */
};
#endif

159
glib/update-pcre/update.sh Normal file
View File

@ -0,0 +1,159 @@
#! /bin/sh
# Replaces the bundled PCRE sources in ./pcre with the version found in the
# directory given as the first argument, then regenerates the makefiles and
# applies the local patches. Must be run from the directory that contains
# gregex.h.

# Location of the helper files. This is relative to the pcre directory,
# because it is only used after the "cd pcre" below.
IN="../update-pcre"
PCRE=$1
if [ "x$PCRE" = x -o "x$PCRE" = x--help -o "x$PCRE" = x-h ]; then
    cat >&2 << EOF
$0 PCRE-DIR
Updates the local PCRE copy with a different version of the library,
contained in the directory PCRE-DIR.
This will delete the content of the local pcre directory, copy the
necessary files from PCRE-DIR, and generate other needed files, such
as Makefile.am
EOF
    exit
fi
if [ ! -f gregex.h ]; then
    # Diagnostics go to stderr so they are not lost or mixed with output.
    echo "This script should be executed from the directory containing gregex.c." >&2
    exit 1
fi
if [ ! -f "$PCRE/Makefile.in" -o ! -f "$PCRE/pcre_compile.c" ]; then
    echo "'$PCRE' does not contain a valid PCRE version." >&2
    exit 1
fi
# The script changes directory before it uses $PCRE again, so turn a
# relative PCRE-DIR into an absolute path while it can still be resolved.
PCRE=`cd "$PCRE" > /dev/null 2>&1 && pwd`
if [ "x$PCRE" = x ]; then
    echo "Cannot resolve the PCRE directory." >&2
    exit 1
fi
echo "Deleting old PCRE library"
# Preserve the SVN metadata (if any) across the deletion.
if [ -d pcre/.svn ]; then
    mv pcre/.svn tmp-pcre-svn
fi
rm -R pcre 2> /dev/null
mkdir pcre
# Everything below writes into the current directory; stop if we are not
# inside pcre.
cd pcre || exit 1
# pcre_chartables.c is generated by dfatables.
# We do not want to compile and execute dfatables.c every time, because
# this could be a problem (e.g. when cross-compiling), so now generate
# the file and then distribute it with GRegex.
echo "Generating pcre_chartables.c"
cp -R "$PCRE" tmp-build
# If this cd failed, the "cd .." and "rm -R" below would act on the wrong
# directories.
cd tmp-build || exit 1
./configure --enable-utf8 --enable-unicode-properties --disable-cpp > /dev/null
make pcre_chartables.c > /dev/null
cat > ../pcre_chartables.c << \EOF
/* This file is autogenerated by ../update-pcre/update.sh during
* the update of the local copy of PCRE.
*/
EOF
cat pcre_chartables.c >> ../pcre_chartables.c
cd ..
rm -R tmp-build
# Compiled C files.
echo "Generating makefiles"
all_files=`awk '/^OBJ = /, /^\\s*$/ \
{ \
sub("^OBJ = ", ""); \
sub(".@OBJEXT@[[:blank:]]*\\\\\\\\", ""); \
sub("\\\\$\\\\(POSIX_OBJ\\\\)", ""); \
print; \
}' \
"$PCRE/Makefile.in"`
# Headers.
included_files="pcre.h pcre_internal.h ucp.h ucpinternal.h"
# Generate Makefile.am.
cat "$IN/Makefile.am-1" > Makefile.am
for name in $all_files; do
    echo " $name.c \\" >> Makefile.am
    if [ "$name" != pcre_chartables ]; then
        # pcre_chartables.c is a generated file.
        cp "$PCRE/$name.c" .
    fi
done
for f in $included_files; do
    echo " $f \\" >> Makefile.am
    cp "$PCRE/$f" .
done
cat "$IN/Makefile.am-2" >> Makefile.am
# Generate makefile.msc
cat > makefile.msc << EOF
!IFDEF DEBUG
CRT=-MDd
!ELSE
CRT=-MD
!ENDIF
CFLAGS = \\
-I ..\\.. \\
-DHAVE_CONFIG_H \\
-DHAVE_LONG_LONG_FORMAT \\
-DSUPPORT_UCP \\
-DSUPPORT_UTF8 \\
-DNEWLINE=10 \\
-DMATCH_LIMIT=10000000 \\
-DMATCH_LIMIT_RECURSION=10000000 \\
-DMAX_NAME_SIZE=32 \\
-DMAX_NAME_COUNT=10000 \\
-DMAX_DUPLENGTH=30000 \\
-DLINK_SIZE=2 \\
-DEBCDIC=0 \\
-DPOSIX_MALLOC_THRESHOLD=10
OBJECTS = \\
`
for f in $all_files; do
echo " $f.obj \\\\"
done
`
pcre.lib : \$(OBJECTS)
lib -out:pcre.lib \$(OBJECTS)
.c.obj:
\$(CC) \$(CRT) \$(CFLAGS) -Ox -GD -c $<
EOF
echo "Patching PCRE"
# Copy the license.
cp "$PCRE/COPYING" .
# Use glib for memory allocation.
patch > /dev/null < "$IN/memory.patch"
# Copy the modified version of pcre_valid_utf8.c.
cp "$IN/pcre_valid_utf8.c" .
# Copy the modified version of pcre_ucp_searchfuncs.c that uses glib
# for Unicode properties.
cp "$IN/pcre_ucp_searchfuncs.c" .
patch > /dev/null < "$IN/ucp.patch"
# Remove the digitab array in pcre_compile.c.
patch > /dev/null < "$IN/digitab.patch"
sed -i -e 's/(digitab\[\(.*\)\] & ctype_digit)/g_ascii_isdigit(\1)/' pcre_compile.c
sed -i -e 's/(digitab\[\(.*\)\] & ctype_xdigit)/g_ascii_isxdigit(\1)/' pcre_compile.c
# Reduce the number of relocations.
"$IN/make_utt.py"
patch > /dev/null < "$IN/utt.patch"
patch > /dev/null < "$IN/table-reduction.patch"
# Copy back the old SVN directory, if one was saved above.
if [ -d ../tmp-pcre-svn ]; then
    mv ../tmp-pcre-svn .svn
fi
cat << EOF
Update completed. You now should check that everything is working.
Remember to update the regex syntax doc with the new features
(docs/reference/glib/regex-syntax.sgml) and to run the tests.
EOF

View File

@ -1,6 +1,12 @@
SUBDIRS=gobject refcount SUBDIRS=gobject refcount
INCLUDES = -g -I$(top_srcdir) -I$(top_srcdir)/glib -I$(top_srcdir)/gmodule $(GLIB_DEBUG_FLAGS) if ENABLE_REGEX
enable_regex = -DENABLE_REGEX
else
enable_regex =
endif
INCLUDES = -g -I$(top_srcdir) -I$(top_srcdir)/glib -I$(top_srcdir)/gmodule $(GLIB_DEBUG_FLAGS) $(enable_regex)
EFENCE= EFENCE=
@ -112,7 +118,8 @@ test_programs = \
unicode-encoding \ unicode-encoding \
utf8-validate \ utf8-validate \
utf8-pointer \ utf8-pointer \
uri-test uri-test \
regex-test
test_scripts = run-markup-tests.sh run-collate-tests.sh run-bookmark-test.sh test_scripts = run-markup-tests.sh run-collate-tests.sh run-bookmark-test.sh
@ -183,6 +190,7 @@ unicode_collate_LDADD = $(progs_ldadd)
utf8_validate_LDADD = $(progs_ldadd) utf8_validate_LDADD = $(progs_ldadd)
utf8_pointer_LDADD = $(progs_ldadd) utf8_pointer_LDADD = $(progs_ldadd)
uri_test_LDADD = $(progs_ldadd) uri_test_LDADD = $(progs_ldadd)
regex_test_LDADD = $(progs_ldadd)
lib_LTLIBRARIES = libmoduletestplugin_a.la libmoduletestplugin_b.la lib_LTLIBRARIES = libmoduletestplugin_a.la libmoduletestplugin_b.la

2607
tests/regex-test.c Normal file

File diff suppressed because it is too large Load Diff