mirror of
https://gitlab.gnome.org/GNOME/glib.git
synced 2025-02-07 19:35:50 +01:00
Add GRegex for regular expression matching. (#50075)
2007-03-15 Marco Barisione <marco@barisione.org> Add GRegex for regular expression matching. (#50075) * configure.in: Handle GRegex compilation. * glib/gregex.c: * glib/gregex.h: Code for GRegex. * glib/Makefile.am: * glib/makefile.msc.in: Updated makefiles. * glib/pcre/*: Internal copy of PCRE. * glib/update-pcre/*: Stuff to automatically update the internal PCRE to a newer version. * tests/regex-test.c: * tests/Makefile.am: * tests/makefile.msc.in: Add tests for GRegex. svn path=/trunk/; revision=5408
This commit is contained in:
parent
af8671792d
commit
0196d63975
21
ChangeLog
21
ChangeLog
@ -1,3 +1,24 @@
|
|||||||
|
2007-03-15 Marco Barisione <marco@barisione.org>
|
||||||
|
|
||||||
|
Add GRegex for regular expression matching. (#50075)
|
||||||
|
|
||||||
|
* configure.in: Handle GRegex compilation.
|
||||||
|
|
||||||
|
* glib/gregex.c:
|
||||||
|
* glib/gregex.h: Code for GRegex.
|
||||||
|
|
||||||
|
* glib/Makefile.am:
|
||||||
|
* glib/makefile.msc.in: Updated makefiles.
|
||||||
|
|
||||||
|
* glib/pcre/*: Internal copy of PCRE.
|
||||||
|
|
||||||
|
* glib/update-pcre/*: Stuff to automatically update the internal PCRE
|
||||||
|
to a newer version.
|
||||||
|
|
||||||
|
* tests/regex-test.c:
|
||||||
|
* tests/Makefile.am:
|
||||||
|
* tests/makefile.msc.in: Add tests for GRegex.
|
||||||
|
|
||||||
2007-03-15 Chris Wilson <chris@chris-wilson.co.uk>
|
2007-03-15 Chris Wilson <chris@chris-wilson.co.uk>
|
||||||
|
|
||||||
* glib/gmain.c (g_main_dispatch): Replace a
|
* glib/gmain.c (g_main_dispatch): Replace a
|
||||||
|
72
configure.in
72
configure.in
@ -173,7 +173,7 @@ AM_CONDITIONAL(MS_LIB_AVAILABLE, [test x$ms_librarian = xyes])
|
|||||||
if test "$glib_native_win32" != yes; then
|
if test "$glib_native_win32" != yes; then
|
||||||
# libtool option to control which symbols are exported
|
# libtool option to control which symbols are exported
|
||||||
# right now, symbols starting with _ are not exported
|
# right now, symbols starting with _ are not exported
|
||||||
LIBTOOL_EXPORT_OPTIONS='-export-symbols-regex "^[[^_]].*"'
|
LIBTOOL_EXPORT_OPTIONS='-export-symbols-regex "^g.*"'
|
||||||
else
|
else
|
||||||
# We currently use .def files on Windows
|
# We currently use .def files on Windows
|
||||||
LIBTOOL_EXPORT_OPTIONS=
|
LIBTOOL_EXPORT_OPTIONS=
|
||||||
@ -2146,6 +2146,74 @@ AC_RUN_IFELSE([AC_LANG_SOURCE([[
|
|||||||
[broken_poll="no (cross compiling)"])
|
[broken_poll="no (cross compiling)"])
|
||||||
AC_MSG_RESULT($broken_poll)
|
AC_MSG_RESULT($broken_poll)
|
||||||
|
|
||||||
|
dnl *********************
|
||||||
|
dnl *** GRegex checks ***
|
||||||
|
dnl *********************
|
||||||
|
PCRE_REQUIRED_VERSION=7.0
|
||||||
|
|
||||||
|
# Check if we should compile GRegex
|
||||||
|
AC_ARG_ENABLE(regex, AC_HELP_STRING([--disable-regex],
|
||||||
|
[disable the compilation of GRegex]),
|
||||||
|
[case "${enableval}" in
|
||||||
|
yes) enable_regex=true ;;
|
||||||
|
no) enable_regex=false ;;
|
||||||
|
*) AC_MSG_ERROR(bad value ${enableval} for --enable-regex) ;;
|
||||||
|
esac],
|
||||||
|
[enable_regex=true])
|
||||||
|
|
||||||
|
AM_CONDITIONAL(ENABLE_REGEX, $enable_regex)
|
||||||
|
|
||||||
|
if test x$enable_regex = xtrue; then
|
||||||
|
# Check if we should use the internal or the system-supplied pcre
|
||||||
|
AC_ARG_WITH(pcre,
|
||||||
|
[AC_HELP_STRING([--with-pcre=@<:@internal/system@:>@],
|
||||||
|
[specify whether to use the internal or the
|
||||||
|
system-supplied PCRE library])])
|
||||||
|
|
||||||
|
AM_CONDITIONAL(USE_SYSTEM_PCRE, [test "x$with_pcre" = xsystem])
|
||||||
|
|
||||||
|
if test "x$with_pcre" = xsystem; then
|
||||||
|
PKG_CHECK_MODULES(PCRE,
|
||||||
|
libpcre >= $PCRE_REQUIRED_VERSION)
|
||||||
|
AC_CACHE_CHECK([for Unicode support in PCRE],glib_cv_pcre_has_unicode,[
|
||||||
|
CFLAGS="$PCRE_CFLAGS" LDFLAGS="$PCRE_LIBS"
|
||||||
|
AC_TRY_RUN([#include <pcre.h>
|
||||||
|
int main () {
|
||||||
|
int support;
|
||||||
|
pcre_config (PCRE_CONFIG_UTF8, &support);
|
||||||
|
if (!support)
|
||||||
|
return 1;
|
||||||
|
pcre_config (PCRE_CONFIG_UNICODE_PROPERTIES, &support);
|
||||||
|
if (!support)
|
||||||
|
return 1;
|
||||||
|
return 0;
|
||||||
|
}],
|
||||||
|
glib_cv_pcre_has_unicode=yes,
|
||||||
|
glib_cv_pcre_has_unicode=no,
|
||||||
|
glib_cv_pcre_has_unicode=yes)])
|
||||||
|
if test "$glib_cv_pcre_has_unicode" = "no"; then
|
||||||
|
AC_MSG_ERROR([*** The system-supplied PCRE does not support Unicode properties or UTF-8.])
|
||||||
|
fi
|
||||||
|
AC_SUBST(PCRE_CFLAGS)
|
||||||
|
AC_SUBST(PCRE_LIBS)
|
||||||
|
AC_DEFINE(USE_SYSTEM_PCRE, [], [using the system-supplied PCRE library])
|
||||||
|
else
|
||||||
|
# If using gcc 4 pass -Wno-pointer-sign when compiling the internal PCRE
|
||||||
|
if test x"$GCC" = xyes; then
|
||||||
|
AC_MSG_CHECKING([whether gcc understands -Wno-pointer-sign])
|
||||||
|
if test [`$CC --version | sed -e 's/[^0-9]*\([0-9]\).*/\1/' -e q`] -ge 4; then
|
||||||
|
PCRE_WARN_CFLAGS="$PCRE_WARN_CFLAGS -Wno-pointer-sign"
|
||||||
|
AC_MSG_RESULT([yes])
|
||||||
|
else
|
||||||
|
AC_MSG_RESULT([no])
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
AC_SUBST(PCRE_WARN_CFLAGS)
|
||||||
|
else
|
||||||
|
AM_CONDITIONAL(USE_SYSTEM_PCRE, false])
|
||||||
|
fi
|
||||||
|
|
||||||
dnl **********************
|
dnl **********************
|
||||||
dnl *** Win32 API libs ***
|
dnl *** Win32 API libs ***
|
||||||
dnl **********************
|
dnl **********************
|
||||||
@ -2864,6 +2932,8 @@ Makefile
|
|||||||
glib/Makefile
|
glib/Makefile
|
||||||
glib/libcharset/Makefile
|
glib/libcharset/Makefile
|
||||||
glib/gnulib/Makefile
|
glib/gnulib/Makefile
|
||||||
|
glib/pcre/Makefile
|
||||||
|
glib/update-pcre/Makefile
|
||||||
gmodule/Makefile
|
gmodule/Makefile
|
||||||
gmodule/gmoduleconf.h
|
gmodule/gmoduleconf.h
|
||||||
gobject/Makefile
|
gobject/Makefile
|
||||||
|
@ -1,3 +1,17 @@
|
|||||||
|
2007-03-15 Marco Barisione <marco@barisione.org>
|
||||||
|
|
||||||
|
Add GRegex for regular expression matching. (#50075)
|
||||||
|
|
||||||
|
* glib/Makefile.am:
|
||||||
|
* glib/glib-docs.sgml:
|
||||||
|
* glib/glib-sections.txt:
|
||||||
|
* glib/tmpl/glib-unused.sgml:
|
||||||
|
* glib/regex-syntax.sgml:
|
||||||
|
* glib/tmpl/gregex-unused.sgml:
|
||||||
|
* glib/tmpl/gregex.sgml: Add GRegex.
|
||||||
|
|
||||||
|
* glib/building.sgml: Document build options for GRegex.
|
||||||
|
|
||||||
2007-03-14 Stefan Kost <ensonic@users.sf.net>
|
2007-03-14 Stefan Kost <ensonic@users.sf.net>
|
||||||
|
|
||||||
* gobject/tmpl/gparamspec.sgml:
|
* gobject/tmpl/gparamspec.sgml:
|
||||||
|
@ -37,7 +37,9 @@ IGNORE_HFILES= \
|
|||||||
gmirroringtable.h \
|
gmirroringtable.h \
|
||||||
gscripttable.h \
|
gscripttable.h \
|
||||||
glib-mirroring-tab \
|
glib-mirroring-tab \
|
||||||
gnulib
|
gnulib \
|
||||||
|
pcre \
|
||||||
|
update-pcre
|
||||||
|
|
||||||
# Extra options to supply to gtkdoc-mkdb
|
# Extra options to supply to gtkdoc-mkdb
|
||||||
MKDB_OPTIONS=--sgml-mode --output-format=xml --ignore-files=trio
|
MKDB_OPTIONS=--sgml-mode --output-format=xml --ignore-files=trio
|
||||||
@ -55,6 +57,7 @@ content_files = \
|
|||||||
changes.sgml \
|
changes.sgml \
|
||||||
compiling.sgml \
|
compiling.sgml \
|
||||||
resources.sgml \
|
resources.sgml \
|
||||||
|
regex-syntax.sgml \
|
||||||
version.xml \
|
version.xml \
|
||||||
glib-gettextize.xml
|
glib-gettextize.xml
|
||||||
|
|
||||||
|
@ -146,6 +146,16 @@ How to compile GLib itself
|
|||||||
e.g. POSIX threads, DCE threads or Solaris threads.
|
e.g. POSIX threads, DCE threads or Solaris threads.
|
||||||
</para>
|
</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
GRegex uses the <ulink url="http://www.pcre.org/">PCRE library</ulink>
|
||||||
|
for regular expression matching. The default is to use the internal
|
||||||
|
version of PCRE that is patched to use GLib for memory management
|
||||||
|
and Unicode handling. If you prefer to use the system-supplied PCRE
|
||||||
|
library you can pass the --with-pcre=system option to configure,
|
||||||
|
but it is not recommended.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
</itemizedlist>
|
</itemizedlist>
|
||||||
|
|
||||||
</refsect1>
|
</refsect1>
|
||||||
@ -177,6 +187,13 @@ How to compile GLib itself
|
|||||||
<group>
|
<group>
|
||||||
<arg>--with-threads=[none|posix|dce|win32]</arg>
|
<arg>--with-threads=[none|posix|dce|win32]</arg>
|
||||||
</group>
|
</group>
|
||||||
|
<group>
|
||||||
|
<arg>--disable-regex</arg>
|
||||||
|
<arg>--enable-regex</arg>
|
||||||
|
</group>
|
||||||
|
<group>
|
||||||
|
<arg>--with-pcre=[internal|system]</arg>
|
||||||
|
</group>
|
||||||
<group>
|
<group>
|
||||||
<arg>--disable-included-printf</arg>
|
<arg>--disable-included-printf</arg>
|
||||||
<arg>--enable-included-printf</arg>
|
<arg>--enable-included-printf</arg>
|
||||||
@ -361,6 +378,61 @@ How to compile GLib itself
|
|||||||
</para>
|
</para>
|
||||||
</formalpara>
|
</formalpara>
|
||||||
|
|
||||||
|
<formalpara>
|
||||||
|
<title><systemitem>--disable-regex</systemitem> and
|
||||||
|
<systemitem>--enable-regex</systemitem></title>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
Do not compile GLib with regular expression support.
|
||||||
|
GLib will be smaller because it will not need the
|
||||||
|
PCRE library. This is however not recommended, as
|
||||||
|
programs may need GRegex.
|
||||||
|
</para>
|
||||||
|
</formalpara>
|
||||||
|
|
||||||
|
<formalpara>
|
||||||
|
<title><systemitem>--with-pcre</systemitem></title>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
Specify whether to use the internal or the system-supplied
|
||||||
|
PCRE library.
|
||||||
|
<itemizedlist>
|
||||||
|
<listitem><para>
|
||||||
|
'internal' means that GRegex will be compiled to use
|
||||||
|
the internal PCRE library.
|
||||||
|
</para></listitem>
|
||||||
|
|
||||||
|
<listitem><para>
|
||||||
|
'system' means that GRegex will be compiled to use
|
||||||
|
the system-supplied PCRE library.
|
||||||
|
</para></listitem>
|
||||||
|
</itemizedlist>
|
||||||
|
Using the internal PCRE is the preferred solution:
|
||||||
|
<itemizedlist>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
System-supplied PCRE has a separate copy of the big tables
|
||||||
|
used for Unicode handling.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Some systems have PCRE libraries compiled without some needed
|
||||||
|
features, such as UTF-8 and Unicode support.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
PCRE uses some global variables for memory management and
|
||||||
|
other features. In the rare case of a program using both
|
||||||
|
GRegex and PCRE (maybe indirectly through a library),
|
||||||
|
these variables could lead to problems when they are modified.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</itemizedlist>
|
||||||
|
</para>
|
||||||
|
</formalpara>
|
||||||
|
|
||||||
<formalpara>
|
<formalpara>
|
||||||
<title><systemitem>--disable-included-printf</systemitem> and
|
<title><systemitem>--disable-included-printf</systemitem> and
|
||||||
<systemitem>--enable-included-printf</systemitem></title>
|
<systemitem>--enable-included-printf</systemitem></title>
|
||||||
|
@ -61,6 +61,7 @@
|
|||||||
<!ENTITY glib-Bookmarkfile SYSTEM "xml/bookmarkfile.xml">
|
<!ENTITY glib-Bookmarkfile SYSTEM "xml/bookmarkfile.xml">
|
||||||
<!ENTITY glib-Base64 SYSTEM "xml/base64.xml">
|
<!ENTITY glib-Base64 SYSTEM "xml/base64.xml">
|
||||||
<!ENTITY glib-i18n SYSTEM "xml/i18n.xml">
|
<!ENTITY glib-i18n SYSTEM "xml/i18n.xml">
|
||||||
|
<!ENTITY glib-Regex SYSTEM "xml/gregex.xml">
|
||||||
<!ENTITY glib-Version SYSTEM "xml/version.xml">
|
<!ENTITY glib-Version SYSTEM "xml/version.xml">
|
||||||
|
|
||||||
<!ENTITY glib-Compiling SYSTEM "compiling.sgml">
|
<!ENTITY glib-Compiling SYSTEM "compiling.sgml">
|
||||||
@ -69,6 +70,7 @@
|
|||||||
<!ENTITY glib-Running SYSTEM "running.sgml">
|
<!ENTITY glib-Running SYSTEM "running.sgml">
|
||||||
<!ENTITY glib-Resources SYSTEM "resources.sgml">
|
<!ENTITY glib-Resources SYSTEM "resources.sgml">
|
||||||
<!ENTITY glib-Changes SYSTEM "changes.sgml">
|
<!ENTITY glib-Changes SYSTEM "changes.sgml">
|
||||||
|
<!ENTITY glib-RegexSyntax SYSTEM "regex-syntax.sgml">
|
||||||
|
|
||||||
<!ENTITY glib-gettextize SYSTEM "glib-gettextize.xml">
|
<!ENTITY glib-gettextize SYSTEM "glib-gettextize.xml">
|
||||||
|
|
||||||
@ -101,6 +103,7 @@ synchronize their operation.
|
|||||||
&glib-Compiling;
|
&glib-Compiling;
|
||||||
&glib-Running;
|
&glib-Running;
|
||||||
&glib-Changes;
|
&glib-Changes;
|
||||||
|
&glib-RegexSyntax;
|
||||||
&glib-Resources;
|
&glib-Resources;
|
||||||
|
|
||||||
</chapter>
|
</chapter>
|
||||||
@ -151,6 +154,7 @@ synchronize their operation.
|
|||||||
&glib-Shell;
|
&glib-Shell;
|
||||||
&glib-Option;
|
&glib-Option;
|
||||||
&glib-Pattern-Matching;
|
&glib-Pattern-Matching;
|
||||||
|
&glib-Regex;
|
||||||
&glib-Markup;
|
&glib-Markup;
|
||||||
&glib-Keyfile;
|
&glib-Keyfile;
|
||||||
&glib-Bookmarkfile;
|
&glib-Bookmarkfile;
|
||||||
|
@ -863,6 +863,50 @@ g_pattern_match_string
|
|||||||
g_pattern_match_simple
|
g_pattern_match_simple
|
||||||
</SECTION>
|
</SECTION>
|
||||||
|
|
||||||
|
<SECTION>
|
||||||
|
<TITLE>Perl-compatible regular expressions</TITLE>
|
||||||
|
<FILE>gregex</FILE>
|
||||||
|
GRegexError
|
||||||
|
G_REGEX_ERROR
|
||||||
|
GRegexCompileFlags
|
||||||
|
GRegexMatchFlags
|
||||||
|
GRegex
|
||||||
|
GRegexEvalCallback
|
||||||
|
g_regex_new
|
||||||
|
g_regex_free
|
||||||
|
g_regex_optimize
|
||||||
|
g_regex_copy
|
||||||
|
g_regex_get_pattern
|
||||||
|
g_regex_clear
|
||||||
|
g_regex_match_simple
|
||||||
|
g_regex_match
|
||||||
|
g_regex_match_full
|
||||||
|
g_regex_match_next
|
||||||
|
g_regex_match_next_full
|
||||||
|
g_regex_match_all
|
||||||
|
g_regex_match_all_full
|
||||||
|
g_regex_get_match_count
|
||||||
|
g_regex_is_partial_match
|
||||||
|
g_regex_fetch
|
||||||
|
g_regex_fetch_pos
|
||||||
|
g_regex_fetch_named
|
||||||
|
g_regex_fetch_named_pos
|
||||||
|
g_regex_fetch_all
|
||||||
|
g_regex_get_string_number
|
||||||
|
g_regex_split_simple
|
||||||
|
g_regex_split
|
||||||
|
g_regex_split_full
|
||||||
|
g_regex_split_next
|
||||||
|
g_regex_split_next_full
|
||||||
|
g_regex_expand_references
|
||||||
|
g_regex_replace
|
||||||
|
g_regex_replace_literal
|
||||||
|
g_regex_replace_eval
|
||||||
|
g_regex_escape_string
|
||||||
|
<SUBSECTION Private>
|
||||||
|
g_regex_error_quark
|
||||||
|
</SECTION>
|
||||||
|
|
||||||
<SECTION>
|
<SECTION>
|
||||||
<TITLE>Message Logging</TITLE>
|
<TITLE>Message Logging</TITLE>
|
||||||
<FILE>messages</FILE>
|
<FILE>messages</FILE>
|
||||||
|
2704
docs/reference/glib/regex-syntax.sgml
Normal file
2704
docs/reference/glib/regex-syntax.sgml
Normal file
File diff suppressed because it is too large
Load Diff
@ -712,6 +712,13 @@ To use this function you must configure glib with the flag
|
|||||||
|
|
||||||
@mem: the memory to check.
|
@mem: the memory to check.
|
||||||
|
|
||||||
|
<!-- ##### FUNCTION g_regex_error_quark ##### -->
|
||||||
|
<para>
|
||||||
|
|
||||||
|
</para>
|
||||||
|
|
||||||
|
@Returns:
|
||||||
|
|
||||||
<!-- ##### FUNCTION g_scanner_stat_mode ##### -->
|
<!-- ##### FUNCTION g_scanner_stat_mode ##### -->
|
||||||
<para>
|
<para>
|
||||||
Gets the file attributes.
|
Gets the file attributes.
|
||||||
|
578
docs/reference/glib/tmpl/gregex.sgml
Normal file
578
docs/reference/glib/tmpl/gregex.sgml
Normal file
@ -0,0 +1,578 @@
|
|||||||
|
<!-- ##### SECTION Title ##### -->
|
||||||
|
Perl-compatible regular expressions
|
||||||
|
|
||||||
|
<!-- ##### SECTION Short_Description ##### -->
|
||||||
|
matches strings against regular expressions.
|
||||||
|
|
||||||
|
<!-- ##### SECTION Long_Description ##### -->
|
||||||
|
<para>
|
||||||
|
The <function>g_regex_*()</function> functions implement regular
|
||||||
|
expression pattern matching using syntax and semantics similar to
|
||||||
|
Perl regular expressions.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Some functions accept a <parameter>start_position</parameter> argument;
|
||||||
|
setting it differs from just passing over a shortened string and setting
|
||||||
|
#G_REGEX_MATCH_NOTBOL in the case of a pattern that begins with any kind
|
||||||
|
of lookbehind assertion.
|
||||||
|
For example, consider the pattern "\Biss\B" which finds occurrences of "iss"
|
||||||
|
in the middle of words. ("\B" matches only if the current position in the
|
||||||
|
subject is not a word boundary.) When applied to the string "Mississipi"
|
||||||
|
from the fourth byte, namely "issipi", it does not match, because "\B" is
|
||||||
|
always false at the start of the subject, which is deemed to be a word
|
||||||
|
boundary. However, if the entire string is passed, but with
|
||||||
|
<parameter>start_position</parameter> set to 4, it finds the second
|
||||||
|
occurrence of "iss" because it is able to look behind the starting point
|
||||||
|
to discover that it is preceded by a letter.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Note that, unless you set the #G_REGEX_RAW flag, all the strings passed
|
||||||
|
to these functions must be encoded in UTF-8. The lengths and the positions
|
||||||
|
inside the strings are in bytes and not in characters, so, for instance,
|
||||||
|
"\xc3\xa0" (i.e. "à") is two bytes long but it is treated as a single
|
||||||
|
character. If you set #G_REGEX_RAW the strings can be non-valid UTF-8
|
||||||
|
strings and a byte is treated as a character, so "\xc3\xa0" is two bytes
|
||||||
|
and two characters long.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
When matching a pattern, "\n" matches only against a "\n" character in the
|
||||||
|
string, and "\r" matches only a "\r" character. To match any newline sequence
|
||||||
|
use "\R". This particular group matches either the two-character sequence
|
||||||
|
CR + LF ("\r\n"), or one of the single characters LF (linefeed, U+000A, "\n"), VT
|
||||||
|
(vertical tab, U+000B, "\v"), FF (formfeed, U+000C, "\f"), CR (carriage return,
|
||||||
|
U+000D, "\r"), NEL (next line, U+0085), LS (line separator, U+2028), or PS
|
||||||
|
(paragraph separator, U+2029).
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
The behaviour of the dot, circumflex, and dollar metacharacters is affected by
|
||||||
|
newline characters, the default is to recognize any newline character (the same
|
||||||
|
characters recognized by "\R"). This can be changed with #G_REGEX_NEWLINE_CR,
|
||||||
|
#G_REGEX_NEWLINE_LF and #G_REGEX_NEWLINE_CRLF compile options,
|
||||||
|
and with #G_REGEX_MATCH_NEWLINE_ANY, #G_REGEX_MATCH_NEWLINE_CR,
|
||||||
|
#G_REGEX_MATCH_NEWLINE_LF and #G_REGEX_MATCH_NEWLINE_CRLF match options.
|
||||||
|
These settings are also relevant when compiling a pattern if
|
||||||
|
#G_REGEX_EXTENDED is set, and an unescaped "#" outside a character class is
|
||||||
|
encountered. This indicates a comment that lasts until after the next
|
||||||
|
newline.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
If you have two threads manipulating the same #GRegex, they must use a
|
||||||
|
lock to synchronize their operation, as these functions are not threadsafe.
|
||||||
|
Creating and manipulating different #GRegex structures from different
|
||||||
|
threads is not a problem.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
The low-level regular expression functionality is obtained through
|
||||||
|
the excellent <ulink url="http://www.pcre.org/">PCRE</ulink> library
|
||||||
|
written by Philip Hazel.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<!-- ##### SECTION See_Also ##### -->
|
||||||
|
<para>
|
||||||
|
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<!-- ##### SECTION Stability_Level ##### -->
|
||||||
|
|
||||||
|
|
||||||
|
<!-- ##### ENUM GRegexError ##### -->
|
||||||
|
<para>
|
||||||
|
Error codes returned by regular expressions functions.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
@G_REGEX_ERROR_COMPILE: Compilation of the regular expression in <function>g_regex_new()</function> failed.
|
||||||
|
@G_REGEX_ERROR_OPTIMIZE: Optimization of the regular expression in <function>g_regex_optimize()</function> failed.
|
||||||
|
@G_REGEX_ERROR_REPLACE: Replacement failed due to an ill-formed replacement string.
|
||||||
|
@G_REGEX_ERROR_MATCH: The match process failed.
|
||||||
|
@Since: 2.14
|
||||||
|
|
||||||
|
<!-- ##### MACRO G_REGEX_ERROR ##### -->
|
||||||
|
<para>
|
||||||
|
Error domain for regular expressions. Errors in this domain will be from the #GRegexError enumeration. See #GError for information on error domains.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
@Since: 2.14
|
||||||
|
|
||||||
|
|
||||||
|
<!-- ##### ENUM GRegexCompileFlags ##### -->
|
||||||
|
<para>
|
||||||
|
Flags specifying compile-time options.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
@G_REGEX_CASELESS: Letters in the pattern match both upper and lower case
|
||||||
|
letters. It can be changed within a pattern by a "(?i)" option setting.
|
||||||
|
@G_REGEX_MULTILINE: By default, GRegex treats the strings as consisting
|
||||||
|
of a single line of characters (even if it actually contains newlines).
|
||||||
|
The "start of line" metacharacter ("^") matches only at the start of the
|
||||||
|
string, while the "end of line" metacharacter ("$") matches only at the
|
||||||
|
end of the string, or before a terminating newline (unless
|
||||||
|
#G_REGEX_DOLLAR_ENDONLY is set). When #G_REGEX_MULTILINE is set,
|
||||||
|
the "start of line" and "end of line" constructs match immediately following
|
||||||
|
or immediately before any newline in the string, respectively, as well
|
||||||
|
as at the very start and end. This can be changed within a pattern by a
|
||||||
|
"(?m)" option setting.
|
||||||
|
@G_REGEX_DOTALL: A dot metacharacter (".") in the pattern matches all
|
||||||
|
characters, including newlines. Without it, newlines are excluded. This
|
||||||
|
option can be changed within a pattern by a "(?s)" option setting.
|
||||||
|
@G_REGEX_EXTENDED: Whitespace data characters in the pattern are
|
||||||
|
totally ignored except when escaped or inside a character class.
|
||||||
|
Whitespace does not include the VT character (code 11). In addition,
|
||||||
|
characters between an unescaped "#" outside a character class and
|
||||||
|
the next newline character, inclusive, are also ignored. This can be
|
||||||
|
changed within a pattern by a "(?x)" option setting.
|
||||||
|
@G_REGEX_ANCHORED: The pattern is forced to be "anchored", that is,
|
||||||
|
it is constrained to match only at the first matching point in the string
|
||||||
|
that is being searched. This effect can also be achieved by appropriate
|
||||||
|
constructs in the pattern itself such as the "^" metacharacter.
|
||||||
|
@G_REGEX_DOLLAR_ENDONLY: A dollar metacharacter ("$") in the pattern
|
||||||
|
matches only at the end of the string. Without this option, a dollar also
|
||||||
|
matches immediately before the final character if it is a newline (but
|
||||||
|
not before any other newlines). This option is ignored if
|
||||||
|
#G_REGEX_MULTILINE is set.
|
||||||
|
@G_REGEX_UNGREEDY: Inverts the "greediness" of the
|
||||||
|
quantifiers so that they are not greedy by default, but become greedy
|
||||||
|
if followed by "?". It can also be set by a "(?U)" option setting within
|
||||||
|
the pattern.
|
||||||
|
@G_REGEX_RAW: Usually strings must be valid UTF-8 strings, using this
|
||||||
|
flag they are considered as a raw sequence of bytes.
|
||||||
|
@G_REGEX_NO_AUTO_CAPTURE: Disables the use of numbered capturing
|
||||||
|
parentheses in the pattern. Any opening parenthesis that is not followed
|
||||||
|
by "?" behaves as if it were followed by "?:" but named parentheses can
|
||||||
|
still be used for capturing (and they acquire numbers in the usual way).
|
||||||
|
@G_REGEX_DUPNAMES: Names used to identify capturing subpatterns need not
|
||||||
|
be unique. This can be helpful for certain types of pattern when it is known
|
||||||
|
that only one instance of the named subpattern can ever be matched.
|
||||||
|
@G_REGEX_NEWLINE_CR: Usually any newline character is recognized, if this
|
||||||
|
option is set, the only recognized newline character is '\r'.
|
||||||
|
@G_REGEX_NEWLINE_LF: Usually any newline character is recognized, if this
|
||||||
|
option is set, the only recognized newline character is '\n'.
|
||||||
|
@G_REGEX_NEWLINE_CRLF: Usually any newline character is recognized, if this
|
||||||
|
option is set, the only recognized newline character sequence is '\r\n'.
|
||||||
|
@Since: 2.14
|
||||||
|
|
||||||
|
<!-- ##### ENUM GRegexMatchFlags ##### -->
|
||||||
|
<para>
|
||||||
|
Flags specifying match-time options.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
@G_REGEX_MATCH_ANCHORED: The pattern is forced to be "anchored", that is,
|
||||||
|
it is constrained to match only at the first matching point in the string
|
||||||
|
that is being searched. This effect can also be achieved by appropriate
|
||||||
|
constructs in the pattern itself such as the "^" metacharacter.
|
||||||
|
@G_REGEX_MATCH_NOTBOL: Specifies that the first character of the string is
|
||||||
|
not the beginning of a line, so the circumflex metacharacter should not
|
||||||
|
match before it. Setting this without G_REGEX_MULTILINE (at compile time)
|
||||||
|
causes circumflex never to match. This option affects only the behaviour of
|
||||||
|
the circumflex metacharacter, it does not affect "\A".
|
||||||
|
@G_REGEX_MATCH_NOTEOL: Specifies that the end of the subject string is
|
||||||
|
not the end of a line, so the dollar metacharacter should not match it nor
|
||||||
|
(except in multiline mode) a newline immediately before it. Setting this
|
||||||
|
without G_REGEX_MULTILINE (at compile time) causes dollar never to match.
|
||||||
|
This option affects only the behaviour of the dollar metacharacter, it does
|
||||||
|
not affect "\Z" or "\z".
|
||||||
|
@G_REGEX_MATCH_NOTEMPTY: An empty string is not considered to be a valid
|
||||||
|
match if this option is set. If there are alternatives in the pattern, they
|
||||||
|
are tried. If all the alternatives match the empty string, the entire match
|
||||||
|
fails. For example, if the pattern "a?b?" is applied to a string not beginning
|
||||||
|
with "a" or "b", it matches the empty string at the start of the string.
|
||||||
|
With this flag set, this match is not valid, so GRegex searches further
|
||||||
|
into the string for occurrences of "a" or "b".
|
||||||
|
@G_REGEX_MATCH_PARTIAL: Turns on the partial matching feature, for more
|
||||||
|
documentation on partial matching see g_regex_is_partial_match().
|
||||||
|
@G_REGEX_MATCH_NEWLINE_CR: Overrides the newline definition set when creating
|
||||||
|
a new #GRegex, setting the '\r' character as line terminator.
|
||||||
|
@G_REGEX_MATCH_NEWLINE_LF: Overrides the newline definition set when creating
|
||||||
|
a new #GRegex, setting the '\n' character as line terminator.
|
||||||
|
@G_REGEX_MATCH_NEWLINE_CRLF: Overrides the newline definition set when creating
|
||||||
|
a new #GRegex, setting the '\r\n' characters as line terminator.
|
||||||
|
@G_REGEX_MATCH_NEWLINE_ANY: Overrides the newline definition set when creating
|
||||||
|
a new #GRegex, any newline character or character sequence is recognized.
|
||||||
|
@Since: 2.14
|
||||||
|
|
||||||
|
<!-- ##### STRUCT GRegex ##### -->
|
||||||
|
<para>
|
||||||
|
A GRegex is the "compiled" form of a regular expression pattern. This
|
||||||
|
structure is opaque and its fields cannot be accessed directly.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
@Since: 2.14
|
||||||
|
|
||||||
|
<!-- ##### USER_FUNCTION GRegexEvalCallback ##### -->
|
||||||
|
<para>
|
||||||
|
Specifies the type of the function passed to g_regex_replace_eval().
|
||||||
|
It is called for each occurrence of the pattern @regex in @string, and it
|
||||||
|
should append the replacement to @result.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
Do not call on @regex functions that modify its internal state, such as
|
||||||
|
g_regex_match(); if you need it you can create a temporary copy of
|
||||||
|
@regex using g_regex_copy().
|
||||||
|
</para>
|
||||||
|
|
||||||
|
@Param1: a #GRegex.
|
||||||
|
@Param2: the string used to perform matches against.
|
||||||
|
@Param3: a #GString containing the new string.
|
||||||
|
@Param4: user data passed to g_regex_replace_eval().
|
||||||
|
@Returns: %FALSE to continue the replacement process, %TRUE to stop it.
|
||||||
|
@Since: 2.14
|
||||||
|
|
||||||
|
|
||||||
|
<!-- ##### FUNCTION g_regex_new ##### -->
|
||||||
|
<para>
|
||||||
|
|
||||||
|
</para>
|
||||||
|
|
||||||
|
@pattern:
|
||||||
|
@compile_options:
|
||||||
|
@match_options:
|
||||||
|
@error:
|
||||||
|
@Returns:
|
||||||
|
|
||||||
|
|
||||||
|
<!-- ##### FUNCTION g_regex_free ##### -->
|
||||||
|
<para>
|
||||||
|
|
||||||
|
</para>
|
||||||
|
|
||||||
|
@regex:
|
||||||
|
|
||||||
|
|
||||||
|
<!-- ##### FUNCTION g_regex_optimize ##### -->
|
||||||
|
<para>
|
||||||
|
|
||||||
|
</para>
|
||||||
|
|
||||||
|
@regex:
|
||||||
|
@error:
|
||||||
|
@Returns:
|
||||||
|
|
||||||
|
|
||||||
|
<!-- ##### FUNCTION g_regex_copy ##### -->
|
||||||
|
<para>
|
||||||
|
|
||||||
|
</para>
|
||||||
|
|
||||||
|
@regex:
|
||||||
|
@Returns:
|
||||||
|
|
||||||
|
|
||||||
|
<!-- ##### FUNCTION g_regex_get_pattern ##### -->
|
||||||
|
<para>
|
||||||
|
|
||||||
|
</para>
|
||||||
|
|
||||||
|
@regex:
|
||||||
|
@Returns:
|
||||||
|
|
||||||
|
|
||||||
|
<!-- ##### FUNCTION g_regex_clear ##### -->
|
||||||
|
<para>
|
||||||
|
|
||||||
|
</para>
|
||||||
|
|
||||||
|
@regex:
|
||||||
|
|
||||||
|
|
||||||
|
<!-- ##### FUNCTION g_regex_match_simple ##### -->
|
||||||
|
<para>
|
||||||
|
|
||||||
|
</para>
|
||||||
|
|
||||||
|
@pattern:
|
||||||
|
@string:
|
||||||
|
@compile_options:
|
||||||
|
@match_options:
|
||||||
|
@Returns:
|
||||||
|
|
||||||
|
|
||||||
|
<!-- ##### FUNCTION g_regex_match ##### -->
|
||||||
|
<para>
|
||||||
|
|
||||||
|
</para>
|
||||||
|
|
||||||
|
@regex:
|
||||||
|
@string:
|
||||||
|
@match_options:
|
||||||
|
@Returns:
|
||||||
|
|
||||||
|
|
||||||
|
<!-- ##### FUNCTION g_regex_match_full ##### -->
|
||||||
|
<para>
|
||||||
|
|
||||||
|
</para>
|
||||||
|
|
||||||
|
@regex:
|
||||||
|
@string:
|
||||||
|
@string_len:
|
||||||
|
@start_position:
|
||||||
|
@match_options:
|
||||||
|
@error:
|
||||||
|
@Returns:
|
||||||
|
|
||||||
|
|
||||||
|
<!-- ##### FUNCTION g_regex_match_next ##### -->
|
||||||
|
<para>
|
||||||
|
|
||||||
|
</para>
|
||||||
|
|
||||||
|
@regex:
|
||||||
|
@string:
|
||||||
|
@match_options:
|
||||||
|
@Returns:
|
||||||
|
|
||||||
|
|
||||||
|
<!-- ##### FUNCTION g_regex_match_next_full ##### -->
|
||||||
|
<para>
|
||||||
|
|
||||||
|
</para>
|
||||||
|
|
||||||
|
@regex:
|
||||||
|
@string:
|
||||||
|
@string_len:
|
||||||
|
@start_position:
|
||||||
|
@match_options:
|
||||||
|
@error:
|
||||||
|
@Returns:
|
||||||
|
|
||||||
|
|
||||||
|
<!-- ##### FUNCTION g_regex_match_all ##### -->
|
||||||
|
<para>
|
||||||
|
|
||||||
|
</para>
|
||||||
|
|
||||||
|
@regex:
|
||||||
|
@string:
|
||||||
|
@match_options:
|
||||||
|
@Returns:
|
||||||
|
|
||||||
|
|
||||||
|
<!-- ##### FUNCTION g_regex_match_all_full ##### -->
|
||||||
|
<para>
|
||||||
|
|
||||||
|
</para>
|
||||||
|
|
||||||
|
@regex:
|
||||||
|
@string:
|
||||||
|
@string_len:
|
||||||
|
@start_position:
|
||||||
|
@match_options:
|
||||||
|
@error:
|
||||||
|
@Returns:
|
||||||
|
|
||||||
|
|
||||||
|
<!-- ##### FUNCTION g_regex_get_match_count ##### -->
|
||||||
|
<para>
|
||||||
|
|
||||||
|
</para>
|
||||||
|
|
||||||
|
@regex:
|
||||||
|
@Returns:
|
||||||
|
|
||||||
|
|
||||||
|
<!-- ##### FUNCTION g_regex_is_partial_match ##### -->
|
||||||
|
<para>
|
||||||
|
|
||||||
|
</para>
|
||||||
|
|
||||||
|
@regex:
|
||||||
|
@Returns:
|
||||||
|
|
||||||
|
|
||||||
|
<!-- ##### FUNCTION g_regex_fetch ##### -->
|
||||||
|
<para>
|
||||||
|
|
||||||
|
</para>
|
||||||
|
|
||||||
|
@regex:
|
||||||
|
@match_num:
|
||||||
|
@string:
|
||||||
|
@Returns:
|
||||||
|
|
||||||
|
|
||||||
|
<!-- ##### FUNCTION g_regex_fetch_pos ##### -->
|
||||||
|
<para>
|
||||||
|
|
||||||
|
</para>
|
||||||
|
|
||||||
|
@regex:
|
||||||
|
@match_num:
|
||||||
|
@start_pos:
|
||||||
|
@end_pos:
|
||||||
|
@Returns:
|
||||||
|
|
||||||
|
|
||||||
|
<!-- ##### FUNCTION g_regex_fetch_named ##### -->
|
||||||
|
<para>
|
||||||
|
|
||||||
|
</para>
|
||||||
|
|
||||||
|
@regex:
|
||||||
|
@name:
|
||||||
|
@string:
|
||||||
|
@Returns:
|
||||||
|
|
||||||
|
|
||||||
|
<!-- ##### FUNCTION g_regex_fetch_named_pos ##### -->
|
||||||
|
<para>
|
||||||
|
|
||||||
|
</para>
|
||||||
|
|
||||||
|
@regex:
|
||||||
|
@name:
|
||||||
|
@start_pos:
|
||||||
|
@end_pos:
|
||||||
|
@Returns:
|
||||||
|
|
||||||
|
|
||||||
|
<!-- ##### FUNCTION g_regex_fetch_all ##### -->
|
||||||
|
<para>
|
||||||
|
|
||||||
|
</para>
|
||||||
|
|
||||||
|
@regex:
|
||||||
|
@string:
|
||||||
|
@Returns:
|
||||||
|
|
||||||
|
|
||||||
|
<!-- ##### FUNCTION g_regex_get_string_number ##### -->
|
||||||
|
<para>
|
||||||
|
|
||||||
|
</para>
|
||||||
|
|
||||||
|
@regex:
|
||||||
|
@name:
|
||||||
|
@Returns:
|
||||||
|
|
||||||
|
|
||||||
|
<!-- ##### FUNCTION g_regex_split_simple ##### -->
|
||||||
|
<para>
|
||||||
|
|
||||||
|
</para>
|
||||||
|
|
||||||
|
@pattern:
|
||||||
|
@string:
|
||||||
|
@compile_options:
|
||||||
|
@match_options:
|
||||||
|
@Returns:
|
||||||
|
|
||||||
|
|
||||||
|
<!-- ##### FUNCTION g_regex_split ##### -->
|
||||||
|
<para>
|
||||||
|
|
||||||
|
</para>
|
||||||
|
|
||||||
|
@regex:
|
||||||
|
@string:
|
||||||
|
@match_options:
|
||||||
|
@Returns:
|
||||||
|
|
||||||
|
|
||||||
|
<!-- ##### FUNCTION g_regex_split_full ##### -->
|
||||||
|
<para>
|
||||||
|
|
||||||
|
</para>
|
||||||
|
|
||||||
|
@regex:
|
||||||
|
@string:
|
||||||
|
@string_len:
|
||||||
|
@start_position:
|
||||||
|
@match_options:
|
||||||
|
@max_tokens:
|
||||||
|
@error:
|
||||||
|
@Returns:
|
||||||
|
|
||||||
|
|
||||||
|
<!-- ##### FUNCTION g_regex_split_next ##### -->
|
||||||
|
<para>
|
||||||
|
|
||||||
|
</para>
|
||||||
|
|
||||||
|
@regex:
|
||||||
|
@string:
|
||||||
|
@match_options:
|
||||||
|
@Returns:
|
||||||
|
|
||||||
|
|
||||||
|
<!-- ##### FUNCTION g_regex_split_next_full ##### -->
|
||||||
|
<para>
|
||||||
|
|
||||||
|
</para>
|
||||||
|
|
||||||
|
@regex:
|
||||||
|
@string:
|
||||||
|
@string_len:
|
||||||
|
@start_position:
|
||||||
|
@match_options:
|
||||||
|
@error:
|
||||||
|
@Returns:
|
||||||
|
|
||||||
|
|
||||||
|
<!-- ##### FUNCTION g_regex_expand_references ##### -->
|
||||||
|
<para>
|
||||||
|
|
||||||
|
</para>
|
||||||
|
|
||||||
|
@regex:
|
||||||
|
@string:
|
||||||
|
@string_to_expand:
|
||||||
|
@error:
|
||||||
|
@Returns:
|
||||||
|
|
||||||
|
|
||||||
|
<!-- ##### FUNCTION g_regex_replace ##### -->
|
||||||
|
<para>
|
||||||
|
|
||||||
|
</para>
|
||||||
|
|
||||||
|
@regex:
|
||||||
|
@string:
|
||||||
|
@string_len:
|
||||||
|
@start_position:
|
||||||
|
@replacement:
|
||||||
|
@match_options:
|
||||||
|
@error:
|
||||||
|
@Returns:
|
||||||
|
|
||||||
|
|
||||||
|
<!-- ##### FUNCTION g_regex_replace_literal ##### -->
|
||||||
|
<para>
|
||||||
|
|
||||||
|
</para>
|
||||||
|
|
||||||
|
@regex:
|
||||||
|
@string:
|
||||||
|
@string_len:
|
||||||
|
@start_position:
|
||||||
|
@replacement:
|
||||||
|
@match_options:
|
||||||
|
@error:
|
||||||
|
@Returns:
|
||||||
|
|
||||||
|
|
||||||
|
<!-- ##### FUNCTION g_regex_replace_eval ##### -->
|
||||||
|
<para>
|
||||||
|
|
||||||
|
</para>
|
||||||
|
|
||||||
|
@regex:
|
||||||
|
@string:
|
||||||
|
@string_len:
|
||||||
|
@start_position:
|
||||||
|
@match_options:
|
||||||
|
@eval:
|
||||||
|
@user_data:
|
||||||
|
@error:
|
||||||
|
@Returns:
|
||||||
|
|
||||||
|
|
||||||
|
<!-- ##### FUNCTION g_regex_escape_string ##### -->
|
||||||
|
<para>
|
||||||
|
|
||||||
|
</para>
|
||||||
|
|
||||||
|
@string:
|
||||||
|
@length:
|
||||||
|
@Returns:
|
||||||
|
|
||||||
|
|
@ -6,9 +6,21 @@ PRINTF_SUBDIR = gnulib
|
|||||||
printf_la = gnulib/libgnulib.la
|
printf_la = gnulib/libgnulib.la
|
||||||
endif
|
endif
|
||||||
|
|
||||||
SUBDIRS = libcharset $(PRINTF_SUBDIR)
|
if ENABLE_REGEX
|
||||||
|
if USE_SYSTEM_PCRE
|
||||||
|
else
|
||||||
|
MAYBE_PCRE = pcre
|
||||||
|
endif
|
||||||
|
gregex_c = gregex.c
|
||||||
|
gregex_h = gregex.h
|
||||||
|
else
|
||||||
|
gregex_c =
|
||||||
|
gregex_h =
|
||||||
|
endif
|
||||||
|
|
||||||
DIST_SUBDIRS = libcharset gnulib
|
SUBDIRS = libcharset $(PRINTF_SUBDIR) $(MAYBE_PCRE) update-pcre
|
||||||
|
|
||||||
|
DIST_SUBDIRS = libcharset gnulib pcre update-pcre
|
||||||
|
|
||||||
INCLUDES = -I$(top_srcdir) -DG_LOG_DOMAIN=\"GLib\" \
|
INCLUDES = -I$(top_srcdir) -DG_LOG_DOMAIN=\"GLib\" \
|
||||||
$(GLIB_DEBUG_FLAGS) -DG_DISABLE_DEPRECATED -DGLIB_COMPILATION
|
$(GLIB_DEBUG_FLAGS) -DG_DISABLE_DEPRECATED -DGLIB_COMPILATION
|
||||||
@ -36,6 +48,8 @@ MIRRORING_TAB_SOURCES = \
|
|||||||
glib-mirroring-tab/packtab.h \
|
glib-mirroring-tab/packtab.h \
|
||||||
glib-mirroring-tab/packtab.c
|
glib-mirroring-tab/packtab.c
|
||||||
|
|
||||||
|
# The compilation of GRegex can be disabled, but the source files must
|
||||||
|
# be distributed.
|
||||||
EXTRA_DIST = \
|
EXTRA_DIST = \
|
||||||
makefile.msc.in \
|
makefile.msc.in \
|
||||||
glib.rc.in \
|
glib.rc.in \
|
||||||
@ -45,6 +59,8 @@ EXTRA_DIST = \
|
|||||||
abicheck.sh \
|
abicheck.sh \
|
||||||
pltcheck.sh \
|
pltcheck.sh \
|
||||||
glib.symbols \
|
glib.symbols \
|
||||||
|
gregex.c \
|
||||||
|
gregex.h \
|
||||||
$(MIRRORING_TAB_SOURCES)
|
$(MIRRORING_TAB_SOURCES)
|
||||||
|
|
||||||
# These may be in the builddir too
|
# These may be in the builddir too
|
||||||
@ -106,6 +122,7 @@ libglib_2_0_la_SOURCES = \
|
|||||||
gqueue.c \
|
gqueue.c \
|
||||||
grel.c \
|
grel.c \
|
||||||
grand.c \
|
grand.c \
|
||||||
|
$(gregex_c) \
|
||||||
gscanner.c \
|
gscanner.c \
|
||||||
gscripttable.h \
|
gscripttable.h \
|
||||||
gsequence.c \
|
gsequence.c \
|
||||||
@ -185,6 +202,7 @@ glibsubinclude_HEADERS = \
|
|||||||
gquark.h \
|
gquark.h \
|
||||||
gqueue.h \
|
gqueue.h \
|
||||||
grand.h \
|
grand.h \
|
||||||
|
$(gregex_h) \
|
||||||
grel.h \
|
grel.h \
|
||||||
gscanner.h \
|
gscanner.h \
|
||||||
gsequence.h \
|
gsequence.h \
|
||||||
@ -239,7 +257,17 @@ glib_win32_res = glib-win32-res.o
|
|||||||
glib_win32_res_ldflag = -Wl,$(glib_win32_res)
|
glib_win32_res_ldflag = -Wl,$(glib_win32_res)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
libglib_2_0_la_LIBADD = libcharset/libcharset.la $(printf_la) @GIO@ @GSPAWN@ @PLATFORMDEP@ @ICONV_LIBS@ @G_LIBS_EXTRA@
|
if ENABLE_REGEX
|
||||||
|
if USE_SYSTEM_PCRE
|
||||||
|
pcre_lib = $(PCRE_LIBS)
|
||||||
|
else
|
||||||
|
pcre_lib = pcre/libpcre.la
|
||||||
|
endif
|
||||||
|
else
|
||||||
|
pcre_lib =
|
||||||
|
endif
|
||||||
|
|
||||||
|
libglib_2_0_la_LIBADD = libcharset/libcharset.la $(printf_la) @GIO@ @GSPAWN@ @PLATFORMDEP@ @ICONV_LIBS@ @G_LIBS_EXTRA@ $(pcre_lib)
|
||||||
libglib_2_0_la_DEPENDENCIES = libcharset/libcharset.la $(printf_la) @GIO@ @GSPAWN@ @PLATFORMDEP@ $(glib_win32_res) @GLIB_DEF@
|
libglib_2_0_la_DEPENDENCIES = libcharset/libcharset.la $(printf_la) @GIO@ @GSPAWN@ @PLATFORMDEP@ $(glib_win32_res) @GLIB_DEF@
|
||||||
|
|
||||||
libglib_2_0_la_LDFLAGS = \
|
libglib_2_0_la_LDFLAGS = \
|
||||||
|
@ -62,6 +62,7 @@
|
|||||||
#include <glib/gqueue.h>
|
#include <glib/gqueue.h>
|
||||||
#include <glib/grand.h>
|
#include <glib/grand.h>
|
||||||
#include <glib/grel.h>
|
#include <glib/grel.h>
|
||||||
|
#include <glib/gregex.h>
|
||||||
#include <glib/gscanner.h>
|
#include <glib/gscanner.h>
|
||||||
#include <glib/gsequence.h>
|
#include <glib/gsequence.h>
|
||||||
#include <glib/gshell.h>
|
#include <glib/gshell.h>
|
||||||
|
@ -1416,6 +1416,43 @@ g_get_codeset
|
|||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if IN_HEADER(__G_REGEX_H__)
|
||||||
|
#if IN_FILE(__G_REGEX_C__)
|
||||||
|
g_regex_error_quark
|
||||||
|
g_regex_new
|
||||||
|
g_regex_free
|
||||||
|
g_regex_optimize
|
||||||
|
g_regex_copy
|
||||||
|
g_regex_get_pattern
|
||||||
|
g_regex_clear
|
||||||
|
g_regex_match_simple
|
||||||
|
g_regex_match
|
||||||
|
g_regex_match_full
|
||||||
|
g_regex_match_next
|
||||||
|
g_regex_match_next_full
|
||||||
|
g_regex_match_all
|
||||||
|
g_regex_match_all_full
|
||||||
|
g_regex_get_match_count
|
||||||
|
g_regex_is_partial_match
|
||||||
|
g_regex_fetch
|
||||||
|
g_regex_fetch_pos
|
||||||
|
g_regex_fetch_named
|
||||||
|
g_regex_fetch_named_pos
|
||||||
|
g_regex_fetch_all
|
||||||
|
g_regex_get_string_number
|
||||||
|
g_regex_split_simple
|
||||||
|
g_regex_split
|
||||||
|
g_regex_split_full
|
||||||
|
g_regex_split_next
|
||||||
|
g_regex_split_next_full
|
||||||
|
g_regex_expand_references
|
||||||
|
g_regex_replace
|
||||||
|
g_regex_replace_literal
|
||||||
|
g_regex_replace_eval
|
||||||
|
g_regex_escape_string
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
#if IN_HEADER(__G_WIN32_H__)
|
#if IN_HEADER(__G_WIN32_H__)
|
||||||
#if IN_FILE(__G_WIN32_H__)
|
#if IN_FILE(__G_WIN32_H__)
|
||||||
#ifdef G_OS_WIN32
|
#ifdef G_OS_WIN32
|
||||||
|
2448
glib/gregex.c
Normal file
2448
glib/gregex.c
Normal file
File diff suppressed because it is too large
Load Diff
197
glib/gregex.h
Normal file
197
glib/gregex.h
Normal file
@ -0,0 +1,197 @@
|
|||||||
|
/* GRegex -- regular expression API wrapper around PCRE.
|
||||||
|
*
|
||||||
|
* Copyright (C) 1999, 2000 Scott Wimer
|
||||||
|
* Copyright (C) 2004, Matthias Clasen <mclasen@redhat.com>
|
||||||
|
* Copyright (C) 2005 - 2006, Marco Barisione <marco@barisione.org>
|
||||||
|
*
|
||||||
|
* This library is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This library is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this library; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __G_REGEX_H__
|
||||||
|
#define __G_REGEX_H__
|
||||||
|
|
||||||
|
#include <glib.h>
|
||||||
|
|
||||||
|
G_BEGIN_DECLS
|
||||||
|
|
||||||
|
typedef enum
|
||||||
|
{
|
||||||
|
G_REGEX_ERROR_COMPILE,
|
||||||
|
G_REGEX_ERROR_OPTIMIZE,
|
||||||
|
G_REGEX_ERROR_REPLACE,
|
||||||
|
G_REGEX_ERROR_MATCH
|
||||||
|
} GRegexError;
|
||||||
|
|
||||||
|
#define G_REGEX_ERROR g_regex_error_quark ()
|
||||||
|
|
||||||
|
GQuark g_regex_error_quark (void);
|
||||||
|
|
||||||
|
/* Remember to update G_REGEX_COMPILE_MASK in gregex.c after
|
||||||
|
* adding a new flag. */
|
||||||
|
typedef enum
|
||||||
|
{
|
||||||
|
G_REGEX_CASELESS = 1 << 0,
|
||||||
|
G_REGEX_MULTILINE = 1 << 1,
|
||||||
|
G_REGEX_DOTALL = 1 << 2,
|
||||||
|
G_REGEX_EXTENDED = 1 << 3,
|
||||||
|
G_REGEX_ANCHORED = 1 << 4,
|
||||||
|
G_REGEX_DOLLAR_ENDONLY = 1 << 5,
|
||||||
|
G_REGEX_UNGREEDY = 1 << 9,
|
||||||
|
G_REGEX_RAW = 1 << 11,
|
||||||
|
G_REGEX_NO_AUTO_CAPTURE = 1 << 12,
|
||||||
|
G_REGEX_DUPNAMES = 1 << 19,
|
||||||
|
G_REGEX_NEWLINE_CR = 1 << 20,
|
||||||
|
G_REGEX_NEWLINE_LF = 1 << 21,
|
||||||
|
G_REGEX_NEWLINE_CRLF = G_REGEX_NEWLINE_CR | G_REGEX_NEWLINE_LF
|
||||||
|
} GRegexCompileFlags;
|
||||||
|
|
||||||
|
/* Remember to update G_REGEX_MATCH_MASK in gregex.c after
|
||||||
|
* adding a new flag. */
|
||||||
|
typedef enum
|
||||||
|
{
|
||||||
|
G_REGEX_MATCH_ANCHORED = 1 << 4,
|
||||||
|
G_REGEX_MATCH_NOTBOL = 1 << 7,
|
||||||
|
G_REGEX_MATCH_NOTEOL = 1 << 8,
|
||||||
|
G_REGEX_MATCH_NOTEMPTY = 1 << 10,
|
||||||
|
G_REGEX_MATCH_PARTIAL = 1 << 15,
|
||||||
|
G_REGEX_MATCH_NEWLINE_CR = 1 << 20,
|
||||||
|
G_REGEX_MATCH_NEWLINE_LF = 1 << 21,
|
||||||
|
G_REGEX_MATCH_NEWLINE_CRLF = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_LF,
|
||||||
|
G_REGEX_MATCH_NEWLINE_ANY = 1 << 22,
|
||||||
|
} GRegexMatchFlags;
|
||||||
|
|
||||||
|
typedef struct _GRegex GRegex;
|
||||||
|
|
||||||
|
typedef gboolean (*GRegexEvalCallback) (const GRegex*, const gchar*, GString*, gpointer);
|
||||||
|
|
||||||
|
|
||||||
|
GRegex *g_regex_new (const gchar *pattern,
|
||||||
|
GRegexCompileFlags compile_options,
|
||||||
|
GRegexMatchFlags match_options,
|
||||||
|
GError **error);
|
||||||
|
void g_regex_free (GRegex *regex);
|
||||||
|
gboolean g_regex_optimize (GRegex *regex,
|
||||||
|
GError **error);
|
||||||
|
GRegex *g_regex_copy (const GRegex *regex);
|
||||||
|
const gchar *g_regex_get_pattern (const GRegex *regex);
|
||||||
|
void g_regex_clear (GRegex *regex);
|
||||||
|
gboolean g_regex_match_simple (const gchar *pattern,
|
||||||
|
const gchar *string,
|
||||||
|
GRegexCompileFlags compile_options,
|
||||||
|
GRegexMatchFlags match_options);
|
||||||
|
gboolean g_regex_match (GRegex *regex,
|
||||||
|
const gchar *string,
|
||||||
|
GRegexMatchFlags match_options);
|
||||||
|
gboolean g_regex_match_full (GRegex *regex,
|
||||||
|
const gchar *string,
|
||||||
|
gssize string_len,
|
||||||
|
gint start_position,
|
||||||
|
GRegexMatchFlags match_options,
|
||||||
|
GError **error);
|
||||||
|
gboolean g_regex_match_next (GRegex *regex,
|
||||||
|
const gchar *string,
|
||||||
|
GRegexMatchFlags match_options);
|
||||||
|
gboolean g_regex_match_next_full (GRegex *regex,
|
||||||
|
const gchar *string,
|
||||||
|
gssize string_len,
|
||||||
|
gint start_position,
|
||||||
|
GRegexMatchFlags match_options,
|
||||||
|
GError **error);
|
||||||
|
gboolean g_regex_match_all (GRegex *regex,
|
||||||
|
const gchar *string,
|
||||||
|
GRegexMatchFlags match_options);
|
||||||
|
gboolean g_regex_match_all_full (GRegex *regex,
|
||||||
|
const gchar *string,
|
||||||
|
gssize string_len,
|
||||||
|
gint start_position,
|
||||||
|
GRegexMatchFlags match_options,
|
||||||
|
GError **error);
|
||||||
|
gint g_regex_get_match_count (const GRegex *regex);
|
||||||
|
gboolean g_regex_is_partial_match (const GRegex *regex);
|
||||||
|
gchar *g_regex_fetch (const GRegex *regex,
|
||||||
|
gint match_num,
|
||||||
|
const gchar *string);
|
||||||
|
gboolean g_regex_fetch_pos (const GRegex *regex,
|
||||||
|
gint match_num,
|
||||||
|
gint *start_pos,
|
||||||
|
gint *end_pos);
|
||||||
|
gchar *g_regex_fetch_named (const GRegex *regex,
|
||||||
|
const gchar *name,
|
||||||
|
const gchar *string);
|
||||||
|
gboolean g_regex_fetch_named_pos (const GRegex *regex,
|
||||||
|
const gchar *name,
|
||||||
|
gint *start_pos,
|
||||||
|
gint *end_pos);
|
||||||
|
gchar **g_regex_fetch_all (const GRegex *regex,
|
||||||
|
const gchar *string);
|
||||||
|
gint g_regex_get_string_number (const GRegex *regex,
|
||||||
|
const gchar *name);
|
||||||
|
gchar **g_regex_split_simple (const gchar *pattern,
|
||||||
|
const gchar *string,
|
||||||
|
GRegexCompileFlags compile_options,
|
||||||
|
GRegexMatchFlags match_options);
|
||||||
|
gchar **g_regex_split (GRegex *regex,
|
||||||
|
const gchar *string,
|
||||||
|
GRegexMatchFlags match_options);
|
||||||
|
gchar **g_regex_split_full (GRegex *regex,
|
||||||
|
const gchar *string,
|
||||||
|
gssize string_len,
|
||||||
|
gint start_position,
|
||||||
|
GRegexMatchFlags match_options,
|
||||||
|
gint max_tokens,
|
||||||
|
GError **error);
|
||||||
|
gchar *g_regex_split_next (GRegex *regex,
|
||||||
|
const gchar *string,
|
||||||
|
GRegexMatchFlags match_options);
|
||||||
|
gchar *g_regex_split_next_full (GRegex *regex,
|
||||||
|
const gchar *string,
|
||||||
|
gssize string_len,
|
||||||
|
gint start_position,
|
||||||
|
GRegexMatchFlags match_options,
|
||||||
|
GError **error);
|
||||||
|
gchar *g_regex_expand_references (GRegex *regex,
|
||||||
|
const gchar *string,
|
||||||
|
const gchar *string_to_expand,
|
||||||
|
GError **error);
|
||||||
|
gchar *g_regex_replace (GRegex *regex,
|
||||||
|
const gchar *string,
|
||||||
|
gssize string_len,
|
||||||
|
gint start_position,
|
||||||
|
const gchar *replacement,
|
||||||
|
GRegexMatchFlags match_options,
|
||||||
|
GError **error);
|
||||||
|
gchar *g_regex_replace_literal (GRegex *regex,
|
||||||
|
const gchar *string,
|
||||||
|
gssize string_len,
|
||||||
|
gint start_position,
|
||||||
|
const gchar *replacement,
|
||||||
|
GRegexMatchFlags match_options,
|
||||||
|
GError **error);
|
||||||
|
gchar *g_regex_replace_eval (GRegex *regex,
|
||||||
|
const gchar *string,
|
||||||
|
gssize string_len,
|
||||||
|
gint start_position,
|
||||||
|
GRegexMatchFlags match_options,
|
||||||
|
GRegexEvalCallback eval,
|
||||||
|
gpointer user_data,
|
||||||
|
GError **error);
|
||||||
|
gchar *g_regex_escape_string (const gchar *string,
|
||||||
|
gint length);
|
||||||
|
|
||||||
|
|
||||||
|
G_END_DECLS
|
||||||
|
|
||||||
|
|
||||||
|
#endif /* __G_REGEX_H__ */
|
@ -17,6 +17,7 @@ all : \
|
|||||||
galias.h \
|
galias.h \
|
||||||
galiasdef.c \
|
galiasdef.c \
|
||||||
gnulib\gnulib.lib \
|
gnulib\gnulib.lib \
|
||||||
|
pcre\pcre.lib \
|
||||||
libglib-2.0-0.dll \
|
libglib-2.0-0.dll \
|
||||||
glib-@GLIB_MAJOR_VERSION@.@GLIB_MINOR_VERSION@s.lib \
|
glib-@GLIB_MAJOR_VERSION@.@GLIB_MINOR_VERSION@s.lib \
|
||||||
gspawn-win32-helper.exe \
|
gspawn-win32-helper.exe \
|
||||||
@ -27,6 +28,11 @@ gnulib\gnulib.lib :
|
|||||||
nmake -f makefile.msc
|
nmake -f makefile.msc
|
||||||
cd ..
|
cd ..
|
||||||
|
|
||||||
|
pcre\pcre.lib :
|
||||||
|
cd pcre
|
||||||
|
nmake -f makefile.msc
|
||||||
|
cd ..
|
||||||
|
|
||||||
glib_OBJECTS = \
|
glib_OBJECTS = \
|
||||||
garray.obj \
|
garray.obj \
|
||||||
gasyncqueue.obj \
|
gasyncqueue.obj \
|
||||||
@ -61,6 +67,7 @@ glib_OBJECTS = \
|
|||||||
gpattern.obj \
|
gpattern.obj \
|
||||||
gprintf.obj \
|
gprintf.obj \
|
||||||
grand.obj \
|
grand.obj \
|
||||||
|
gregex.obj \
|
||||||
grel.obj \
|
grel.obj \
|
||||||
gscanner.obj \
|
gscanner.obj \
|
||||||
gsequence.obj \
|
gsequence.obj \
|
||||||
@ -112,12 +119,12 @@ glib.res : glib.rc
|
|||||||
|
|
||||||
# create a static library
|
# create a static library
|
||||||
# static library can well have the real version number in the name
|
# static library can well have the real version number in the name
|
||||||
glib-@GLIB_MAJOR_VERSION@.@GLIB_MINOR_VERSION@s.lib : $(glib_OBJECTS) gnulib\gnulib.lib
|
glib-@GLIB_MAJOR_VERSION@.@GLIB_MINOR_VERSION@s.lib : $(glib_OBJECTS) gnulib\gnulib.lib pcre\pcre.lib
|
||||||
lib /out:glib-@GLIB_MAJOR_VERSION@.@GLIB_MINOR_VERSION@s.lib $(glib_OBJECTS) gnulib\gnulib.lib
|
lib /out:glib-@GLIB_MAJOR_VERSION@.@GLIB_MINOR_VERSION@s.lib $(glib_OBJECTS) gnulib\gnulib.lib pcre\pcre.lib
|
||||||
|
|
||||||
libglib-2.0-0.dll : $(glib_OBJECTS) gnulib\gnulib.lib glib.def glib.res
|
libglib-2.0-0.dll : $(glib_OBJECTS) gnulib\gnulib.lib pcre\pcre.lib glib.def glib.res
|
||||||
$(CC) $(CFLAGS) -LD -Fe$@ $(glib_OBJECTS) glib.res $(LIBICONV_LIBS) $(INTL_LIBS) \
|
$(CC) $(CFLAGS) -LD -Fe$@ $(glib_OBJECTS) glib.res $(LIBICONV_LIBS) $(INTL_LIBS) \
|
||||||
gnulib\gnulib.lib $(DIRENT_LIBS) user32.lib advapi32.lib shell32.lib wsock32.lib ole32.lib ws2_32.lib \
|
gnulib\gnulib.lib pcre\pcre.lib $(DIRENT_LIBS) user32.lib advapi32.lib shell32.lib wsock32.lib ole32.lib ws2_32.lib \
|
||||||
$(LDFLAGS) /implib:glib-2.0.lib /def:glib.def
|
$(LDFLAGS) /implib:glib-2.0.lib /def:glib.def
|
||||||
|
|
||||||
gspawn-win32-helper.exe : gspawn-win32-helper.c libglib-2.0-@LT_CURRENT_MINUS_AGE@.dll
|
gspawn-win32-helper.exe : gspawn-win32-helper.c libglib-2.0-@LT_CURRENT_MINUS_AGE@.dll
|
||||||
|
68
glib/pcre/COPYING
Normal file
68
glib/pcre/COPYING
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
PCRE LICENCE
|
||||||
|
------------
|
||||||
|
|
||||||
|
PCRE is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Release 7 of PCRE is distributed under the terms of the "BSD" licence, as
|
||||||
|
specified below. The documentation for PCRE, supplied in the "doc"
|
||||||
|
directory, is distributed under the same terms as the software itself.
|
||||||
|
|
||||||
|
The basic library functions are written in C and are freestanding. Also
|
||||||
|
included in the distribution is a set of C++ wrapper functions.
|
||||||
|
|
||||||
|
|
||||||
|
THE BASIC LIBRARY FUNCTIONS
|
||||||
|
---------------------------
|
||||||
|
|
||||||
|
Written by: Philip Hazel
|
||||||
|
Email local part: ph10
|
||||||
|
Email domain: cam.ac.uk
|
||||||
|
|
||||||
|
University of Cambridge Computing Service,
|
||||||
|
Cambridge, England. Phone: +44 1223 334714.
|
||||||
|
|
||||||
|
Copyright (c) 1997-2006 University of Cambridge
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
|
||||||
|
THE C++ WRAPPER FUNCTIONS
|
||||||
|
-------------------------
|
||||||
|
|
||||||
|
Contributed by: Google Inc.
|
||||||
|
|
||||||
|
Copyright (c) 2006, Google Inc.
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
|
||||||
|
THE "BSD" LICENCE
|
||||||
|
-----------------
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the name of Google
|
||||||
|
Inc. nor the names of their contributors may be used to endorse or
|
||||||
|
promote products derived from this software without specific prior
|
||||||
|
written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
End
|
62
glib/pcre/Makefile.am
Normal file
62
glib/pcre/Makefile.am
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
INCLUDES = \
|
||||||
|
-DG_LOG_DOMAIN=\"GLib-GRegex\" \
|
||||||
|
-DSUPPORT_UCP \
|
||||||
|
-DSUPPORT_UTF8 \
|
||||||
|
-DNEWLINE=-1 \
|
||||||
|
-DMATCH_LIMIT=10000000 \
|
||||||
|
-DMATCH_LIMIT_RECURSION=10000000 \
|
||||||
|
-DMAX_NAME_SIZE=32 \
|
||||||
|
-DMAX_NAME_COUNT=10000 \
|
||||||
|
-DMAX_DUPLENGTH=30000 \
|
||||||
|
-DLINK_SIZE=2 \
|
||||||
|
-DEBCDIC=0 \
|
||||||
|
-DPOSIX_MALLOC_THRESHOLD=10 \
|
||||||
|
-I$(top_srcdir) \
|
||||||
|
-I$(srcdir) \
|
||||||
|
-I$(top_srcdir)/glib \
|
||||||
|
@GLIB_DEBUG_FLAGS@ \
|
||||||
|
-DG_DISABLE_DEPRECATED \
|
||||||
|
$(DEPRECATED_FLAGS)\
|
||||||
|
$(WARN_CFLAGS) \
|
||||||
|
$(PCRE_WARN_CFLAGS) \
|
||||||
|
$(DEP_CFLAGS)
|
||||||
|
|
||||||
|
noinst_LTLIBRARIES = libpcre.la
|
||||||
|
|
||||||
|
libpcre_headers =
|
||||||
|
|
||||||
|
libpcre_la_SOURCES = \
|
||||||
|
pcre_chartables.c \
|
||||||
|
pcre_compile.c \
|
||||||
|
pcre_config.c \
|
||||||
|
pcre_dfa_exec.c \
|
||||||
|
pcre_exec.c \
|
||||||
|
pcre_fullinfo.c \
|
||||||
|
pcre_get.c \
|
||||||
|
pcre_globals.c \
|
||||||
|
pcre_info.c \
|
||||||
|
pcre_maketables.c \
|
||||||
|
pcre_newline.c \
|
||||||
|
pcre_ord2utf8.c \
|
||||||
|
pcre_refcount.c \
|
||||||
|
pcre_study.c \
|
||||||
|
pcre_tables.c \
|
||||||
|
pcre_try_flipped.c \
|
||||||
|
pcre_ucp_searchfuncs.c \
|
||||||
|
pcre_valid_utf8.c \
|
||||||
|
pcre_version.c \
|
||||||
|
pcre_xclass.c \
|
||||||
|
pcre.h \
|
||||||
|
pcre_internal.h \
|
||||||
|
ucp.h \
|
||||||
|
ucpinternal.h \
|
||||||
|
$(libpcre_headers)
|
||||||
|
|
||||||
|
libpcre_la_LIBADD = $(DEP_LIBS)
|
||||||
|
|
||||||
|
libpcre_la_LDFLAGS = -no-undefined
|
||||||
|
|
||||||
|
EXTRA_DIST = \
|
||||||
|
COPYING \
|
||||||
|
makefile.msc
|
||||||
|
|
49
glib/pcre/makefile.msc
Normal file
49
glib/pcre/makefile.msc
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
!IFDEF DEBUG
|
||||||
|
CRT=-MDd
|
||||||
|
!ELSE
|
||||||
|
CRT=-MD
|
||||||
|
!ENDIF
|
||||||
|
|
||||||
|
CFLAGS = \
|
||||||
|
-I ..\.. \
|
||||||
|
-DHAVE_CONFIG_H \
|
||||||
|
-DHAVE_LONG_LONG_FORMAT \
|
||||||
|
-DSUPPORT_UCP \
|
||||||
|
-DSUPPORT_UTF8 \
|
||||||
|
-DNEWLINE=10 \
|
||||||
|
-DMATCH_LIMIT=10000000 \
|
||||||
|
-DMATCH_LIMIT_RECURSION=10000000 \
|
||||||
|
-DMAX_NAME_SIZE=32 \
|
||||||
|
-DMAX_NAME_COUNT=10000 \
|
||||||
|
-DMAX_DUPLENGTH=30000 \
|
||||||
|
-DLINK_SIZE=2 \
|
||||||
|
-DEBCDIC=0 \
|
||||||
|
-DPOSIX_MALLOC_THRESHOLD=10
|
||||||
|
|
||||||
|
OBJECTS = \
|
||||||
|
pcre_chartables.obj \
|
||||||
|
pcre_compile.obj \
|
||||||
|
pcre_config.obj \
|
||||||
|
pcre_dfa_exec.obj \
|
||||||
|
pcre_exec.obj \
|
||||||
|
pcre_fullinfo.obj \
|
||||||
|
pcre_get.obj \
|
||||||
|
pcre_globals.obj \
|
||||||
|
pcre_info.obj \
|
||||||
|
pcre_maketables.obj \
|
||||||
|
pcre_newline.obj \
|
||||||
|
pcre_ord2utf8.obj \
|
||||||
|
pcre_refcount.obj \
|
||||||
|
pcre_study.obj \
|
||||||
|
pcre_tables.obj \
|
||||||
|
pcre_try_flipped.obj \
|
||||||
|
pcre_ucp_searchfuncs.obj \
|
||||||
|
pcre_valid_utf8.obj \
|
||||||
|
pcre_version.obj \
|
||||||
|
pcre_xclass.obj \
|
||||||
|
|
||||||
|
pcre.lib : $(OBJECTS)
|
||||||
|
lib -out:pcre.lib $(OBJECTS)
|
||||||
|
|
||||||
|
.c.obj:
|
||||||
|
$(CC) $(CRT) $(CFLAGS) -Ox -GD -c $<
|
283
glib/pcre/pcre.h
Normal file
283
glib/pcre/pcre.h
Normal file
@ -0,0 +1,283 @@
|
|||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This is the public header file for the PCRE library, to be #included by
|
||||||
|
applications that call the PCRE functions.
|
||||||
|
|
||||||
|
Copyright (c) 1997-2006 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef _PCRE_H
|
||||||
|
#define _PCRE_H
|
||||||
|
|
||||||
|
/* The current PCRE version information. */
|
||||||
|
|
||||||
|
/* NOTES FOR FUTURE MAINTAINERS: Do not use numbers with leading zeros, because
|
||||||
|
they may be treated as octal constants. The PCRE_PRERELEASE feature is for
|
||||||
|
identifying release candidates. It might be defined as -RC2, for example. In
|
||||||
|
real releases, it should be defined empty. Do not change the alignment of these
|
||||||
|
statements. The code in ./configure greps out the version numbers by using "cut"
|
||||||
|
to get values from column 29 onwards. These are substituted into pcre-config
|
||||||
|
and libpcre.pc. The values are not put into configure.ac and substituted here
|
||||||
|
(which would simplify this issue) because that makes life harder for those who
|
||||||
|
cannot run ./configure. As it now stands, this file need not be edited in that
|
||||||
|
circumstance. */
|
||||||
|
|
||||||
|
#define PCRE_MAJOR 7
|
||||||
|
#define PCRE_MINOR 0
|
||||||
|
#define PCRE_PRERELEASE
|
||||||
|
#define PCRE_DATE 18-Dec-2006
|
||||||
|
|
||||||
|
/* Win32 uses DLL by default; it needs special stuff for exported functions
|
||||||
|
when building PCRE. */
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
# ifdef PCRE_DEFINITION
|
||||||
|
# ifdef DLL_EXPORT
|
||||||
|
# define PCRE_DATA_SCOPE __declspec(dllexport)
|
||||||
|
# endif
|
||||||
|
# else
|
||||||
|
# ifndef PCRE_STATIC
|
||||||
|
# define PCRE_DATA_SCOPE extern __declspec(dllimport)
|
||||||
|
# endif
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Otherwise, we use the standard "extern". */
|
||||||
|
|
||||||
|
#ifndef PCRE_DATA_SCOPE
|
||||||
|
# ifdef __cplusplus
|
||||||
|
# define PCRE_DATA_SCOPE extern "C"
|
||||||
|
# else
|
||||||
|
# define PCRE_DATA_SCOPE extern
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Have to include stdlib.h in order to ensure that size_t is defined;
|
||||||
|
it is needed here for malloc. */
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
/* Allow for C++ users */
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Options */
|
||||||
|
|
||||||
|
#define PCRE_CASELESS 0x00000001
|
||||||
|
#define PCRE_MULTILINE 0x00000002
|
||||||
|
#define PCRE_DOTALL 0x00000004
|
||||||
|
#define PCRE_EXTENDED 0x00000008
|
||||||
|
#define PCRE_ANCHORED 0x00000010
|
||||||
|
#define PCRE_DOLLAR_ENDONLY 0x00000020
|
||||||
|
#define PCRE_EXTRA 0x00000040
|
||||||
|
#define PCRE_NOTBOL 0x00000080
|
||||||
|
#define PCRE_NOTEOL 0x00000100
|
||||||
|
#define PCRE_UNGREEDY 0x00000200
|
||||||
|
#define PCRE_NOTEMPTY 0x00000400
|
||||||
|
#define PCRE_UTF8 0x00000800
|
||||||
|
#define PCRE_NO_AUTO_CAPTURE 0x00001000
|
||||||
|
#define PCRE_NO_UTF8_CHECK 0x00002000
|
||||||
|
#define PCRE_AUTO_CALLOUT 0x00004000
|
||||||
|
#define PCRE_PARTIAL 0x00008000
|
||||||
|
#define PCRE_DFA_SHORTEST 0x00010000
|
||||||
|
#define PCRE_DFA_RESTART 0x00020000
|
||||||
|
#define PCRE_FIRSTLINE 0x00040000
|
||||||
|
#define PCRE_DUPNAMES 0x00080000
|
||||||
|
#define PCRE_NEWLINE_CR 0x00100000
|
||||||
|
#define PCRE_NEWLINE_LF 0x00200000
|
||||||
|
#define PCRE_NEWLINE_CRLF 0x00300000
|
||||||
|
#define PCRE_NEWLINE_ANY 0x00400000
|
||||||
|
|
||||||
|
/* Exec-time and get/set-time error codes */
|
||||||
|
|
||||||
|
#define PCRE_ERROR_NOMATCH (-1)
|
||||||
|
#define PCRE_ERROR_NULL (-2)
|
||||||
|
#define PCRE_ERROR_BADOPTION (-3)
|
||||||
|
#define PCRE_ERROR_BADMAGIC (-4)
|
||||||
|
#define PCRE_ERROR_UNKNOWN_OPCODE (-5)
|
||||||
|
#define PCRE_ERROR_UNKNOWN_NODE (-5) /* For backward compatibility */
|
||||||
|
#define PCRE_ERROR_NOMEMORY (-6)
|
||||||
|
#define PCRE_ERROR_NOSUBSTRING (-7)
|
||||||
|
#define PCRE_ERROR_MATCHLIMIT (-8)
|
||||||
|
#define PCRE_ERROR_CALLOUT (-9) /* Never used by PCRE itself */
|
||||||
|
#define PCRE_ERROR_BADUTF8 (-10)
|
||||||
|
#define PCRE_ERROR_BADUTF8_OFFSET (-11)
|
||||||
|
#define PCRE_ERROR_PARTIAL (-12)
|
||||||
|
#define PCRE_ERROR_BADPARTIAL (-13)
|
||||||
|
#define PCRE_ERROR_INTERNAL (-14)
|
||||||
|
#define PCRE_ERROR_BADCOUNT (-15)
|
||||||
|
#define PCRE_ERROR_DFA_UITEM (-16)
|
||||||
|
#define PCRE_ERROR_DFA_UCOND (-17)
|
||||||
|
#define PCRE_ERROR_DFA_UMLIMIT (-18)
|
||||||
|
#define PCRE_ERROR_DFA_WSSIZE (-19)
|
||||||
|
#define PCRE_ERROR_DFA_RECURSE (-20)
|
||||||
|
#define PCRE_ERROR_RECURSIONLIMIT (-21)
|
||||||
|
#define PCRE_ERROR_NULLWSLIMIT (-22)
|
||||||
|
#define PCRE_ERROR_BADNEWLINE (-23)
|
||||||
|
|
||||||
|
/* Request types for pcre_fullinfo() */
|
||||||
|
|
||||||
|
#define PCRE_INFO_OPTIONS 0
|
||||||
|
#define PCRE_INFO_SIZE 1
|
||||||
|
#define PCRE_INFO_CAPTURECOUNT 2
|
||||||
|
#define PCRE_INFO_BACKREFMAX 3
|
||||||
|
#define PCRE_INFO_FIRSTBYTE 4
|
||||||
|
#define PCRE_INFO_FIRSTCHAR 4 /* For backwards compatibility */
|
||||||
|
#define PCRE_INFO_FIRSTTABLE 5
|
||||||
|
#define PCRE_INFO_LASTLITERAL 6
|
||||||
|
#define PCRE_INFO_NAMEENTRYSIZE 7
|
||||||
|
#define PCRE_INFO_NAMECOUNT 8
|
||||||
|
#define PCRE_INFO_NAMETABLE 9
|
||||||
|
#define PCRE_INFO_STUDYSIZE 10
|
||||||
|
#define PCRE_INFO_DEFAULT_TABLES 11
|
||||||
|
|
||||||
|
/* Request types for pcre_config(). Do not re-arrange, in order to remain
|
||||||
|
compatible. */
|
||||||
|
|
||||||
|
#define PCRE_CONFIG_UTF8 0
|
||||||
|
#define PCRE_CONFIG_NEWLINE 1
|
||||||
|
#define PCRE_CONFIG_LINK_SIZE 2
|
||||||
|
#define PCRE_CONFIG_POSIX_MALLOC_THRESHOLD 3
|
||||||
|
#define PCRE_CONFIG_MATCH_LIMIT 4
|
||||||
|
#define PCRE_CONFIG_STACKRECURSE 5
|
||||||
|
#define PCRE_CONFIG_UNICODE_PROPERTIES 6
|
||||||
|
#define PCRE_CONFIG_MATCH_LIMIT_RECURSION 7
|
||||||
|
|
||||||
|
/* Bit flags for the pcre_extra structure. Do not re-arrange or redefine
|
||||||
|
these bits, just add new ones on the end, in order to remain compatible. */
|
||||||
|
|
||||||
|
#define PCRE_EXTRA_STUDY_DATA 0x0001
|
||||||
|
#define PCRE_EXTRA_MATCH_LIMIT 0x0002
|
||||||
|
#define PCRE_EXTRA_CALLOUT_DATA 0x0004
|
||||||
|
#define PCRE_EXTRA_TABLES 0x0008
|
||||||
|
#define PCRE_EXTRA_MATCH_LIMIT_RECURSION 0x0010
|
||||||
|
|
||||||
|
/* Types */
|
||||||
|
|
||||||
|
struct real_pcre; /* declaration; the definition is private */
|
||||||
|
typedef struct real_pcre pcre;
|
||||||
|
|
||||||
|
/* When PCRE is compiled as a C++ library, the subject pointer type can be
|
||||||
|
replaced with a custom type. For conventional use, the public interface is a
|
||||||
|
const char *. */
|
||||||
|
|
||||||
|
#ifndef PCRE_SPTR
|
||||||
|
#define PCRE_SPTR const char *
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* The structure for passing additional data to pcre_exec(). This is defined in
|
||||||
|
such a way as to be extensible. Always add new fields at the end, in order to
|
||||||
|
remain compatible. */
|
||||||
|
|
||||||
|
typedef struct pcre_extra {
  /* Bit mask saying which of the fields below have been set; built from the
     PCRE_EXTRA_xxx flag values. */
  unsigned long flags;

  /* Opaque data from pcre_study(). */
  void *study_data;

  /* Maximum number of calls to match(). */
  unsigned long match_limit;

  /* Data passed back in callouts. */
  void *callout_data;

  /* Pointer to character tables. */
  const unsigned char *tables;

  /* Maximum number of recursive calls to match(). */
  unsigned long match_limit_recursion;
} pcre_extra;
|
||||||
|
|
||||||
|
/* The structure for passing out data via the pcre_callout_function. We use a
|
||||||
|
structure so that new fields can be added on the end in future versions,
|
||||||
|
without changing the API of the function, thereby allowing old clients to work
|
||||||
|
without modification. */
|
||||||
|
|
||||||
|
typedef struct pcre_callout_block {
|
||||||
|
int version; /* Identifies version of block */
|
||||||
|
/* ------------------------ Version 0 ------------------------------- */
|
||||||
|
int callout_number; /* Number compiled into pattern */
|
||||||
|
int *offset_vector; /* The offset vector */
|
||||||
|
PCRE_SPTR subject; /* The subject being matched */
|
||||||
|
int subject_length; /* The length of the subject */
|
||||||
|
int start_match; /* Offset to start of this match attempt */
|
||||||
|
int current_position; /* Where we currently are in the subject */
|
||||||
|
int capture_top; /* Max current capture */
|
||||||
|
int capture_last; /* Most recently closed capture */
|
||||||
|
void *callout_data; /* Data passed in with the call */
|
||||||
|
/* ------------------- Added for Version 1 -------------------------- */
|
||||||
|
int pattern_position; /* Offset to next item in the pattern */
|
||||||
|
int next_item_length; /* Length of next item in the pattern */
|
||||||
|
/* ------------------------------------------------------------------ */
|
||||||
|
} pcre_callout_block;
|
||||||
|
|
||||||
|
#include "glib.h"
|
||||||
|
#include "galias.h"
|
||||||
|
|
||||||
|
#define pcre_malloc g_try_malloc
|
||||||
|
#define pcre_free g_free
|
||||||
|
#define pcre_stack_malloc g_try_malloc
|
||||||
|
|
||||||
|
PCRE_DATA_SCOPE int (*pcre_callout)(pcre_callout_block *);
|
||||||
|
|
||||||
|
/* Exported PCRE functions */
|
||||||
|
|
||||||
|
PCRE_DATA_SCOPE pcre *pcre_compile(const char *, int, const char **, int *,
|
||||||
|
const unsigned char *);
|
||||||
|
PCRE_DATA_SCOPE pcre *pcre_compile2(const char *, int, int *, const char **,
|
||||||
|
int *, const unsigned char *);
|
||||||
|
PCRE_DATA_SCOPE int pcre_config(int, void *);
|
||||||
|
PCRE_DATA_SCOPE int pcre_copy_named_substring(const pcre *, const char *,
|
||||||
|
int *, int, const char *, char *, int);
|
||||||
|
PCRE_DATA_SCOPE int pcre_copy_substring(const char *, int *, int, int, char *,
|
||||||
|
int);
|
||||||
|
PCRE_DATA_SCOPE int pcre_dfa_exec(const pcre *, const pcre_extra *,
|
||||||
|
const char *, int, int, int, int *, int , int *, int);
|
||||||
|
PCRE_DATA_SCOPE int pcre_exec(const pcre *, const pcre_extra *, PCRE_SPTR,
|
||||||
|
int, int, int, int *, int);
|
||||||
|
PCRE_DATA_SCOPE void pcre_free_substring(const char *);
|
||||||
|
PCRE_DATA_SCOPE void pcre_free_substring_list(const char **);
|
||||||
|
PCRE_DATA_SCOPE int pcre_fullinfo(const pcre *, const pcre_extra *, int,
|
||||||
|
void *);
|
||||||
|
PCRE_DATA_SCOPE int pcre_get_named_substring(const pcre *, const char *,
|
||||||
|
int *, int, const char *, const char **);
|
||||||
|
PCRE_DATA_SCOPE int pcre_get_stringnumber(const pcre *, const char *);
|
||||||
|
PCRE_DATA_SCOPE int pcre_get_stringtable_entries(const pcre *, const char *,
|
||||||
|
char **, char **);
|
||||||
|
PCRE_DATA_SCOPE int pcre_get_substring(const char *, int *, int, int,
|
||||||
|
const char **);
|
||||||
|
PCRE_DATA_SCOPE int pcre_get_substring_list(const char *, int *, int,
|
||||||
|
const char ***);
|
||||||
|
PCRE_DATA_SCOPE int pcre_info(const pcre *, int *, int *);
|
||||||
|
PCRE_DATA_SCOPE const unsigned char *pcre_maketables(void);
|
||||||
|
PCRE_DATA_SCOPE int pcre_refcount(pcre *, int);
|
||||||
|
PCRE_DATA_SCOPE pcre_extra *pcre_study(const pcre *, int, const char **);
|
||||||
|
PCRE_DATA_SCOPE const char *pcre_version(void);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
} /* extern "C" */
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif /* End of pcre.h */
|
195
glib/pcre/pcre_chartables.c
Normal file
195
glib/pcre/pcre_chartables.c
Normal file
@ -0,0 +1,195 @@
|
|||||||
|
/* This file is autogenerated by ../update-pcre/update.sh during
|
||||||
|
* the update of the local copy of PCRE.
|
||||||
|
*/
|
||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This file is automatically written by the dftables auxiliary
|
||||||
|
program. If you edit it by hand, you might like to edit the Makefile to
|
||||||
|
prevent its ever being regenerated.
|
||||||
|
|
||||||
|
This file contains the default tables for characters with codes less than
|
||||||
|
128 (ASCII characters). These tables are used when no external tables are
|
||||||
|
passed to PCRE.
|
||||||
|
|
||||||
|
The following #include is present because without it gcc 4.x may remove
|
||||||
|
the array definition from the final binary if PCRE is built into a static
|
||||||
|
library and dead code stripping is activated. This leads to link errors.
|
||||||
|
Pulling in the header ensures that the array gets flagged as "someone
|
||||||
|
outside this compilation unit might reference this" and so it will always
|
||||||
|
be supplied to the linker. */
|
||||||
|
|
||||||
|
#include "pcre_internal.h"
|
||||||
|
|
||||||
|
const unsigned char _pcre_default_tables[] = {
|
||||||
|
|
||||||
|
/* This table is a lower casing table. */
|
||||||
|
|
||||||
|
0, 1, 2, 3, 4, 5, 6, 7,
|
||||||
|
8, 9, 10, 11, 12, 13, 14, 15,
|
||||||
|
16, 17, 18, 19, 20, 21, 22, 23,
|
||||||
|
24, 25, 26, 27, 28, 29, 30, 31,
|
||||||
|
32, 33, 34, 35, 36, 37, 38, 39,
|
||||||
|
40, 41, 42, 43, 44, 45, 46, 47,
|
||||||
|
48, 49, 50, 51, 52, 53, 54, 55,
|
||||||
|
56, 57, 58, 59, 60, 61, 62, 63,
|
||||||
|
64, 97, 98, 99,100,101,102,103,
|
||||||
|
104,105,106,107,108,109,110,111,
|
||||||
|
112,113,114,115,116,117,118,119,
|
||||||
|
120,121,122, 91, 92, 93, 94, 95,
|
||||||
|
96, 97, 98, 99,100,101,102,103,
|
||||||
|
104,105,106,107,108,109,110,111,
|
||||||
|
112,113,114,115,116,117,118,119,
|
||||||
|
120,121,122,123,124,125,126,127,
|
||||||
|
128,129,130,131,132,133,134,135,
|
||||||
|
136,137,138,139,140,141,142,143,
|
||||||
|
144,145,146,147,148,149,150,151,
|
||||||
|
152,153,154,155,156,157,158,159,
|
||||||
|
160,161,162,163,164,165,166,167,
|
||||||
|
168,169,170,171,172,173,174,175,
|
||||||
|
176,177,178,179,180,181,182,183,
|
||||||
|
184,185,186,187,188,189,190,191,
|
||||||
|
192,193,194,195,196,197,198,199,
|
||||||
|
200,201,202,203,204,205,206,207,
|
||||||
|
208,209,210,211,212,213,214,215,
|
||||||
|
216,217,218,219,220,221,222,223,
|
||||||
|
224,225,226,227,228,229,230,231,
|
||||||
|
232,233,234,235,236,237,238,239,
|
||||||
|
240,241,242,243,244,245,246,247,
|
||||||
|
248,249,250,251,252,253,254,255,
|
||||||
|
|
||||||
|
/* This table is a case flipping table. */
|
||||||
|
|
||||||
|
0, 1, 2, 3, 4, 5, 6, 7,
|
||||||
|
8, 9, 10, 11, 12, 13, 14, 15,
|
||||||
|
16, 17, 18, 19, 20, 21, 22, 23,
|
||||||
|
24, 25, 26, 27, 28, 29, 30, 31,
|
||||||
|
32, 33, 34, 35, 36, 37, 38, 39,
|
||||||
|
40, 41, 42, 43, 44, 45, 46, 47,
|
||||||
|
48, 49, 50, 51, 52, 53, 54, 55,
|
||||||
|
56, 57, 58, 59, 60, 61, 62, 63,
|
||||||
|
64, 97, 98, 99,100,101,102,103,
|
||||||
|
104,105,106,107,108,109,110,111,
|
||||||
|
112,113,114,115,116,117,118,119,
|
||||||
|
120,121,122, 91, 92, 93, 94, 95,
|
||||||
|
96, 65, 66, 67, 68, 69, 70, 71,
|
||||||
|
72, 73, 74, 75, 76, 77, 78, 79,
|
||||||
|
80, 81, 82, 83, 84, 85, 86, 87,
|
||||||
|
88, 89, 90,123,124,125,126,127,
|
||||||
|
128,129,130,131,132,133,134,135,
|
||||||
|
136,137,138,139,140,141,142,143,
|
||||||
|
144,145,146,147,148,149,150,151,
|
||||||
|
152,153,154,155,156,157,158,159,
|
||||||
|
160,161,162,163,164,165,166,167,
|
||||||
|
168,169,170,171,172,173,174,175,
|
||||||
|
176,177,178,179,180,181,182,183,
|
||||||
|
184,185,186,187,188,189,190,191,
|
||||||
|
192,193,194,195,196,197,198,199,
|
||||||
|
200,201,202,203,204,205,206,207,
|
||||||
|
208,209,210,211,212,213,214,215,
|
||||||
|
216,217,218,219,220,221,222,223,
|
||||||
|
224,225,226,227,228,229,230,231,
|
||||||
|
232,233,234,235,236,237,238,239,
|
||||||
|
240,241,242,243,244,245,246,247,
|
||||||
|
248,249,250,251,252,253,254,255,
|
||||||
|
|
||||||
|
/* This table contains bit maps for various character classes.
|
||||||
|
Each map is 32 bytes long and the bits run from the least
|
||||||
|
significant end of each byte. The classes that have their own
|
||||||
|
maps are: space, xdigit, digit, upper, lower, word, graph,
|
||||||
|
print, punct, and cntrl. Other classes are built from combinations. */
|
||||||
|
|
||||||
|
0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
|
||||||
|
0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
|
||||||
|
0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
|
||||||
|
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
|
||||||
|
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
|
||||||
|
0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
|
||||||
|
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
|
||||||
|
0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
|
||||||
|
0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
|
||||||
|
0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
|
||||||
|
/* This table identifies various classes of character by individual bits:
|
||||||
|
0x01 white space character
|
||||||
|
0x02 letter
|
||||||
|
0x04 decimal digit
|
||||||
|
0x08 hexadecimal digit
|
||||||
|
0x10 alphanumeric or '_'
|
||||||
|
0x80 regular expression metacharacter or binary zero
|
||||||
|
*/
|
||||||
|
|
||||||
|
0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
|
||||||
|
0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
|
||||||
|
0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
|
||||||
|
0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
|
||||||
|
0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
|
||||||
|
0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
|
||||||
|
0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
|
||||||
|
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
|
||||||
|
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
|
||||||
|
0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
|
||||||
|
0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
|
||||||
|
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
|
||||||
|
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
|
||||||
|
0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
|
||||||
|
|
||||||
|
/* End of chartables.c */
|
5385
glib/pcre/pcre_compile.c
Normal file
5385
glib/pcre/pcre_compile.c
Normal file
File diff suppressed because it is too large
Load Diff
116
glib/pcre/pcre_config.c
Normal file
116
glib/pcre/pcre_config.c
Normal file
@ -0,0 +1,116 @@
|
|||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Written by Philip Hazel
|
||||||
|
Copyright (c) 1997-2006 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/* This module contains the external function pcre_config(). */
|
||||||
|
|
||||||
|
|
||||||
|
#include "pcre_internal.h"
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Return info about what features are configured *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This function has an extensible interface so that additional items can be
|
||||||
|
added compatibly.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
what what information is required
|
||||||
|
where where to put the information
|
||||||
|
|
||||||
|
Returns: 0 if data returned, negative on error
|
||||||
|
*/
|
||||||
|
|
||||||
|
PCRE_DATA_SCOPE int
|
||||||
|
pcre_config(int what, void *where)
|
||||||
|
{
|
||||||
|
switch (what)
|
||||||
|
{
|
||||||
|
case PCRE_CONFIG_UTF8:
|
||||||
|
#ifdef SUPPORT_UTF8
|
||||||
|
*((int *)where) = 1;
|
||||||
|
#else
|
||||||
|
*((int *)where) = 0;
|
||||||
|
#endif
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE_CONFIG_UNICODE_PROPERTIES:
|
||||||
|
#ifdef SUPPORT_UCP
|
||||||
|
*((int *)where) = 1;
|
||||||
|
#else
|
||||||
|
*((int *)where) = 0;
|
||||||
|
#endif
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE_CONFIG_NEWLINE:
|
||||||
|
*((int *)where) = NEWLINE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE_CONFIG_LINK_SIZE:
|
||||||
|
*((int *)where) = LINK_SIZE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE_CONFIG_POSIX_MALLOC_THRESHOLD:
|
||||||
|
*((int *)where) = POSIX_MALLOC_THRESHOLD;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE_CONFIG_MATCH_LIMIT:
|
||||||
|
*((unsigned int *)where) = MATCH_LIMIT;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE_CONFIG_MATCH_LIMIT_RECURSION:
|
||||||
|
*((unsigned int *)where) = MATCH_LIMIT_RECURSION;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE_CONFIG_STACKRECURSE:
|
||||||
|
#ifdef NO_RECURSE
|
||||||
|
*((int *)where) = 0;
|
||||||
|
#else
|
||||||
|
*((int *)where) = 1;
|
||||||
|
#endif
|
||||||
|
break;
|
||||||
|
|
||||||
|
default: return PCRE_ERROR_BADOPTION;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* End of pcre_config.c */
|
2433
glib/pcre/pcre_dfa_exec.c
Normal file
2433
glib/pcre/pcre_dfa_exec.c
Normal file
File diff suppressed because it is too large
Load Diff
4199
glib/pcre/pcre_exec.c
Normal file
4199
glib/pcre/pcre_exec.c
Normal file
File diff suppressed because it is too large
Load Diff
149
glib/pcre/pcre_fullinfo.c
Normal file
149
glib/pcre/pcre_fullinfo.c
Normal file
@ -0,0 +1,149 @@
|
|||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Written by Philip Hazel
|
||||||
|
Copyright (c) 1997-2006 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/* This module contains the external function pcre_fullinfo(), which returns
|
||||||
|
information about a compiled pattern. */
|
||||||
|
|
||||||
|
|
||||||
|
#include "pcre_internal.h"
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Return info about compiled pattern *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This is a newer "info" function which has an extensible interface so
|
||||||
|
that additional items can be added compatibly.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
argument_re points to compiled code
|
||||||
|
extra_data points to extra data, or NULL
|
||||||
|
what what information is required
|
||||||
|
where where to put the information
|
||||||
|
|
||||||
|
Returns: 0 if data returned, negative on error
|
||||||
|
*/
|
||||||
|
|
||||||
|
PCRE_DATA_SCOPE int
|
||||||
|
pcre_fullinfo(const pcre *argument_re, const pcre_extra *extra_data, int what,
|
||||||
|
void *where)
|
||||||
|
{
|
||||||
|
real_pcre internal_re;
|
||||||
|
pcre_study_data internal_study;
|
||||||
|
const real_pcre *re = (const real_pcre *)argument_re;
|
||||||
|
const pcre_study_data *study = NULL;
|
||||||
|
|
||||||
|
if (re == NULL || where == NULL) return PCRE_ERROR_NULL;
|
||||||
|
|
||||||
|
if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_STUDY_DATA) != 0)
|
||||||
|
study = (const pcre_study_data *)extra_data->study_data;
|
||||||
|
|
||||||
|
if (re->magic_number != MAGIC_NUMBER)
|
||||||
|
{
|
||||||
|
re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
|
||||||
|
if (re == NULL) return PCRE_ERROR_BADMAGIC;
|
||||||
|
if (study != NULL) study = &internal_study;
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (what)
|
||||||
|
{
|
||||||
|
case PCRE_INFO_OPTIONS:
|
||||||
|
*((unsigned long int *)where) = re->options & PUBLIC_OPTIONS;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE_INFO_SIZE:
|
||||||
|
*((size_t *)where) = re->size;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE_INFO_STUDYSIZE:
|
||||||
|
*((size_t *)where) = (study == NULL)? 0 : study->size;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE_INFO_CAPTURECOUNT:
|
||||||
|
*((int *)where) = re->top_bracket;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE_INFO_BACKREFMAX:
|
||||||
|
*((int *)where) = re->top_backref;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE_INFO_FIRSTBYTE:
|
||||||
|
*((int *)where) =
|
||||||
|
((re->options & PCRE_FIRSTSET) != 0)? re->first_byte :
|
||||||
|
((re->options & PCRE_STARTLINE) != 0)? -1 : -2;
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* Make sure we pass back the pointer to the bit vector in the external
|
||||||
|
block, not the internal copy (with flipped integer fields). */
|
||||||
|
|
||||||
|
case PCRE_INFO_FIRSTTABLE:
|
||||||
|
*((const uschar **)where) =
|
||||||
|
(study != NULL && (study->options & PCRE_STUDY_MAPPED) != 0)?
|
||||||
|
((const pcre_study_data *)extra_data->study_data)->start_bits : NULL;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE_INFO_LASTLITERAL:
|
||||||
|
*((int *)where) =
|
||||||
|
((re->options & PCRE_REQCHSET) != 0)? re->req_byte : -1;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE_INFO_NAMEENTRYSIZE:
|
||||||
|
*((int *)where) = re->name_entry_size;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE_INFO_NAMECOUNT:
|
||||||
|
*((int *)where) = re->name_count;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE_INFO_NAMETABLE:
|
||||||
|
*((const uschar **)where) = (const uschar *)re + re->name_table_offset;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE_INFO_DEFAULT_TABLES:
|
||||||
|
*((const uschar **)where) = (const uschar *)(_pcre_default_tables);
|
||||||
|
break;
|
||||||
|
|
||||||
|
default: return PCRE_ERROR_BADOPTION;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* End of pcre_fullinfo.c */
|
461
glib/pcre/pcre_get.c
Normal file
461
glib/pcre/pcre_get.c
Normal file
@ -0,0 +1,461 @@
|
|||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Written by Philip Hazel
|
||||||
|
Copyright (c) 1997-2006 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/* This module contains some convenience functions for extracting substrings
|
||||||
|
from the subject string after a regex match has succeeded. The original idea
|
||||||
|
for these functions came from Scott Wimer. */
|
||||||
|
|
||||||
|
|
||||||
|
#include "pcre_internal.h"
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Find number for named string *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This function is used by the get_first_set() function below, as well
|
||||||
|
as being generally available. It assumes that names are unique.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
code the compiled regex
|
||||||
|
stringname the name whose number is required
|
||||||
|
|
||||||
|
Returns: the number of the named parentheses, or a negative number
|
||||||
|
(PCRE_ERROR_NOSUBSTRING) if not found
|
||||||
|
*/
|
||||||
|
|
||||||
|
int
|
||||||
|
pcre_get_stringnumber(const pcre *code, const char *stringname)
|
||||||
|
{
|
||||||
|
int rc;
|
||||||
|
int entrysize;
|
||||||
|
int top, bot;
|
||||||
|
uschar *nametable;
|
||||||
|
|
||||||
|
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
|
||||||
|
return rc;
|
||||||
|
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
|
||||||
|
|
||||||
|
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
|
||||||
|
return rc;
|
||||||
|
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
|
||||||
|
return rc;
|
||||||
|
|
||||||
|
bot = 0;
|
||||||
|
while (top > bot)
|
||||||
|
{
|
||||||
|
int mid = (top + bot) / 2;
|
||||||
|
uschar *entry = nametable + entrysize*mid;
|
||||||
|
int c = strcmp(stringname, (char *)(entry + 2));
|
||||||
|
if (c == 0) return (entry[0] << 8) + entry[1];
|
||||||
|
if (c > 0) bot = mid + 1; else top = mid;
|
||||||
|
}
|
||||||
|
|
||||||
|
return PCRE_ERROR_NOSUBSTRING;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Find (multiple) entries for named string *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This is used by the get_first_set() function below, as well as being
|
||||||
|
generally available. It is used when duplicated names are permitted.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
code the compiled regex
|
||||||
|
stringname the name whose entries are required
|
||||||
|
firstptr where to put the pointer to the first entry
|
||||||
|
lastptr where to put the pointer to the last entry
|
||||||
|
|
||||||
|
Returns: the length of each entry, or a negative number
|
||||||
|
(PCRE_ERROR_NOSUBSTRING) if not found
|
||||||
|
*/
|
||||||
|
|
||||||
|
int
|
||||||
|
pcre_get_stringtable_entries(const pcre *code, const char *stringname,
|
||||||
|
char **firstptr, char **lastptr)
|
||||||
|
{
|
||||||
|
int rc;
|
||||||
|
int entrysize;
|
||||||
|
int top, bot;
|
||||||
|
uschar *nametable, *lastentry;
|
||||||
|
|
||||||
|
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
|
||||||
|
return rc;
|
||||||
|
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
|
||||||
|
|
||||||
|
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
|
||||||
|
return rc;
|
||||||
|
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
|
||||||
|
return rc;
|
||||||
|
|
||||||
|
lastentry = nametable + entrysize * (top - 1);
|
||||||
|
bot = 0;
|
||||||
|
while (top > bot)
|
||||||
|
{
|
||||||
|
int mid = (top + bot) / 2;
|
||||||
|
uschar *entry = nametable + entrysize*mid;
|
||||||
|
int c = strcmp(stringname, (char *)(entry + 2));
|
||||||
|
if (c == 0)
|
||||||
|
{
|
||||||
|
uschar *first = entry;
|
||||||
|
uschar *last = entry;
|
||||||
|
while (first > nametable)
|
||||||
|
{
|
||||||
|
if (strcmp(stringname, (char *)(first - entrysize + 2)) != 0) break;
|
||||||
|
first -= entrysize;
|
||||||
|
}
|
||||||
|
while (last < lastentry)
|
||||||
|
{
|
||||||
|
if (strcmp(stringname, (char *)(last + entrysize + 2)) != 0) break;
|
||||||
|
last += entrysize;
|
||||||
|
}
|
||||||
|
*firstptr = (char *)first;
|
||||||
|
*lastptr = (char *)last;
|
||||||
|
return entrysize;
|
||||||
|
}
|
||||||
|
if (c > 0) bot = mid + 1; else top = mid;
|
||||||
|
}
|
||||||
|
|
||||||
|
return PCRE_ERROR_NOSUBSTRING;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Find first set of multiple named strings *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This function allows for duplicate names in the table of named substrings.
|
||||||
|
It returns the number of the first one that was set in a pattern match.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
code the compiled regex
|
||||||
|
stringname the name of the capturing substring
|
||||||
|
ovector the vector of matched substrings
|
||||||
|
|
||||||
|
Returns: the number of the first that is set,
|
||||||
|
or the number of the first one if none are set,
|
||||||
|
or a negative number on error
|
||||||
|
*/
|
||||||
|
|
||||||
|
static int
|
||||||
|
get_first_set(const pcre *code, const char *stringname, int *ovector)
|
||||||
|
{
|
||||||
|
const real_pcre *re = (const real_pcre *)code;
|
||||||
|
int entrysize;
|
||||||
|
char *first, *last;
|
||||||
|
uschar *entry;
|
||||||
|
if ((re->options & (PCRE_DUPNAMES | PCRE_JCHANGED)) == 0)
|
||||||
|
return pcre_get_stringnumber(code, stringname);
|
||||||
|
entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
|
||||||
|
if (entrysize <= 0) return entrysize;
|
||||||
|
for (entry = (uschar *)first; entry <= (uschar *)last; entry += entrysize)
|
||||||
|
{
|
||||||
|
int n = (entry[0] << 8) + entry[1];
|
||||||
|
if (ovector[n*2] >= 0) return n;
|
||||||
|
}
|
||||||
|
return (first[0] << 8) + first[1];
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Copy captured string to given buffer *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This function copies a single captured substring into a given buffer.
|
||||||
|
Note that we use memcpy() rather than strncpy() in case there are binary zeros
|
||||||
|
in the string.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
subject the subject string that was matched
|
||||||
|
ovector pointer to the offsets table
|
||||||
|
stringcount the number of substrings that were captured
|
||||||
|
(i.e. the yield of the pcre_exec call, unless
|
||||||
|
that was zero, in which case it should be 1/3
|
||||||
|
of the offset table size)
|
||||||
|
stringnumber the number of the required substring
|
||||||
|
buffer where to put the substring
|
||||||
|
size the size of the buffer
|
||||||
|
|
||||||
|
Returns: if successful:
|
||||||
|
the length of the copied string, not including the zero
|
||||||
|
that is put on the end; can be zero
|
||||||
|
if not successful:
|
||||||
|
PCRE_ERROR_NOMEMORY (-6) buffer too small
|
||||||
|
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
|
||||||
|
*/
|
||||||
|
|
||||||
|
int
|
||||||
|
pcre_copy_substring(const char *subject, int *ovector, int stringcount,
|
||||||
|
int stringnumber, char *buffer, int size)
|
||||||
|
{
|
||||||
|
int yield;
|
||||||
|
if (stringnumber < 0 || stringnumber >= stringcount)
|
||||||
|
return PCRE_ERROR_NOSUBSTRING;
|
||||||
|
stringnumber *= 2;
|
||||||
|
yield = ovector[stringnumber+1] - ovector[stringnumber];
|
||||||
|
if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
|
||||||
|
memcpy(buffer, subject + ovector[stringnumber], yield);
|
||||||
|
buffer[yield] = 0;
|
||||||
|
return yield;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Copy named captured string to given buffer *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This function copies a single captured substring into a given buffer,
|
||||||
|
identifying it by name. If the regex permits duplicate names, the first
|
||||||
|
substring that is set is chosen.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
code the compiled regex
|
||||||
|
subject the subject string that was matched
|
||||||
|
ovector pointer to the offsets table
|
||||||
|
stringcount the number of substrings that were captured
|
||||||
|
(i.e. the yield of the pcre_exec call, unless
|
||||||
|
that was zero, in which case it should be 1/3
|
||||||
|
of the offset table size)
|
||||||
|
stringname the name of the required substring
|
||||||
|
buffer where to put the substring
|
||||||
|
size the size of the buffer
|
||||||
|
|
||||||
|
Returns: if successful:
|
||||||
|
the length of the copied string, not including the zero
|
||||||
|
that is put on the end; can be zero
|
||||||
|
if not successful:
|
||||||
|
PCRE_ERROR_NOMEMORY (-6) buffer too small
|
||||||
|
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
|
||||||
|
*/
|
||||||
|
|
||||||
|
int
|
||||||
|
pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
|
||||||
|
int stringcount, const char *stringname, char *buffer, int size)
|
||||||
|
{
|
||||||
|
int n = get_first_set(code, stringname, ovector);
|
||||||
|
if (n <= 0) return n;
|
||||||
|
return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Copy all captured strings to new store *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This function gets one chunk of store and builds a list of pointers and all
|
||||||
|
of the captured substrings in it. A NULL pointer is put on the end of the list.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
subject the subject string that was matched
|
||||||
|
ovector pointer to the offsets table
|
||||||
|
stringcount the number of substrings that were captured
|
||||||
|
(i.e. the yield of the pcre_exec call, unless
|
||||||
|
that was zero, in which case it should be 1/3
|
||||||
|
of the offset table size)
|
||||||
|
listptr set to point to the list of pointers
|
||||||
|
|
||||||
|
Returns: if successful: 0
|
||||||
|
if not successful:
|
||||||
|
PCRE_ERROR_NOMEMORY (-6) failed to get store
|
||||||
|
*/
|
||||||
|
|
||||||
|
int
|
||||||
|
pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
|
||||||
|
const char ***listptr)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
int size = sizeof(char *);
|
||||||
|
int double_count = stringcount * 2;
|
||||||
|
char **stringlist;
|
||||||
|
char *p;
|
||||||
|
|
||||||
|
for (i = 0; i < double_count; i += 2)
|
||||||
|
size += sizeof(char *) + ovector[i+1] - ovector[i] + 1;
|
||||||
|
|
||||||
|
stringlist = (char **)(pcre_malloc)(size);
|
||||||
|
if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
|
||||||
|
|
||||||
|
*listptr = (const char **)stringlist;
|
||||||
|
p = (char *)(stringlist + stringcount + 1);
|
||||||
|
|
||||||
|
for (i = 0; i < double_count; i += 2)
|
||||||
|
{
|
||||||
|
int len = ovector[i+1] - ovector[i];
|
||||||
|
memcpy(p, subject + ovector[i], len);
|
||||||
|
*stringlist++ = p;
|
||||||
|
p += len;
|
||||||
|
*p++ = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
*stringlist = NULL;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Free store obtained by get_substring_list *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This function exists for the benefit of people calling PCRE from non-C
|
||||||
|
programs that can call its functions, but not free() or (pcre_free)() directly.
|
||||||
|
|
||||||
|
Argument: the result of a previous pcre_get_substring_list()
|
||||||
|
Returns: nothing
|
||||||
|
*/
|
||||||
|
|
||||||
|
void
|
||||||
|
pcre_free_substring_list(const char **pointer)
|
||||||
|
{
|
||||||
|
(pcre_free)((void *)pointer);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Copy captured string to new store *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This function copies a single captured substring into a piece of new
|
||||||
|
store
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
subject the subject string that was matched
|
||||||
|
ovector pointer to the offsets table
|
||||||
|
stringcount the number of substrings that were captured
|
||||||
|
(i.e. the yield of the pcre_exec call, unless
|
||||||
|
that was zero, in which case it should be 1/3
|
||||||
|
of the offset table size)
|
||||||
|
stringnumber the number of the required substring
|
||||||
|
stringptr where to put a pointer to the substring
|
||||||
|
|
||||||
|
Returns: if successful:
|
||||||
|
the length of the string, not including the zero that
|
||||||
|
is put on the end; can be zero
|
||||||
|
if not successful:
|
||||||
|
PCRE_ERROR_NOMEMORY (-6) failed to get store
|
||||||
|
PCRE_ERROR_NOSUBSTRING (-7) substring not present
|
||||||
|
*/
|
||||||
|
|
||||||
|
int
|
||||||
|
pcre_get_substring(const char *subject, int *ovector, int stringcount,
|
||||||
|
int stringnumber, const char **stringptr)
|
||||||
|
{
|
||||||
|
int yield;
|
||||||
|
char *substring;
|
||||||
|
if (stringnumber < 0 || stringnumber >= stringcount)
|
||||||
|
return PCRE_ERROR_NOSUBSTRING;
|
||||||
|
stringnumber *= 2;
|
||||||
|
yield = ovector[stringnumber+1] - ovector[stringnumber];
|
||||||
|
substring = (char *)(pcre_malloc)(yield + 1);
|
||||||
|
if (substring == NULL) return PCRE_ERROR_NOMEMORY;
|
||||||
|
memcpy(substring, subject + ovector[stringnumber], yield);
|
||||||
|
substring[yield] = 0;
|
||||||
|
*stringptr = substring;
|
||||||
|
return yield;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Copy named captured string to new store *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This function copies a single captured substring, identified by name, into
|
||||||
|
new store. If the regex permits duplicate names, the first substring that is
|
||||||
|
set is chosen.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
code the compiled regex
|
||||||
|
subject the subject string that was matched
|
||||||
|
ovector pointer to the offsets table
|
||||||
|
stringcount the number of substrings that were captured
|
||||||
|
(i.e. the yield of the pcre_exec call, unless
|
||||||
|
that was zero, in which case it should be 1/3
|
||||||
|
of the offset table size)
|
||||||
|
stringname the name of the required substring
|
||||||
|
stringptr where to put the pointer
|
||||||
|
|
||||||
|
Returns: if successful:
|
||||||
|
the length of the copied string, not including the zero
|
||||||
|
that is put on the end; can be zero
|
||||||
|
if not successful:
|
||||||
|
PCRE_ERROR_NOMEMORY (-6) couldn't get memory
|
||||||
|
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
|
||||||
|
*/
|
||||||
|
|
||||||
|
int
|
||||||
|
pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
|
||||||
|
int stringcount, const char *stringname, const char **stringptr)
|
||||||
|
{
|
||||||
|
int n = get_first_set(code, stringname, ovector);
|
||||||
|
if (n <= 0) return n;
|
||||||
|
return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Free store obtained by get_substring *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This function exists for the benefit of people calling PCRE from non-C
|
||||||
|
programs that can call its functions, but not free() or (pcre_free)() directly.
|
||||||
|
|
||||||
|
Argument: the result of a previous pcre_get_substring()
|
||||||
|
Returns: nothing
|
||||||
|
*/
|
||||||
|
|
||||||
|
void
|
||||||
|
pcre_free_substring(const char *pointer)
|
||||||
|
{
|
||||||
|
(pcre_free)((void *)pointer);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* End of pcre_get.c */
|
59
glib/pcre/pcre_globals.c
Normal file
59
glib/pcre/pcre_globals.c
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Written by Philip Hazel
|
||||||
|
Copyright (c) 1997-2006 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/* This module contains global variables that are exported by the PCRE library.
|
||||||
|
PCRE is thread-clean and doesn't use any global variables in the normal sense.
|
||||||
|
However, it calls memory allocation and freeing functions via the four
|
||||||
|
indirections below, and it can optionally do callouts, using the fifth
|
||||||
|
indirection. These values can be changed by the caller, but are shared between
|
||||||
|
all threads. However, when compiling for Virtual Pascal, things are done
|
||||||
|
differently, and global variables are not used (see pcre.in). */
|
||||||
|
|
||||||
|
|
||||||
|
#include "pcre_internal.h"
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" int (*pcre_callout)(pcre_callout_block *) = NULL;
|
||||||
|
#else
|
||||||
|
int (*pcre_callout)(pcre_callout_block *) = NULL;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* End of pcre_globals.c */
|
89
glib/pcre/pcre_info.c
Normal file
89
glib/pcre/pcre_info.c
Normal file
@ -0,0 +1,89 @@
|
|||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Written by Philip Hazel
|
||||||
|
Copyright (c) 1997-2006 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/* This module contains the external function pcre_info(), which gives some
|
||||||
|
information about a compiled pattern. However, use of this function is now
|
||||||
|
deprecated, as it has been superseded by pcre_fullinfo(). */
|
||||||
|
|
||||||
|
|
||||||
|
#include "pcre_internal.h"
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* (Obsolete) Return info about compiled pattern *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This is the original "info" function. It picks potentially useful data out
|
||||||
|
of the private structure, but its interface was too rigid. It remains for
|
||||||
|
backwards compatibility. The public options are passed back in an int - though
|
||||||
|
the re->options field has been expanded to a long int, all the public options
|
||||||
|
are at the low end of it, and so even on 16-bit systems this will still be OK.
|
||||||
|
Therefore, I haven't changed the API for pcre_info().
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
argument_re points to compiled code
|
||||||
|
optptr where to pass back the options
|
||||||
|
first_byte where to pass back the first character,
|
||||||
|
or -1 if multiline and all branches start ^,
|
||||||
|
or -2 otherwise
|
||||||
|
|
||||||
|
Returns: number of capturing subpatterns
|
||||||
|
or negative values on error
|
||||||
|
*/
|
||||||
|
|
||||||
|
PCRE_DATA_SCOPE int
|
||||||
|
pcre_info(const pcre *argument_re, int *optptr, int *first_byte)
|
||||||
|
{
|
||||||
|
real_pcre internal_re;
|
||||||
|
const real_pcre *re = (const real_pcre *)argument_re;
|
||||||
|
if (re == NULL) return PCRE_ERROR_NULL;
|
||||||
|
if (re->magic_number != MAGIC_NUMBER)
|
||||||
|
{
|
||||||
|
re = _pcre_try_flipped(re, &internal_re, NULL, NULL);
|
||||||
|
if (re == NULL) return PCRE_ERROR_BADMAGIC;
|
||||||
|
}
|
||||||
|
if (optptr != NULL) *optptr = (int)(re->options & PUBLIC_OPTIONS);
|
||||||
|
if (first_byte != NULL)
|
||||||
|
*first_byte = ((re->options & PCRE_FIRSTSET) != 0)? re->first_byte :
|
||||||
|
((re->options & PCRE_STARTLINE) != 0)? -1 : -2;
|
||||||
|
return re->top_bracket;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* End of pcre_info.c */
|
1041
glib/pcre/pcre_internal.h
Normal file
1041
glib/pcre/pcre_internal.h
Normal file
File diff suppressed because it is too large
Load Diff
140
glib/pcre/pcre_maketables.c
Normal file
140
glib/pcre/pcre_maketables.c
Normal file
@ -0,0 +1,140 @@
|
|||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Written by Philip Hazel
|
||||||
|
Copyright (c) 1997-2006 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/* This module contains the external function pcre_maketables(), which builds
|
||||||
|
character tables for PCRE in the current locale. The file is compiled on its
|
||||||
|
own as part of the PCRE library. However, it is also included in the
|
||||||
|
compilation of dftables.c, in which case the macro DFTABLES is defined. */
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef DFTABLES
|
||||||
|
#include "pcre_internal.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Create PCRE character tables *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This function builds a set of character tables for use by PCRE and returns
|
||||||
|
a pointer to them. They are built using the ctype functions, and consequently
|
||||||
|
their contents will depend upon the current locale setting. When compiled as
|
||||||
|
part of the library, the store is obtained via pcre_malloc(), but when compiled
|
||||||
|
inside dftables, use malloc().
|
||||||
|
|
||||||
|
Arguments: none
|
||||||
|
Returns: pointer to the contiguous block of data
|
||||||
|
*/
|
||||||
|
|
||||||
|
const unsigned char *
|
||||||
|
pcre_maketables(void)
|
||||||
|
{
|
||||||
|
unsigned char *yield, *p;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
#ifndef DFTABLES
|
||||||
|
yield = (unsigned char*)(pcre_malloc)(tables_length);
|
||||||
|
#else
|
||||||
|
yield = (unsigned char*)malloc(tables_length);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (yield == NULL) return NULL;
|
||||||
|
p = yield;
|
||||||
|
|
||||||
|
/* First comes the lower casing table */
|
||||||
|
|
||||||
|
for (i = 0; i < 256; i++) *p++ = tolower(i);
|
||||||
|
|
||||||
|
/* Next the case-flipping table */
|
||||||
|
|
||||||
|
for (i = 0; i < 256; i++) *p++ = islower(i)? toupper(i) : tolower(i);
|
||||||
|
|
||||||
|
/* Then the character class tables. Don't try to be clever and save effort on
|
||||||
|
exclusive ones - in some locales things may be different. Note that the table
|
||||||
|
for "space" includes everything "isspace" gives, including VT in the default
|
||||||
|
locale. This makes it work for the POSIX class [:space:]. Note also that it is
|
||||||
|
possible for a character to be alnum or alpha without being lower or upper,
|
||||||
|
such as "male and female ordinals" (\xAA and \xBA) in the fr_FR locale (at
|
||||||
|
least under Debian Linux's locales as of 12/2005). So we must test for alnum
|
||||||
|
specially. */
|
||||||
|
|
||||||
|
memset(p, 0, cbit_length);
|
||||||
|
for (i = 0; i < 256; i++)
|
||||||
|
{
|
||||||
|
if (isdigit(i)) p[cbit_digit + i/8] |= 1 << (i&7);
|
||||||
|
if (isupper(i)) p[cbit_upper + i/8] |= 1 << (i&7);
|
||||||
|
if (islower(i)) p[cbit_lower + i/8] |= 1 << (i&7);
|
||||||
|
if (isalnum(i)) p[cbit_word + i/8] |= 1 << (i&7);
|
||||||
|
if (i == '_') p[cbit_word + i/8] |= 1 << (i&7);
|
||||||
|
if (isspace(i)) p[cbit_space + i/8] |= 1 << (i&7);
|
||||||
|
if (isxdigit(i))p[cbit_xdigit + i/8] |= 1 << (i&7);
|
||||||
|
if (isgraph(i)) p[cbit_graph + i/8] |= 1 << (i&7);
|
||||||
|
if (isprint(i)) p[cbit_print + i/8] |= 1 << (i&7);
|
||||||
|
if (ispunct(i)) p[cbit_punct + i/8] |= 1 << (i&7);
|
||||||
|
if (iscntrl(i)) p[cbit_cntrl + i/8] |= 1 << (i&7);
|
||||||
|
}
|
||||||
|
p += cbit_length;
|
||||||
|
|
||||||
|
/* Finally, the character type table. In this, we exclude VT from the white
|
||||||
|
space chars, because Perl doesn't recognize it as such for \s and for comments
|
||||||
|
within regexes. */
|
||||||
|
|
||||||
|
for (i = 0; i < 256; i++)
|
||||||
|
{
|
||||||
|
int x = 0;
|
||||||
|
if (i != 0x0b && isspace(i)) x += ctype_space;
|
||||||
|
if (isalpha(i)) x += ctype_letter;
|
||||||
|
if (isdigit(i)) x += ctype_digit;
|
||||||
|
if (isxdigit(i)) x += ctype_xdigit;
|
||||||
|
if (isalnum(i) || i == '_') x += ctype_word;
|
||||||
|
|
||||||
|
/* Note: strchr includes the terminating zero in the characters it considers.
|
||||||
|
In this instance, that is ok because we want binary zero to be flagged as a
|
||||||
|
meta-character, which in this sense is any character that terminates a run
|
||||||
|
of data characters. */
|
||||||
|
|
||||||
|
if (strchr("\\*+?{^.$|()[", i) != 0) x += ctype_meta;
|
||||||
|
*p++ = x;
|
||||||
|
}
|
||||||
|
|
||||||
|
return yield;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* End of pcre_maketables.c */
|
135
glib/pcre/pcre_newline.c
Normal file
135
glib/pcre/pcre_newline.c
Normal file
@ -0,0 +1,135 @@
|
|||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Written by Philip Hazel
|
||||||
|
Copyright (c) 1997-2006 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/* This module contains internal functions for testing newlines when more than
|
||||||
|
one kind of newline is to be recognized. When a newline is found, its length is
|
||||||
|
returned. In principle, we could implement several newline "types", each
|
||||||
|
referring to a different set of newline characters. At present, PCRE supports
|
||||||
|
only NLTYPE_FIXED, which gets handled without these functions, and NLTYPE_ALL,
|
||||||
|
so for now the type isn't passed into the functions. It can easily be added
|
||||||
|
later if required. The full list of Unicode newline characters is taken from
|
||||||
|
http://unicode.org/unicode/reports/tr18/. */
|
||||||
|
|
||||||
|
|
||||||
|
#include "pcre_internal.h"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Check for newline at given position *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* It is guaranteed that the initial value of ptr is less than the end of the
|
||||||
|
string that is being processed.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
ptr pointer to possible newline
|
||||||
|
endptr pointer to the end of the string
|
||||||
|
lenptr where to return the length
|
||||||
|
utf8 TRUE if in utf8 mode
|
||||||
|
|
||||||
|
Returns: TRUE or FALSE
|
||||||
|
*/
|
||||||
|
|
||||||
|
BOOL
|
||||||
|
_pcre_is_newline(const uschar *ptr, const uschar *endptr, int *lenptr,
|
||||||
|
BOOL utf8)
|
||||||
|
{
|
||||||
|
int c;
|
||||||
|
if (utf8) { GETCHAR(c, ptr); } else c = *ptr;
|
||||||
|
switch(c)
|
||||||
|
{
|
||||||
|
case 0x000a: /* LF */
|
||||||
|
case 0x000b: /* VT */
|
||||||
|
case 0x000c: *lenptr = 1; return TRUE; /* FF */
|
||||||
|
case 0x000d: *lenptr = (ptr < endptr - 1 && ptr[1] == 0x0a)? 2 : 1;
|
||||||
|
return TRUE; /* CR */
|
||||||
|
case 0x0085: *lenptr = utf8? 2 : 1; return TRUE; /* NEL */
|
||||||
|
case 0x2028: /* LS */
|
||||||
|
case 0x2029: *lenptr = 3; return TRUE; /* PS */
|
||||||
|
default: return FALSE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Check for newline at previous position *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* It is guaranteed that the initial value of ptr is greater than the start of
|
||||||
|
the string that is being processed.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
ptr pointer to possible newline
|
||||||
|
startptr pointer to the start of the string
|
||||||
|
lenptr where to return the length
|
||||||
|
utf8 TRUE if in utf8 mode
|
||||||
|
|
||||||
|
Returns: TRUE or FALSE
|
||||||
|
*/
|
||||||
|
|
||||||
|
BOOL
|
||||||
|
_pcre_was_newline(const uschar *ptr, const uschar *startptr, int *lenptr,
|
||||||
|
BOOL utf8)
|
||||||
|
{
|
||||||
|
int c;
|
||||||
|
ptr--;
|
||||||
|
if (utf8)
|
||||||
|
{
|
||||||
|
BACKCHAR(ptr);
|
||||||
|
GETCHAR(c, ptr);
|
||||||
|
}
|
||||||
|
else c = *ptr;
|
||||||
|
switch(c)
|
||||||
|
{
|
||||||
|
case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1;
|
||||||
|
return TRUE; /* LF */
|
||||||
|
case 0x000b: /* VT */
|
||||||
|
case 0x000c: /* FF */
|
||||||
|
case 0x000d: *lenptr = 1; return TRUE; /* CR */
|
||||||
|
case 0x0085: *lenptr = utf8? 2 : 1; return TRUE; /* NEL */
|
||||||
|
case 0x2028: /* LS */
|
||||||
|
case 0x2029: *lenptr = 3; return TRUE; /* PS */
|
||||||
|
default: return FALSE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* End of pcre_newline.c */
|
78
glib/pcre/pcre_ord2utf8.c
Normal file
78
glib/pcre/pcre_ord2utf8.c
Normal file
@ -0,0 +1,78 @@
|
|||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Written by Philip Hazel
|
||||||
|
Copyright (c) 1997-2006 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/* This file contains a private PCRE function that converts an ordinal
|
||||||
|
character value into a UTF8 string. */
|
||||||
|
|
||||||
|
|
||||||
|
#include "pcre_internal.h"
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Convert character value to UTF-8 *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This function takes an integer value in the range 0 - 0x7fffffff
|
||||||
|
and encodes it as a UTF-8 character in 1 to 6 bytes.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
cvalue the character value
|
||||||
|
buffer pointer to buffer for result - at least 6 bytes long
|
||||||
|
|
||||||
|
Returns: number of bytes placed in the buffer
|
||||||
|
*/
|
||||||
|
|
||||||
|
int
|
||||||
|
_pcre_ord2utf8(int cvalue, uschar *buffer)
|
||||||
|
{
|
||||||
|
register int i, j;
|
||||||
|
for (i = 0; i < _pcre_utf8_table1_size; i++)
|
||||||
|
if (cvalue <= _pcre_utf8_table1[i]) break;
|
||||||
|
buffer += i;
|
||||||
|
for (j = i; j > 0; j--)
|
||||||
|
{
|
||||||
|
*buffer-- = 0x80 | (cvalue & 0x3f);
|
||||||
|
cvalue >>= 6;
|
||||||
|
}
|
||||||
|
*buffer = _pcre_utf8_table2[i] | cvalue;
|
||||||
|
return i + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* End of pcre_ord2utf8.c */
|
77
glib/pcre/pcre_refcount.c
Normal file
77
glib/pcre/pcre_refcount.c
Normal file
@ -0,0 +1,77 @@
|
|||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Written by Philip Hazel
|
||||||
|
Copyright (c) 1997-2006 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/* This module contains the external function pcre_refcount(), which is an
|
||||||
|
auxiliary function that can be used to maintain a reference count in a compiled
|
||||||
|
pattern data block. This might be helpful in applications where the block is
|
||||||
|
shared by different users. */
|
||||||
|
|
||||||
|
#include "pcre_internal.h"
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Maintain reference count *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* The reference count is a 16-bit field, initialized to zero. It is not
|
||||||
|
possible to transfer a non-zero count from one host to a different host that
|
||||||
|
has a different byte order - though I can't see why anyone in their right mind
|
||||||
|
would ever want to do that!
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
argument_re points to compiled code
|
||||||
|
adjust value to add to the count
|
||||||
|
|
||||||
|
Returns: the (possibly updated) count value (a non-negative number), or
|
||||||
|
a negative error number
|
||||||
|
*/
|
||||||
|
|
||||||
|
PCRE_DATA_SCOPE int
|
||||||
|
pcre_refcount(pcre *argument_re, int adjust)
|
||||||
|
{
|
||||||
|
real_pcre *re = (real_pcre *)argument_re;
|
||||||
|
if (re == NULL) return PCRE_ERROR_NULL;
|
||||||
|
re->ref_count = (-adjust > re->ref_count)? 0 :
|
||||||
|
(adjust + re->ref_count > 65535)? 65535 :
|
||||||
|
re->ref_count + adjust;
|
||||||
|
return re->ref_count;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* End of pcre_refcount.c */
|
570
glib/pcre/pcre_study.c
Normal file
570
glib/pcre/pcre_study.c
Normal file
@ -0,0 +1,570 @@
|
|||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Written by Philip Hazel
|
||||||
|
Copyright (c) 1997-2006 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/* This module contains the external function pcre_study(), along with local
|
||||||
|
supporting functions. */
|
||||||
|
|
||||||
|
|
||||||
|
#include "pcre_internal.h"
|
||||||
|
|
||||||
|
|
||||||
|
/* Returns from set_start_bits() */
|
||||||
|
|
||||||
|
enum {
  SSB_FAIL,        /* Failed to find any starting bytes */
  SSB_DONE,        /* Found mandatory starting bytes */
  SSB_CONTINUE     /* Found only optional starting bytes; keep scanning */
};
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Set a bit and maybe its alternate case *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* Given a character, set its bit in the table, and also the bit for the other
|
||||||
|
version of a letter if we are caseless.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
start_bits points to the bit map
|
||||||
|
c is the character
|
||||||
|
caseless the caseless flag
|
||||||
|
cd the block with char table pointers
|
||||||
|
|
||||||
|
Returns: nothing
|
||||||
|
*/
|
||||||
|
|
||||||
|
static void
|
||||||
|
set_bit(uschar *start_bits, unsigned int c, BOOL caseless, compile_data *cd)
|
||||||
|
{
|
||||||
|
start_bits[c/8] |= (1 << (c&7));
|
||||||
|
if (caseless && (cd->ctypes[c] & ctype_letter) != 0)
|
||||||
|
start_bits[cd->fcc[c]/8] |= (1 << (cd->fcc[c]&7));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Create bitmap of starting bytes *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This function scans a compiled unanchored expression recursively and
|
||||||
|
attempts to build a bitmap of the set of possible starting bytes. As time goes
|
||||||
|
by, we may be able to get more clever at doing this. The SSB_CONTINUE return is
|
||||||
|
useful for parenthesized groups in patterns such as (a*)b where the group
|
||||||
|
provides some optional starting bytes but scanning must continue at the outer
|
||||||
|
level to find at least one mandatory byte. At the outermost level, this
|
||||||
|
function fails unless the result is SSB_DONE.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
code points to an expression
|
||||||
|
start_bits points to a 32-byte table, initialized to 0
|
||||||
|
caseless the current state of the caseless flag
|
||||||
|
utf8 TRUE if in UTF-8 mode
|
||||||
|
cd the block with char table pointers
|
||||||
|
|
||||||
|
Returns: SSB_FAIL => Failed to find any starting bytes
|
||||||
|
SSB_DONE => Found mandatory starting bytes
|
||||||
|
SSB_CONTINUE => Found optional starting bytes
|
||||||
|
*/
|
||||||
|
|
||||||
|
static int
|
||||||
|
set_start_bits(const uschar *code, uschar *start_bits, BOOL caseless,
|
||||||
|
BOOL utf8, compile_data *cd)
|
||||||
|
{
|
||||||
|
register int c;
|
||||||
|
int yield = SSB_DONE;
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
/* ========================================================================= */
|
||||||
|
/* The following comment and code was inserted in January 1999. In May 2006,
|
||||||
|
when it was observed to cause compiler warnings about unused values, I took it
|
||||||
|
out again. If anybody is still using OS/2, they will have to put it back
|
||||||
|
manually. */
|
||||||
|
|
||||||
|
/* This next statement and the later reference to dummy are here in order to
|
||||||
|
trick the optimizer of the IBM C compiler for OS/2 into generating correct
|
||||||
|
code. Apparently IBM isn't going to fix the problem, and we would rather not
|
||||||
|
disable optimization (in this module it actually makes a big difference, and
|
||||||
|
the pcre module can use all the optimization it can get). */
|
||||||
|
|
||||||
|
volatile int dummy;
|
||||||
|
/* ========================================================================= */
|
||||||
|
#endif
|
||||||
|
|
||||||
|
do
|
||||||
|
{
|
||||||
|
const uschar *tcode = code + (((int)*code == OP_CBRA)? 3:1) + LINK_SIZE;
|
||||||
|
BOOL try_next = TRUE;
|
||||||
|
|
||||||
|
while (try_next) /* Loop for items in this branch */
|
||||||
|
{
|
||||||
|
int rc;
|
||||||
|
switch(*tcode)
|
||||||
|
{
|
||||||
|
/* Fail if we reach something we don't understand */
|
||||||
|
|
||||||
|
default:
|
||||||
|
return SSB_FAIL;
|
||||||
|
|
||||||
|
/* If we hit a bracket or a positive lookahead assertion, recurse to set
|
||||||
|
bits from within the subpattern. If it can't find anything, we have to
|
||||||
|
give up. If it finds some mandatory character(s), we are done for this
|
||||||
|
branch. Otherwise, carry on scanning after the subpattern. */
|
||||||
|
|
||||||
|
case OP_BRA:
|
||||||
|
case OP_SBRA:
|
||||||
|
case OP_CBRA:
|
||||||
|
case OP_SCBRA:
|
||||||
|
case OP_ONCE:
|
||||||
|
case OP_ASSERT:
|
||||||
|
rc = set_start_bits(tcode, start_bits, caseless, utf8, cd);
|
||||||
|
if (rc == SSB_FAIL) return SSB_FAIL;
|
||||||
|
if (rc == SSB_DONE) try_next = FALSE; else
|
||||||
|
{
|
||||||
|
do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
|
||||||
|
tcode += 1 + LINK_SIZE;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* If we hit ALT or KET, it means we haven't found anything mandatory in
|
||||||
|
this branch, though we might have found something optional. For ALT, we
|
||||||
|
continue with the next alternative, but we have to arrange that the final
|
||||||
|
result from subpattern is SSB_CONTINUE rather than SSB_DONE. For KET,
|
||||||
|
return SSB_CONTINUE: if this is the top level, that indicates failure,
|
||||||
|
but after a nested subpattern, it causes scanning to continue. */
|
||||||
|
|
||||||
|
case OP_ALT:
|
||||||
|
yield = SSB_CONTINUE;
|
||||||
|
try_next = FALSE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OP_KET:
|
||||||
|
case OP_KETRMAX:
|
||||||
|
case OP_KETRMIN:
|
||||||
|
return SSB_CONTINUE;
|
||||||
|
|
||||||
|
/* Skip over callout */
|
||||||
|
|
||||||
|
case OP_CALLOUT:
|
||||||
|
tcode += 2 + 2*LINK_SIZE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* Skip over lookbehind and negative lookahead assertions */
|
||||||
|
|
||||||
|
case OP_ASSERT_NOT:
|
||||||
|
case OP_ASSERTBACK:
|
||||||
|
case OP_ASSERTBACK_NOT:
|
||||||
|
do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
|
||||||
|
tcode += 1 + LINK_SIZE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* Skip over an option setting, changing the caseless flag */
|
||||||
|
|
||||||
|
case OP_OPT:
|
||||||
|
caseless = (tcode[1] & PCRE_CASELESS) != 0;
|
||||||
|
tcode += 2;
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* BRAZERO does the bracket, but carries on. */
|
||||||
|
|
||||||
|
case OP_BRAZERO:
|
||||||
|
case OP_BRAMINZERO:
|
||||||
|
if (set_start_bits(++tcode, start_bits, caseless, utf8, cd) == SSB_FAIL)
|
||||||
|
return SSB_FAIL;
|
||||||
|
/* =========================================================================
|
||||||
|
See the comment at the head of this function concerning the next line,
|
||||||
|
which was an old fudge for the benefit of OS/2.
|
||||||
|
dummy = 1;
|
||||||
|
========================================================================= */
|
||||||
|
do tcode += GET(tcode,1); while (*tcode == OP_ALT);
|
||||||
|
tcode += 1 + LINK_SIZE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* Single-char * or ? sets the bit and tries the next item */
|
||||||
|
|
||||||
|
case OP_STAR:
|
||||||
|
case OP_MINSTAR:
|
||||||
|
case OP_POSSTAR:
|
||||||
|
case OP_QUERY:
|
||||||
|
case OP_MINQUERY:
|
||||||
|
case OP_POSQUERY:
|
||||||
|
set_bit(start_bits, tcode[1], caseless, cd);
|
||||||
|
tcode += 2;
|
||||||
|
#ifdef SUPPORT_UTF8
|
||||||
|
if (utf8 && tcode[-1] >= 0xc0)
|
||||||
|
tcode += _pcre_utf8_table4[tcode[-1] & 0x3f];
|
||||||
|
#endif
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* Single-char upto sets the bit and tries the next */
|
||||||
|
|
||||||
|
case OP_UPTO:
|
||||||
|
case OP_MINUPTO:
|
||||||
|
case OP_POSUPTO:
|
||||||
|
set_bit(start_bits, tcode[3], caseless, cd);
|
||||||
|
tcode += 4;
|
||||||
|
#ifdef SUPPORT_UTF8
|
||||||
|
if (utf8 && tcode[-1] >= 0xc0)
|
||||||
|
tcode += _pcre_utf8_table4[tcode[-1] & 0x3f];
|
||||||
|
#endif
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* At least one single char sets the bit and stops */
|
||||||
|
|
||||||
|
case OP_EXACT: /* Fall through */
|
||||||
|
tcode += 2;
|
||||||
|
|
||||||
|
case OP_CHAR:
|
||||||
|
case OP_CHARNC:
|
||||||
|
case OP_PLUS:
|
||||||
|
case OP_MINPLUS:
|
||||||
|
case OP_POSPLUS:
|
||||||
|
set_bit(start_bits, tcode[1], caseless, cd);
|
||||||
|
try_next = FALSE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* Single character type sets the bits and stops */
|
||||||
|
|
||||||
|
case OP_NOT_DIGIT:
|
||||||
|
for (c = 0; c < 32; c++)
|
||||||
|
start_bits[c] |= ~cd->cbits[c+cbit_digit];
|
||||||
|
try_next = FALSE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OP_DIGIT:
|
||||||
|
for (c = 0; c < 32; c++)
|
||||||
|
start_bits[c] |= cd->cbits[c+cbit_digit];
|
||||||
|
try_next = FALSE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* The cbit_space table has vertical tab as whitespace; we have to
|
||||||
|
discard it. */
|
||||||
|
|
||||||
|
case OP_NOT_WHITESPACE:
|
||||||
|
for (c = 0; c < 32; c++)
|
||||||
|
{
|
||||||
|
int d = cd->cbits[c+cbit_space];
|
||||||
|
if (c == 1) d &= ~0x08;
|
||||||
|
start_bits[c] |= ~d;
|
||||||
|
}
|
||||||
|
try_next = FALSE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* The cbit_space table has vertical tab as whitespace; we have to
|
||||||
|
discard it. */
|
||||||
|
|
||||||
|
case OP_WHITESPACE:
|
||||||
|
for (c = 0; c < 32; c++)
|
||||||
|
{
|
||||||
|
int d = cd->cbits[c+cbit_space];
|
||||||
|
if (c == 1) d &= ~0x08;
|
||||||
|
start_bits[c] |= d;
|
||||||
|
}
|
||||||
|
try_next = FALSE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OP_NOT_WORDCHAR:
|
||||||
|
for (c = 0; c < 32; c++)
|
||||||
|
start_bits[c] |= ~cd->cbits[c+cbit_word];
|
||||||
|
try_next = FALSE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OP_WORDCHAR:
|
||||||
|
for (c = 0; c < 32; c++)
|
||||||
|
start_bits[c] |= cd->cbits[c+cbit_word];
|
||||||
|
try_next = FALSE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* One or more character type fudges the pointer and restarts, knowing
|
||||||
|
it will hit a single character type and stop there. */
|
||||||
|
|
||||||
|
case OP_TYPEPLUS:
|
||||||
|
case OP_TYPEMINPLUS:
|
||||||
|
tcode++;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OP_TYPEEXACT:
|
||||||
|
tcode += 3;
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* Zero or more repeats of character types set the bits and then
|
||||||
|
try again. */
|
||||||
|
|
||||||
|
case OP_TYPEUPTO:
|
||||||
|
case OP_TYPEMINUPTO:
|
||||||
|
case OP_TYPEPOSUPTO:
|
||||||
|
tcode += 2; /* Fall through */
|
||||||
|
|
||||||
|
case OP_TYPESTAR:
|
||||||
|
case OP_TYPEMINSTAR:
|
||||||
|
case OP_TYPEPOSSTAR:
|
||||||
|
case OP_TYPEQUERY:
|
||||||
|
case OP_TYPEMINQUERY:
|
||||||
|
case OP_TYPEPOSQUERY:
|
||||||
|
switch(tcode[1])
|
||||||
|
{
|
||||||
|
case OP_ANY:
|
||||||
|
return SSB_FAIL;
|
||||||
|
|
||||||
|
case OP_NOT_DIGIT:
|
||||||
|
for (c = 0; c < 32; c++)
|
||||||
|
start_bits[c] |= ~cd->cbits[c+cbit_digit];
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OP_DIGIT:
|
||||||
|
for (c = 0; c < 32; c++)
|
||||||
|
start_bits[c] |= cd->cbits[c+cbit_digit];
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* The cbit_space table has vertical tab as whitespace; we have to
|
||||||
|
discard it. */
|
||||||
|
|
||||||
|
case OP_NOT_WHITESPACE:
|
||||||
|
for (c = 0; c < 32; c++)
|
||||||
|
{
|
||||||
|
int d = cd->cbits[c+cbit_space];
|
||||||
|
if (c == 1) d &= ~0x08;
|
||||||
|
start_bits[c] |= ~d;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* The cbit_space table has vertical tab as whitespace; we have to
|
||||||
|
discard it. */
|
||||||
|
|
||||||
|
case OP_WHITESPACE:
|
||||||
|
for (c = 0; c < 32; c++)
|
||||||
|
{
|
||||||
|
int d = cd->cbits[c+cbit_space];
|
||||||
|
if (c == 1) d &= ~0x08;
|
||||||
|
start_bits[c] |= d;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OP_NOT_WORDCHAR:
|
||||||
|
for (c = 0; c < 32; c++)
|
||||||
|
start_bits[c] |= ~cd->cbits[c+cbit_word];
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OP_WORDCHAR:
|
||||||
|
for (c = 0; c < 32; c++)
|
||||||
|
start_bits[c] |= cd->cbits[c+cbit_word];
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
tcode += 2;
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* Character class where all the information is in a bit map: set the
|
||||||
|
bits and either carry on or not, according to the repeat count. If it was
|
||||||
|
a negative class, and we are operating with UTF-8 characters, any byte
|
||||||
|
with a value >= 0xc4 is a potentially valid starter because it starts a
|
||||||
|
character with a value > 255. */
|
||||||
|
|
||||||
|
case OP_NCLASS:
|
||||||
|
if (utf8)
|
||||||
|
{
|
||||||
|
start_bits[24] |= 0xf0; /* Bits for 0xc4 - 0xc8 */
|
||||||
|
memset(start_bits+25, 0xff, 7); /* Bits for 0xc9 - 0xff */
|
||||||
|
}
|
||||||
|
/* Fall through */
|
||||||
|
|
||||||
|
case OP_CLASS:
|
||||||
|
{
|
||||||
|
tcode++;
|
||||||
|
|
||||||
|
/* In UTF-8 mode, the bits in a bit map correspond to character
|
||||||
|
values, not to byte values. However, the bit map we are constructing is
|
||||||
|
for byte values. So we have to do a conversion for characters whose
|
||||||
|
value is > 127. In fact, there are only two possible starting bytes for
|
||||||
|
characters in the range 128 - 255. */
|
||||||
|
|
||||||
|
if (utf8)
|
||||||
|
{
|
||||||
|
for (c = 0; c < 16; c++) start_bits[c] |= tcode[c];
|
||||||
|
for (c = 128; c < 256; c++)
|
||||||
|
{
|
||||||
|
if ((tcode[c/8] && (1 << (c&7))) != 0)
|
||||||
|
{
|
||||||
|
int d = (c >> 6) | 0xc0; /* Set bit for this starter */
|
||||||
|
start_bits[d/8] |= (1 << (d&7)); /* and then skip on to the */
|
||||||
|
c = (c & 0xc0) + 0x40 - 1; /* next relevant character. */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* In non-UTF-8 mode, the two bit maps are completely compatible. */
|
||||||
|
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for (c = 0; c < 32; c++) start_bits[c] |= tcode[c];
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Advance past the bit map, and act on what follows */
|
||||||
|
|
||||||
|
tcode += 32;
|
||||||
|
switch (*tcode)
|
||||||
|
{
|
||||||
|
case OP_CRSTAR:
|
||||||
|
case OP_CRMINSTAR:
|
||||||
|
case OP_CRQUERY:
|
||||||
|
case OP_CRMINQUERY:
|
||||||
|
tcode++;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OP_CRRANGE:
|
||||||
|
case OP_CRMINRANGE:
|
||||||
|
if (((tcode[1] << 8) + tcode[2]) == 0) tcode += 5;
|
||||||
|
else try_next = FALSE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
try_next = FALSE;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break; /* End of bitmap class handling */
|
||||||
|
|
||||||
|
} /* End of switch */
|
||||||
|
} /* End of try_next loop */
|
||||||
|
|
||||||
|
code += GET(code, 1); /* Advance to next branch */
|
||||||
|
}
|
||||||
|
while (*code == OP_ALT);
|
||||||
|
return yield;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Study a compiled expression *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This function is handed a compiled expression that it must study to produce
|
||||||
|
information that will speed up the matching. It returns a pcre_extra block
|
||||||
|
which then gets handed back to pcre_exec().
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
re points to the compiled expression
|
||||||
|
options contains option bits
|
||||||
|
errorptr points to where to place error messages;
|
||||||
|
set NULL unless error
|
||||||
|
|
||||||
|
Returns: pointer to a pcre_extra block, with study_data filled in and the
|
||||||
|
appropriate flag set;
|
||||||
|
NULL on error or if no optimization possible
|
||||||
|
*/
|
||||||
|
|
||||||
|
PCRE_DATA_SCOPE pcre_extra *
pcre_study(const pcre *external_re, int options, const char **errorptr)
{
const real_pcre *re = (const real_pcre *)external_re;
const uschar *tables;
uschar *code;
uschar start_bits[32];
compile_data compile_block;
pcre_extra *extra;
pcre_study_data *study;

/* No error has been seen yet. errorptr is written through unconditionally, so
the caller must always supply it. */

*errorptr = NULL;

/* Validate the arguments: the block must carry the magic number of a compiled
pattern, and only public study option bits may be set. */

if (re == NULL || re->magic_number != MAGIC_NUMBER)
  *errorptr = "argument is not a compiled regular expression";
else if ((options & ~PUBLIC_STUDY_OPTIONS) != 0)
  *errorptr = "unknown or incorrect option bit(s) set";

if (*errorptr != NULL) return NULL;

/* At present nothing further is done for an anchored pattern, an unanchored
pattern that has a first char, or a multiline pattern that matches only at
"line starts". These cases yield NULL without an error message. */

if ((re->options & (PCRE_ANCHORED|PCRE_FIRSTSET|PCRE_STARTLINE)) != 0)
  return NULL;

/* The data handed to set_start_bits() begins directly after the name table. */

code = (uschar *)re + re->name_table_offset +
  (re->name_count * re->name_entry_size);

/* Pick the character tables: the ones recorded in the pattern, or, if none
are recorded, whatever pcre_fullinfo() reports for PCRE_INFO_DEFAULT_TABLES. */

if ((tables = re->tables) == NULL)
  (void)pcre_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,
    (void *)(&tables));

/* Fill in the block that is passed around with pointers into those tables. */

compile_block.lcc = tables + lcc_offset;
compile_block.fcc = tables + fcc_offset;
compile_block.cbits = tables + cbits_offset;
compile_block.ctypes = tables + ctypes_offset;

/* Try to find a fixed set of initial characters for the pattern, starting
from an empty bit map. Anything other than SSB_DONE means there is no study
data to return. */

memset(start_bits, 0, sizeof(start_bits));

if (set_start_bits(code, start_bits, (re->options & PCRE_CASELESS) != 0,
    (re->options & PCRE_UTF8) != 0, &compile_block) != SSB_DONE)
  return NULL;

/* One allocation holds the pcre_extra block followed immediately by the
pcre_study_data block. The former points to the latter, and may get additional
data set later by the calling program. */

extra = (pcre_extra *)(pcre_malloc)
  (sizeof(pcre_extra) + sizeof(pcre_study_data));

if (extra == NULL)
  {
  *errorptr = "failed to get memory";
  return NULL;
  }

study = (pcre_study_data *)(extra + 1);

/* The size of pcre_study_data is fixed at the moment, but it is still saved
in a field so that pcre_fullinfo() need not change if it becomes variable. */

study->size = sizeof(pcre_study_data);
study->options = PCRE_STUDY_MAPPED;
memcpy(study->start_bits, start_bits, sizeof(start_bits));

extra->flags = PCRE_EXTRA_STUDY_DATA;
extra->study_data = study;

return extra;
}
|
||||||
|
|
||||||
|
/* End of pcre_study.c */
|
304
glib/pcre/pcre_tables.c
Normal file
304
glib/pcre/pcre_tables.c
Normal file
@ -0,0 +1,304 @@
|
|||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Written by Philip Hazel
|
||||||
|
Copyright (c) 1997-2006 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/* This module contains some fixed tables that are used by more than one of the
|
||||||
|
PCRE code modules. The tables are also #included by the pcretest program, which
|
||||||
|
uses macros to change their names from _pcre_xxx to xxxx, thereby avoiding name
|
||||||
|
clashes with the library. */
|
||||||
|
|
||||||
|
|
||||||
|
#include "pcre_internal.h"
|
||||||
|
|
||||||
|
|
||||||
|
/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
|
||||||
|
the definition is next to the definition of the opcodes in pcre_internal.h. */
|
||||||
|
|
||||||
|
/* Every element of this table comes from the expansion of OP_LENGTHS. */
const uschar _pcre_OP_lengths[] = { OP_LENGTHS };
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Tables for UTF-8 support *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* These are the breakpoints for different numbers of bytes in a UTF-8
|
||||||
|
character. */
|
||||||
|
|
||||||
|
const int _pcre_utf8_table1[] = {
  0x7f,          /* breakpoint for a 1-byte character */
  0x7ff,         /* 2 bytes */
  0xffff,        /* 3 bytes */
  0x1fffff,      /* 4 bytes */
  0x3ffffff,     /* 5 bytes */
  0x7fffffff     /* 6 bytes */
};

/* Number of breakpoints in the table above. */

const int _pcre_utf8_table1_size =
  sizeof(_pcre_utf8_table1)/sizeof(_pcre_utf8_table1[0]);

/* Both tables below are indexed by the number of additional bytes in the
character. The first holds the indicator bits to set in the first byte; the
second holds the mask for the data bits of the first byte. */

const int _pcre_utf8_table2[] = {
  0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc
};

const int _pcre_utf8_table3[] = {
  0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01
};
|
||||||
|
|
||||||
|
/* Table of the number of extra bytes, indexed by the first byte masked with
|
||||||
|
0x3f. The highest number for a valid UTF-8 first byte is in fact 0x3d. */
|
||||||
|
|
||||||
|
const uschar _pcre_utf8_table4[] = {
  /* 0x00 - 0x0f */  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x10 - 0x1f */  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x20 - 0x2f */  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  /* 0x30 - 0x37 */  3,3,3,3,3,3,3,3,
  /* 0x38 - 0x3b */  4,4,4,4,
  /* 0x3c - 0x3f */  5,5,5,5
};
|
||||||
|
|
||||||
|
/* This table translates Unicode property names into type and code values. It
|
||||||
|
is searched by binary chop, so must be in collating sequence of name. */
|
||||||
|
|
||||||
|
/* All property names packed into one array, each terminated by a zero byte.
The names are laid out one initial letter per group purely for readability;
the sequence of names is unchanged. */

const char _pcre_ucp_names[] =
  /* A */ "Any\0" "Arabic\0" "Armenian\0"
  /* B */ "Balinese\0" "Bengali\0" "Bopomofo\0" "Braille\0" "Buginese\0"
          "Buhid\0"
  /* C */ "C\0" "Canadian_Aboriginal\0" "Cc\0" "Cf\0" "Cherokee\0" "Cn\0"
          "Co\0" "Common\0" "Coptic\0" "Cs\0" "Cuneiform\0" "Cypriot\0"
          "Cyrillic\0"
  /* D */ "Deseret\0" "Devanagari\0"
  /* E */ "Ethiopic\0"
  /* G */ "Georgian\0" "Glagolitic\0" "Gothic\0" "Greek\0" "Gujarati\0"
          "Gurmukhi\0"
  /* H */ "Han\0" "Hangul\0" "Hanunoo\0" "Hebrew\0" "Hiragana\0"
  /* I */ "Inherited\0"
  /* K */ "Kannada\0" "Katakana\0" "Kharoshthi\0" "Khmer\0"
  /* L */ "L\0" "L&\0" "Lao\0" "Latin\0" "Limbu\0" "Linear_B\0" "Ll\0"
          "Lm\0" "Lo\0" "Lt\0" "Lu\0"
  /* M */ "M\0" "Malayalam\0" "Mc\0" "Me\0" "Mn\0" "Mongolian\0" "Myanmar\0"
  /* N */ "N\0" "Nd\0" "New_Tai_Lue\0" "Nko\0" "Nl\0" "No\0"
  /* O */ "Ogham\0" "Old_Italic\0" "Old_Persian\0" "Oriya\0" "Osmanya\0"
  /* P */ "P\0" "Pc\0" "Pd\0" "Pe\0" "Pf\0" "Phags_Pa\0" "Phoenician\0"
          "Pi\0" "Po\0" "Ps\0"
  /* R */ "Runic\0"
  /* S */ "S\0" "Sc\0" "Shavian\0" "Sinhala\0" "Sk\0" "Sm\0" "So\0"
          "Syloti_Nagri\0" "Syriac\0"
  /* T */ "Tagalog\0" "Tagbanwa\0" "Tai_Le\0" "Tamil\0" "Telugu\0"
          "Thaana\0" "Thai\0" "Tibetan\0" "Tifinagh\0"
  /* U */ "Ugaritic\0"
  /* Y */ "Yi\0"
  /* Z */ "Z\0" "Zl\0" "Zp\0" "Zs\0";
|
||||||
|
|
||||||
|
const ucp_type_table _pcre_utt[] = {
|
||||||
|
{ 0, PT_ANY, 0 },
|
||||||
|
{ 4, PT_SC, ucp_Arabic },
|
||||||
|
{ 11, PT_SC, ucp_Armenian },
|
||||||
|
{ 20, PT_SC, ucp_Balinese },
|
||||||
|
{ 29, PT_SC, ucp_Bengali },
|
||||||
|
{ 37, PT_SC, ucp_Bopomofo },
|
||||||
|
{ 46, PT_SC, ucp_Braille },
|
||||||
|
{ 54, PT_SC, ucp_Buginese },
|
||||||
|
{ 63, PT_SC, ucp_Buhid },
|
||||||
|
{ 69, PT_GC, ucp_C },
|
||||||
|
{ 71, PT_SC, ucp_Canadian_Aboriginal },
|
||||||
|
{ 91, PT_PC, ucp_Cc },
|
||||||
|
{ 94, PT_PC, ucp_Cf },
|
||||||
|
{ 97, PT_SC, ucp_Cherokee },
|
||||||
|
{ 106, PT_PC, ucp_Cn },
|
||||||
|
{ 109, PT_PC, ucp_Co },
|
||||||
|
{ 112, PT_SC, ucp_Common },
|
||||||
|
{ 119, PT_SC, ucp_Coptic },
|
||||||
|
{ 126, PT_PC, ucp_Cs },
|
||||||
|
{ 129, PT_SC, ucp_Cuneiform },
|
||||||
|
{ 139, PT_SC, ucp_Cypriot },
|
||||||
|
{ 147, PT_SC, ucp_Cyrillic },
|
||||||
|
{ 156, PT_SC, ucp_Deseret },
|
||||||
|
{ 164, PT_SC, ucp_Devanagari },
|
||||||
|
{ 175, PT_SC, ucp_Ethiopic },
|
||||||
|
{ 184, PT_SC, ucp_Georgian },
|
||||||
|
{ 193, PT_SC, ucp_Glagolitic },
|
||||||
|
{ 204, PT_SC, ucp_Gothic },
|
||||||
|
{ 211, PT_SC, ucp_Greek },
|
||||||
|
{ 217, PT_SC, ucp_Gujarati },
|
||||||
|
{ 226, PT_SC, ucp_Gurmukhi },
|
||||||
|
{ 235, PT_SC, ucp_Han },
|
||||||
|
{ 239, PT_SC, ucp_Hangul },
|
||||||
|
{ 246, PT_SC, ucp_Hanunoo },
|
||||||
|
{ 254, PT_SC, ucp_Hebrew },
|
||||||
|
{ 261, PT_SC, ucp_Hiragana },
|
||||||
|
{ 270, PT_SC, ucp_Inherited },
|
||||||
|
{ 280, PT_SC, ucp_Kannada },
|
||||||
|
{ 288, PT_SC, ucp_Katakana },
|
||||||
|
{ 297, PT_SC, ucp_Kharoshthi },
|
||||||
|
{ 308, PT_SC, ucp_Khmer },
|
||||||
|
{ 314, PT_GC, ucp_L },
|
||||||
|
{ 316, PT_LAMP, 0 },
|
||||||
|
{ 319, PT_SC, ucp_Lao },
|
||||||
|
{ 323, PT_SC, ucp_Latin },
|
||||||
|
{ 329, PT_SC, ucp_Limbu },
|
||||||
|
{ 335, PT_SC, ucp_Linear_B },
|
||||||
|
{ 344, PT_PC, ucp_Ll },
|
||||||
|
{ 347, PT_PC, ucp_Lm },
|
||||||
|
{ 350, PT_PC, ucp_Lo },
|
||||||
|
{ 353, PT_PC, ucp_Lt },
|
||||||
|
{ 356, PT_PC, ucp_Lu },
|
||||||
|
{ 359, PT_GC, ucp_M },
|
||||||
|
{ 361, PT_SC, ucp_Malayalam },
|
||||||
|
{ 371, PT_PC, ucp_Mc },
|
||||||
|
{ 374, PT_PC, ucp_Me },
|
||||||
|
{ 377, PT_PC, ucp_Mn },
|
||||||
|
{ 380, PT_SC, ucp_Mongolian },
|
||||||
|
{ 390, PT_SC, ucp_Myanmar },
|
||||||
|
{ 398, PT_GC, ucp_N },
|
||||||
|
{ 400, PT_PC, ucp_Nd },
|
||||||
|
{ 403, PT_SC, ucp_New_Tai_Lue },
|
||||||
|
{ 415, PT_SC, ucp_Nko },
|
||||||
|
{ 419, PT_PC, ucp_Nl },
|
||||||
|
{ 422, PT_PC, ucp_No },
|
||||||
|
{ 425, PT_SC, ucp_Ogham },
|
||||||
|
{ 431, PT_SC, ucp_Old_Italic },
|
||||||
|
{ 442, PT_SC, ucp_Old_Persian },
|
||||||
|
{ 454, PT_SC, ucp_Oriya },
|
||||||
|
{ 460, PT_SC, ucp_Osmanya },
|
||||||
|
{ 468, PT_GC, ucp_P },
|
||||||
|
{ 470, PT_PC, ucp_Pc },
|
||||||
|
{ 473, PT_PC, ucp_Pd },
|
||||||
|
{ 476, PT_PC, ucp_Pe },
|
||||||
|
{ 479, PT_PC, ucp_Pf },
|
||||||
|
{ 482, PT_SC, ucp_Phags_Pa },
|
||||||
|
{ 491, PT_SC, ucp_Phoenician },
|
||||||
|
{ 502, PT_PC, ucp_Pi },
|
||||||
|
{ 505, PT_PC, ucp_Po },
|
||||||
|
{ 508, PT_PC, ucp_Ps },
|
||||||
|
{ 511, PT_SC, ucp_Runic },
|
||||||
|
{ 517, PT_GC, ucp_S },
|
||||||
|
{ 519, PT_PC, ucp_Sc },
|
||||||
|
{ 522, PT_SC, ucp_Shavian },
|
||||||
|
{ 530, PT_SC, ucp_Sinhala },
|
||||||
|
{ 538, PT_PC, ucp_Sk },
|
||||||
|
{ 541, PT_PC, ucp_Sm },
|
||||||
|
{ 544, PT_PC, ucp_So },
|
||||||
|
{ 547, PT_SC, ucp_Syloti_Nagri },
|
||||||
|
{ 560, PT_SC, ucp_Syriac },
|
||||||
|
{ 567, PT_SC, ucp_Tagalog },
|
||||||
|
{ 575, PT_SC, ucp_Tagbanwa },
|
||||||
|
{ 584, PT_SC, ucp_Tai_Le },
|
||||||
|
{ 591, PT_SC, ucp_Tamil },
|
||||||
|
{ 597, PT_SC, ucp_Telugu },
|
||||||
|
{ 604, PT_SC, ucp_Thaana },
|
||||||
|
{ 611, PT_SC, ucp_Thai },
|
||||||
|
{ 616, PT_SC, ucp_Tibetan },
|
||||||
|
{ 624, PT_SC, ucp_Tifinagh },
|
||||||
|
{ 633, PT_SC, ucp_Ugaritic },
|
||||||
|
{ 642, PT_SC, ucp_Yi },
|
||||||
|
{ 645, PT_GC, ucp_Z },
|
||||||
|
{ 647, PT_PC, ucp_Zl },
|
||||||
|
{ 650, PT_PC, ucp_Zp },
|
||||||
|
{ 653, PT_PC, ucp_Zs }
|
||||||
|
};
|
||||||
|
|
||||||
|
const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table);
|
||||||
|
|
||||||
|
/* End of pcre_tables.c */
|
132
glib/pcre/pcre_try_flipped.c
Normal file
132
glib/pcre/pcre_try_flipped.c
Normal file
@ -0,0 +1,132 @@
|
|||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Written by Philip Hazel
|
||||||
|
Copyright (c) 1997-2006 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/* This module contains an internal function that tests a compiled pattern to
|
||||||
|
see if it was compiled with the opposite endianness. If so, it uses an
|
||||||
|
auxiliary local function to flip the appropriate bytes. */
|
||||||
|
|
||||||
|
|
||||||
|
#include "pcre_internal.h"
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Flip bytes in an integer *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This function is called when the magic number in a regex doesn't match, in
|
||||||
|
order to flip its bytes to see if we are dealing with a pattern that was
|
||||||
|
compiled on a host of different endianness. If so, this function is used to
|
||||||
|
flip other byte values.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
value the number to flip
|
||||||
|
n the number of bytes to flip (assumed to be 2 or 4)
|
||||||
|
|
||||||
|
Returns: the flipped value
|
||||||
|
*/
|
||||||
|
|
||||||
|
static unsigned long int
byteflip(unsigned long int value, int n)
{
/* Pull the low four bytes apart first; b0 is the least significant. */

const unsigned long int b0 = value & 0xff;
const unsigned long int b1 = (value >> 8) & 0xff;
const unsigned long int b2 = (value >> 16) & 0xff;
const unsigned long int b3 = (value >> 24) & 0xff;

/* A two-byte flip swaps just the two low bytes and drops everything else. */

if (n == 2) return (b0 << 8) | b1;

/* Any other n is handled as a four-byte flip: the byte order is reversed. */

return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Test for a byte-flipped compiled regex *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This function is called from pcre_exec(), pcre_dfa_exec(), and also from
|
||||||
|
pcre_fullinfo(). Its job is to test whether the regex is byte-flipped - that
|
||||||
|
is, it was compiled on a system of opposite endianness. The function is called
|
||||||
|
only when the native MAGIC_NUMBER test fails. If the regex is indeed flipped,
|
||||||
|
we flip all the relevant values into a different data block, and return it.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
re points to the regex
|
||||||
|
study points to study data, or NULL
|
||||||
|
internal_re points to a new regex block
|
||||||
|
internal_study points to a new study block
|
||||||
|
|
||||||
|
Returns: the new block if it is indeed a byte-flipped regex
|
||||||
|
NULL if it is not
|
||||||
|
*/
|
||||||
|
|
||||||
|
real_pcre *
|
||||||
|
_pcre_try_flipped(const real_pcre *re, real_pcre *internal_re,
|
||||||
|
const pcre_study_data *study, pcre_study_data *internal_study)
|
||||||
|
{
|
||||||
|
if (byteflip(re->magic_number, sizeof(re->magic_number)) != MAGIC_NUMBER)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
*internal_re = *re; /* To copy other fields */
|
||||||
|
internal_re->size = byteflip(re->size, sizeof(re->size));
|
||||||
|
internal_re->options = byteflip(re->options, sizeof(re->options));
|
||||||
|
internal_re->top_bracket =
|
||||||
|
(pcre_uint16)byteflip(re->top_bracket, sizeof(re->top_bracket));
|
||||||
|
internal_re->top_backref =
|
||||||
|
(pcre_uint16)byteflip(re->top_backref, sizeof(re->top_backref));
|
||||||
|
internal_re->first_byte =
|
||||||
|
(pcre_uint16)byteflip(re->first_byte, sizeof(re->first_byte));
|
||||||
|
internal_re->req_byte =
|
||||||
|
(pcre_uint16)byteflip(re->req_byte, sizeof(re->req_byte));
|
||||||
|
internal_re->name_table_offset =
|
||||||
|
(pcre_uint16)byteflip(re->name_table_offset, sizeof(re->name_table_offset));
|
||||||
|
internal_re->name_entry_size =
|
||||||
|
(pcre_uint16)byteflip(re->name_entry_size, sizeof(re->name_entry_size));
|
||||||
|
internal_re->name_count =
|
||||||
|
(pcre_uint16)byteflip(re->name_count, sizeof(re->name_count));
|
||||||
|
|
||||||
|
if (study != NULL)
|
||||||
|
{
|
||||||
|
*internal_study = *study; /* To copy other fields */
|
||||||
|
internal_study->size = byteflip(study->size, sizeof(study->size));
|
||||||
|
internal_study->options = byteflip(study->options, sizeof(study->options));
|
||||||
|
}
|
||||||
|
|
||||||
|
return internal_re;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* End of pcre_try_flipped.c */
|
126
glib/pcre/pcre_ucp_searchfuncs.c
Normal file
126
glib/pcre/pcre_ucp_searchfuncs.c
Normal file
@ -0,0 +1,126 @@
|
|||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Written by Philip Hazel
|
||||||
|
Copyright (c) 1997-2006 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* This file has been modified to use glib instead of the internal table
|
||||||
|
* in ucptable.c -- Marco Barisione */
|
||||||
|
|
||||||
|
/* This module contains code for searching the table of Unicode character
|
||||||
|
properties. */
|
||||||
|
|
||||||
|
#include "pcre_internal.h"
|
||||||
|
|
||||||
|
#include "ucp.h" /* Category definitions */
|
||||||
|
#include "ucpinternal.h" /* Internal table details */
|
||||||
|
|
||||||
|
|
||||||
|
/* Table to translate from particular type value to the general value. */
|
||||||
|
|
||||||
|
static int ucp_gentype[] = {
|
||||||
|
ucp_C, ucp_C, ucp_C, ucp_C, ucp_C, /* Cc, Cf, Cn, Co, Cs */
|
||||||
|
ucp_L, ucp_L, ucp_L, ucp_L, ucp_L, /* Ll, Lu, Lm, Lo, Lt */
|
||||||
|
ucp_M, ucp_M, ucp_M, /* Mc, Me, Mn */
|
||||||
|
ucp_N, ucp_N, ucp_N, /* Nd, Nl, No */
|
||||||
|
ucp_P, ucp_P, ucp_P, ucp_P, ucp_P, /* Pc, Pd, Pe, Pf, Pi */
|
||||||
|
ucp_P, ucp_P, /* Ps, Po */
|
||||||
|
ucp_S, ucp_S, ucp_S, ucp_S, /* Sc, Sk, Sm, So */
|
||||||
|
ucp_Z, ucp_Z, ucp_Z /* Zl, Zp, Zs */
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Search table and return type *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* Three values are returned: the category is ucp_C, ucp_L, etc. The detailed
|
||||||
|
character type is ucp_Lu, ucp_Nd, etc. The script is ucp_Latin, etc.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
c the character value
|
||||||
|
type_ptr the detailed character type is returned here
|
||||||
|
script_ptr the script is returned here
|
||||||
|
|
||||||
|
Returns: the character type category
|
||||||
|
*/
|
||||||
|
|
||||||
|
int
|
||||||
|
_pcre_ucp_findprop(const unsigned int c, int *type_ptr, int *script_ptr)
|
||||||
|
{
|
||||||
|
/* Note that the Unicode types have the same values in glib and in
|
||||||
|
* PCRE, so ucp_Ll == G_UNICODE_LOWERCASE_LETTER,
|
||||||
|
* ucp_Zs == G_UNICODE_SPACE_SEPARATOR, and so on. */
|
||||||
|
*type_ptr = g_unichar_type(c);
|
||||||
|
*script_ptr = g_unichar_get_script(c);
|
||||||
|
return ucp_gentype[*type_ptr];
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Search table and return other case *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* If the given character is a letter, and there is another case for the
|
||||||
|
letter, return the other case. Otherwise, return NOTACHAR.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
c the character value
|
||||||
|
|
||||||
|
Returns: the other case or NOTACHAR if none
|
||||||
|
*/
|
||||||
|
|
||||||
|
unsigned int
|
||||||
|
_pcre_ucp_othercase(const unsigned int c)
|
||||||
|
{
|
||||||
|
int other_case = NOTACHAR;
|
||||||
|
|
||||||
|
if (g_unichar_islower(c))
|
||||||
|
other_case = g_unichar_toupper(c);
|
||||||
|
else if (g_unichar_isupper(c))
|
||||||
|
other_case = g_unichar_tolower(c);
|
||||||
|
|
||||||
|
if (other_case == c)
|
||||||
|
other_case = NOTACHAR;
|
||||||
|
|
||||||
|
return other_case;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* End of pcre_ucp_searchfuncs.c */
|
13
glib/pcre/pcre_valid_utf8.c
Normal file
13
glib/pcre/pcre_valid_utf8.c
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
#include "pcre_internal.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This function is not needed by GRegex, so it just prints a warning and
|
||||||
|
* always returns -1, which means that the string is a valid UTF-8 encoded
|
||||||
|
* string.
|
||||||
|
*/
|
||||||
|
int
|
||||||
|
_pcre_valid_utf8(const uschar *string, int length)
|
||||||
|
{
|
||||||
|
g_warning ("%s: this function should not be called", G_STRLOC);
|
||||||
|
return -1;
|
||||||
|
}
|
86
glib/pcre/pcre_version.c
Normal file
86
glib/pcre/pcre_version.c
Normal file
@ -0,0 +1,86 @@
|
|||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Written by Philip Hazel
|
||||||
|
Copyright (c) 1997-2006 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/* This module contains the external function pcre_version(), which returns a
|
||||||
|
string that identifies the PCRE version that is in use. */
|
||||||
|
|
||||||
|
|
||||||
|
#include "pcre_internal.h"
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Return version string *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* These macros are the standard way of turning unquoted text into C strings.
|
||||||
|
They allow macros like PCRE_MAJOR to be defined without quotes, which is
|
||||||
|
convenient for user programs that want to test its value. */
|
||||||
|
|
||||||
|
/* Turns the argument, exactly as written, into a string literal. */
#define STRING(a) # a
|
||||||
|
/* Lets macros in the argument be expanded before STRING stringifies it. */
#define XSTRING(s) STRING(s)
|
||||||
|
|
||||||
|
/* A problem turned up with PCRE_PRERELEASE, which is defined empty for
|
||||||
|
production releases. Originally, it was used naively in this code:
|
||||||
|
|
||||||
|
return XSTRING(PCRE_MAJOR)
|
||||||
|
"." XSTRING(PCRE_MINOR)
|
||||||
|
XSTRING(PCRE_PRERELEASE)
|
||||||
|
" " XSTRING(PCRE_DATE);
|
||||||
|
|
||||||
|
However, when PCRE_PRERELEASE is empty, this leads to an attempted expansion of
|
||||||
|
STRING(). The C standard states: "If (before argument substitution) any
|
||||||
|
argument consists of no preprocessing tokens, the behavior is undefined." It
|
||||||
|
turns out that gcc treats this case as a single empty string - which is what we
|
||||||
|
really want - but Visual C grumbles about the lack of an argument for the
|
||||||
|
macro. Unfortunately, both are within their rights. To cope with both ways of
|
||||||
|
handling this, I had to resort to some messy hackery that does a test at run time.
|
||||||
|
I could find no way of detecting that a macro is defined as an empty string at
|
||||||
|
pre-processor time. This hack uses a standard trick for avoiding calling
|
||||||
|
the STRING macro with an empty argument when doing the test. */
|
||||||
|
|
||||||
|
PCRE_DATA_SCOPE const char *
|
||||||
|
pcre_version(void)
|
||||||
|
{
|
||||||
|
return (XSTRING(Z PCRE_PRERELEASE)[1] == 0)?
|
||||||
|
XSTRING(PCRE_MAJOR.PCRE_MINOR PCRE_DATE) :
|
||||||
|
XSTRING(PCRE_MAJOR.PCRE_MINOR) XSTRING(PCRE_PRERELEASE PCRE_DATE);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* End of pcre_version.c */
|
144
glib/pcre/pcre_xclass.c
Normal file
144
glib/pcre/pcre_xclass.c
Normal file
@ -0,0 +1,144 @@
|
|||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Written by Philip Hazel
|
||||||
|
Copyright (c) 1997-2006 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/* This module contains an internal function that is used to match an extended
|
||||||
|
class (one that contains characters whose values are > 255). It is used by both
|
||||||
|
pcre_exec() and pcre_dfa_exec(). */
|
||||||
|
|
||||||
|
|
||||||
|
#include "pcre_internal.h"
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Match character against an XCLASS *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This function is called to match a character against an extended class that
|
||||||
|
might contain values > 255.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
c the character
|
||||||
|
data points to the flag byte of the XCLASS data
|
||||||
|
|
||||||
|
Returns: TRUE if character matches, else FALSE
|
||||||
|
*/
|
||||||
|
|
||||||
|
BOOL
|
||||||
|
_pcre_xclass(int c, const uschar *data)
|
||||||
|
{
|
||||||
|
int t;
|
||||||
|
BOOL negated = (*data & XCL_NOT) != 0;
|
||||||
|
|
||||||
|
/* Character values < 256 are matched against a bitmap, if one is present. If
|
||||||
|
not, we still carry on, because there may be ranges that start below 256 in the
|
||||||
|
additional data. */
|
||||||
|
|
||||||
|
if (c < 256)
|
||||||
|
{
|
||||||
|
if ((*data & XCL_MAP) != 0 && (data[1 + c/8] & (1 << (c&7))) != 0)
|
||||||
|
return !negated; /* char found */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* First skip the bit map if present. Then match against the list of Unicode
|
||||||
|
properties or large chars or ranges that end with a large char. We won't ever
|
||||||
|
encounter XCL_PROP or XCL_NOTPROP when UCP support is not compiled. */
|
||||||
|
|
||||||
|
if ((*data++ & XCL_MAP) != 0) data += 32;
|
||||||
|
|
||||||
|
while ((t = *data++) != XCL_END)
|
||||||
|
{
|
||||||
|
int x, y;
|
||||||
|
if (t == XCL_SINGLE)
|
||||||
|
{
|
||||||
|
GETCHARINC(x, data);
|
||||||
|
if (c == x) return !negated;
|
||||||
|
}
|
||||||
|
else if (t == XCL_RANGE)
|
||||||
|
{
|
||||||
|
GETCHARINC(x, data);
|
||||||
|
GETCHARINC(y, data);
|
||||||
|
if (c >= x && c <= y) return !negated;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef SUPPORT_UCP
|
||||||
|
else /* XCL_PROP & XCL_NOTPROP */
|
||||||
|
{
|
||||||
|
int chartype, script;
|
||||||
|
int category = _pcre_ucp_findprop(c, &chartype, &script);
|
||||||
|
|
||||||
|
switch(*data)
|
||||||
|
{
|
||||||
|
case PT_ANY:
|
||||||
|
if (t == XCL_PROP) return !negated;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_LAMP:
|
||||||
|
if ((chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt) ==
|
||||||
|
(t == XCL_PROP)) return !negated;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_GC:
|
||||||
|
if ((data[1] == category) == (t == XCL_PROP)) return !negated;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_PC:
|
||||||
|
if ((data[1] == chartype) == (t == XCL_PROP)) return !negated;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_SC:
|
||||||
|
if ((data[1] == script) == (t == XCL_PROP)) return !negated;
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* This should never occur, but compilers may mutter if there is no
|
||||||
|
default. */
|
||||||
|
|
||||||
|
default:
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
data += 2;
|
||||||
|
}
|
||||||
|
#endif /* SUPPORT_UCP */
|
||||||
|
}
|
||||||
|
|
||||||
|
return negated; /* char did not match */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* End of pcre_xclass.c */
|
133
glib/pcre/ucp.h
Normal file
133
glib/pcre/ucp.h
Normal file
@ -0,0 +1,133 @@
|
|||||||
|
/*************************************************
|
||||||
|
* Unicode Property Table handler *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
#ifndef _UCP_H
|
||||||
|
#define _UCP_H
|
||||||
|
|
||||||
|
/* This file contains definitions of the property values that are returned by
|
||||||
|
the function _pcre_ucp_findprop(). New values that are added for new releases
|
||||||
|
of Unicode should always be at the end of each enum, for backwards
|
||||||
|
compatibility. */
|
||||||
|
|
||||||
|
/* These are the general character categories. */
|
||||||
|
|
||||||
|
enum {
|
||||||
|
ucp_C, /* Other */
|
||||||
|
ucp_L, /* Letter */
|
||||||
|
ucp_M, /* Mark */
|
||||||
|
ucp_N, /* Number */
|
||||||
|
ucp_P, /* Punctuation */
|
||||||
|
ucp_S, /* Symbol */
|
||||||
|
ucp_Z /* Separator */
|
||||||
|
};
|
||||||
|
|
||||||
|
/* These are the particular character types. */
|
||||||
|
|
||||||
|
enum {
|
||||||
|
ucp_Cc, /* Control */
|
||||||
|
ucp_Cf, /* Format */
|
||||||
|
ucp_Cn, /* Unassigned */
|
||||||
|
ucp_Co, /* Private use */
|
||||||
|
ucp_Cs, /* Surrogate */
|
||||||
|
ucp_Ll, /* Lower case letter */
|
||||||
|
ucp_Lm, /* Modifier letter */
|
||||||
|
ucp_Lo, /* Other letter */
|
||||||
|
ucp_Lt, /* Title case letter */
|
||||||
|
ucp_Lu, /* Upper case letter */
|
||||||
|
ucp_Mc, /* Spacing mark */
|
||||||
|
ucp_Me, /* Enclosing mark */
|
||||||
|
ucp_Mn, /* Non-spacing mark */
|
||||||
|
ucp_Nd, /* Decimal number */
|
||||||
|
ucp_Nl, /* Letter number */
|
||||||
|
ucp_No, /* Other number */
|
||||||
|
ucp_Pc, /* Connector punctuation */
|
||||||
|
ucp_Pd, /* Dash punctuation */
|
||||||
|
ucp_Pe, /* Close punctuation */
|
||||||
|
ucp_Pf, /* Final punctuation */
|
||||||
|
ucp_Pi, /* Initial punctuation */
|
||||||
|
ucp_Po, /* Other punctuation */
|
||||||
|
ucp_Ps, /* Open punctuation */
|
||||||
|
ucp_Sc, /* Currency symbol */
|
||||||
|
ucp_Sk, /* Modifier symbol */
|
||||||
|
ucp_Sm, /* Mathematical symbol */
|
||||||
|
ucp_So, /* Other symbol */
|
||||||
|
ucp_Zl, /* Line separator */
|
||||||
|
ucp_Zp, /* Paragraph separator */
|
||||||
|
ucp_Zs /* Space separator */
|
||||||
|
};
|
||||||
|
|
||||||
|
/* These are the script identifications. */
|
||||||
|
|
||||||
|
enum {
|
||||||
|
ucp_Arabic = G_UNICODE_SCRIPT_ARABIC,
|
||||||
|
ucp_Armenian = G_UNICODE_SCRIPT_ARMENIAN,
|
||||||
|
ucp_Bengali = G_UNICODE_SCRIPT_BENGALI,
|
||||||
|
ucp_Bopomofo = G_UNICODE_SCRIPT_BOPOMOFO,
|
||||||
|
ucp_Braille = G_UNICODE_SCRIPT_BRAILLE,
|
||||||
|
ucp_Buginese = G_UNICODE_SCRIPT_BUGINESE,
|
||||||
|
ucp_Buhid = G_UNICODE_SCRIPT_BUHID,
|
||||||
|
ucp_Canadian_Aboriginal = G_UNICODE_SCRIPT_CANADIAN_ABORIGINAL,
|
||||||
|
ucp_Cherokee = G_UNICODE_SCRIPT_CHEROKEE,
|
||||||
|
ucp_Common = G_UNICODE_SCRIPT_COMMON,
|
||||||
|
ucp_Coptic = G_UNICODE_SCRIPT_COPTIC,
|
||||||
|
ucp_Cypriot = G_UNICODE_SCRIPT_CYPRIOT,
|
||||||
|
ucp_Cyrillic = G_UNICODE_SCRIPT_CYRILLIC,
|
||||||
|
ucp_Deseret = G_UNICODE_SCRIPT_DESERET,
|
||||||
|
ucp_Devanagari = G_UNICODE_SCRIPT_DEVANAGARI,
|
||||||
|
ucp_Ethiopic = G_UNICODE_SCRIPT_ETHIOPIC,
|
||||||
|
ucp_Georgian = G_UNICODE_SCRIPT_GEORGIAN,
|
||||||
|
ucp_Glagolitic = G_UNICODE_SCRIPT_GLAGOLITIC,
|
||||||
|
ucp_Gothic = G_UNICODE_SCRIPT_GOTHIC,
|
||||||
|
ucp_Greek = G_UNICODE_SCRIPT_GREEK,
|
||||||
|
ucp_Gujarati = G_UNICODE_SCRIPT_GUJARATI,
|
||||||
|
ucp_Gurmukhi = G_UNICODE_SCRIPT_GURMUKHI,
|
||||||
|
ucp_Han = G_UNICODE_SCRIPT_HAN,
|
||||||
|
ucp_Hangul = G_UNICODE_SCRIPT_HANGUL,
|
||||||
|
ucp_Hanunoo = G_UNICODE_SCRIPT_HANUNOO,
|
||||||
|
ucp_Hebrew = G_UNICODE_SCRIPT_HEBREW,
|
||||||
|
ucp_Hiragana = G_UNICODE_SCRIPT_HIRAGANA,
|
||||||
|
ucp_Inherited = G_UNICODE_SCRIPT_INHERITED,
|
||||||
|
ucp_Kannada = G_UNICODE_SCRIPT_KANNADA,
|
||||||
|
ucp_Katakana = G_UNICODE_SCRIPT_KATAKANA,
|
||||||
|
ucp_Kharoshthi = G_UNICODE_SCRIPT_KHAROSHTHI,
|
||||||
|
ucp_Khmer = G_UNICODE_SCRIPT_KHMER,
|
||||||
|
ucp_Lao = G_UNICODE_SCRIPT_LAO,
|
||||||
|
ucp_Latin = G_UNICODE_SCRIPT_LATIN,
|
||||||
|
ucp_Limbu = G_UNICODE_SCRIPT_LIMBU,
|
||||||
|
ucp_Linear_B = G_UNICODE_SCRIPT_LINEAR_B,
|
||||||
|
ucp_Malayalam = G_UNICODE_SCRIPT_MALAYALAM,
|
||||||
|
ucp_Mongolian = G_UNICODE_SCRIPT_MONGOLIAN,
|
||||||
|
ucp_Myanmar = G_UNICODE_SCRIPT_MYANMAR,
|
||||||
|
ucp_New_Tai_Lue = G_UNICODE_SCRIPT_NEW_TAI_LUE,
|
||||||
|
ucp_Ogham = G_UNICODE_SCRIPT_OGHAM,
|
||||||
|
ucp_Old_Italic = G_UNICODE_SCRIPT_OLD_ITALIC,
|
||||||
|
ucp_Old_Persian = G_UNICODE_SCRIPT_OLD_PERSIAN,
|
||||||
|
ucp_Oriya = G_UNICODE_SCRIPT_ORIYA,
|
||||||
|
ucp_Osmanya = G_UNICODE_SCRIPT_OSMANYA,
|
||||||
|
ucp_Runic = G_UNICODE_SCRIPT_RUNIC,
|
||||||
|
ucp_Shavian = G_UNICODE_SCRIPT_SHAVIAN,
|
||||||
|
ucp_Sinhala = G_UNICODE_SCRIPT_SINHALA,
|
||||||
|
ucp_Syloti_Nagri = G_UNICODE_SCRIPT_SYLOTI_NAGRI,
|
||||||
|
ucp_Syriac = G_UNICODE_SCRIPT_SYRIAC,
|
||||||
|
ucp_Tagalog = G_UNICODE_SCRIPT_TAGALOG,
|
||||||
|
ucp_Tagbanwa = G_UNICODE_SCRIPT_TAGBANWA,
|
||||||
|
ucp_Tai_Le = G_UNICODE_SCRIPT_TAI_LE,
|
||||||
|
ucp_Tamil = G_UNICODE_SCRIPT_TAMIL,
|
||||||
|
ucp_Telugu = G_UNICODE_SCRIPT_TELUGU,
|
||||||
|
ucp_Thaana = G_UNICODE_SCRIPT_THAANA,
|
||||||
|
ucp_Thai = G_UNICODE_SCRIPT_THAI,
|
||||||
|
ucp_Tibetan = G_UNICODE_SCRIPT_TIBETAN,
|
||||||
|
ucp_Tifinagh = G_UNICODE_SCRIPT_TIFINAGH,
|
||||||
|
ucp_Ugaritic = G_UNICODE_SCRIPT_UGARITIC,
|
||||||
|
ucp_Yi = G_UNICODE_SCRIPT_YI,
|
||||||
|
ucp_Balinese = G_UNICODE_SCRIPT_BALINESE, /* New for Unicode 5.0.0 */
|
||||||
|
ucp_Cuneiform = G_UNICODE_SCRIPT_CUNEIFORM, /* New for Unicode 5.0.0 */
|
||||||
|
ucp_Nko = G_UNICODE_SCRIPT_NKO, /* New for Unicode 5.0.0 */
|
||||||
|
ucp_Phags_Pa = G_UNICODE_SCRIPT_PHAGS_PA, /* New for Unicode 5.0.0 */
|
||||||
|
ucp_Phoenician = G_UNICODE_SCRIPT_PHOENICIAN /* New for Unicode 5.0.0 */
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* End of ucp.h */
|
92
glib/pcre/ucpinternal.h
Normal file
92
glib/pcre/ucpinternal.h
Normal file
@ -0,0 +1,92 @@
|
|||||||
|
/*************************************************
|
||||||
|
* Unicode Property Table handler *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
#ifndef _UCPINTERNAL_H
|
||||||
|
#define _UCPINTERNAL_H
|
||||||
|
|
||||||
|
/* Internal header file defining the layout of the bits in each pair of 32-bit
|
||||||
|
words that form a data item in the table. */
|
||||||
|
|
||||||
|
typedef struct cnode {
|
||||||
|
pcre_uint32 f0;
|
||||||
|
pcre_uint32 f1;
|
||||||
|
} cnode;
|
||||||
|
|
||||||
|
/* Things for the f0 field */
|
||||||
|
|
||||||
|
#define f0_scriptmask 0xff000000 /* Mask for script field */
|
||||||
|
#define f0_scriptshift 24 /* Shift for script value */
|
||||||
|
#define f0_rangeflag 0x00f00000 /* Flag for a range item */
|
||||||
|
#define f0_charmask 0x001fffff /* Mask for code point value */
|
||||||
|
|
||||||
|
/* Things for the f1 field */
|
||||||
|
|
||||||
|
#define f1_typemask 0xfc000000 /* Mask for char type field */
|
||||||
|
#define f1_typeshift 26 /* Shift for the type field */
|
||||||
|
#define f1_rangemask 0x0000ffff /* Mask for a range offset */
|
||||||
|
#define f1_casemask 0x0000ffff /* Mask for a case offset */
|
||||||
|
#define f1_caseneg 0xffff8000 /* Bits for negation */
|
||||||
|
|
||||||
|
/* The data consists of a vector of structures of type cnode. The two unsigned
|
||||||
|
32-bit integers are used as follows:
|
||||||
|
|
||||||
|
(f0) (1) The most significant byte holds the script number. The numbers are
|
||||||
|
defined by the enum in ucp.h.
|
||||||
|
|
||||||
|
(2) The 0x00800000 bit is set if this entry defines a range of characters.
|
||||||
|
It is not set if this entry defines a single character
|
||||||
|
|
||||||
|
(3) The 0x00600000 bits are spare.
|
||||||
|
|
||||||
|
(4) The 0x001fffff bits contain the code point. No Unicode code point will
|
||||||
|
ever be greater than 0x0010ffff, so this should be OK for ever.
|
||||||
|
|
||||||
|
(f1) (1) The 0xfc000000 bits contain the character type number. The numbers are
|
||||||
|
defined by an enum in ucp.h.
|
||||||
|
|
||||||
|
(2) The 0x03ff0000 bits are spare.
|
||||||
|
|
||||||
|
(3) The 0x0000ffff bits contain EITHER the unsigned offset to the top of
|
||||||
|
range if this entry defines a range, OR the *signed* offset to the
|
||||||
|
character's "other case" partner if this entry defines a single
|
||||||
|
character. There is no partner if the value is zero.
|
||||||
|
|
||||||
|
-------------------------------------------------------------------------------
|
||||||
|
| script (8) |.|.|.| codepoint (21) || type (6) |.|.| spare (8) | offset (16) |
|
||||||
|
-------------------------------------------------------------------------------
|
||||||
|
| | | | |
|
||||||
|
| | |-> spare | |-> spare
|
||||||
|
| | |
|
||||||
|
| |-> spare |-> spare
|
||||||
|
|
|
||||||
|
|-> range flag
|
||||||
|
|
||||||
|
The upper/lower casing information is set only for characters that come in
|
||||||
|
pairs. The non-one-to-one mappings in the Unicode data are ignored.
|
||||||
|
|
||||||
|
When searching the data, proceed as follows:
|
||||||
|
|
||||||
|
(1) Set up for a binary chop search.
|
||||||
|
|
||||||
|
(2) If the top is not greater than the bottom, the character is not in the
|
||||||
|
table. Its type must therefore be "Cn" ("Unassigned").
|
||||||
|
|
||||||
|
(3) Find the middle vector element.
|
||||||
|
|
||||||
|
(4) Extract the code point and compare. If equal, we are done.
|
||||||
|
|
||||||
|
(5) If the test character is smaller, set the top to the current point, and
|
||||||
|
goto (2).
|
||||||
|
|
||||||
|
(6) If the current entry defines a range, compute the last character by adding
|
||||||
|
the offset, and see if the test character is within the range. If it is,
|
||||||
|
we are done.
|
||||||
|
|
||||||
|
(7) Otherwise, set the bottom to one element past the current point and goto
|
||||||
|
(2).
|
||||||
|
*/
|
||||||
|
|
||||||
|
#endif /* _UCPINTERNAL_H */
|
||||||
|
|
||||||
|
/* End of ucpinternal.h */
|
8
glib/update-pcre/Makefile.am
Normal file
8
glib/update-pcre/Makefile.am
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
EXTRA_DIST = \
|
||||||
|
update.sh \
|
||||||
|
Makefile.am-1 \
|
||||||
|
Makefile.am-2 \
|
||||||
|
digitab.patch \
|
||||||
|
memory.patch \
|
||||||
|
pcre_ucp_searchfuncs.c \
|
||||||
|
pcre_valid_utf8.c
|
28
glib/update-pcre/Makefile.am-1
Normal file
28
glib/update-pcre/Makefile.am-1
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
INCLUDES = \
|
||||||
|
-DG_LOG_DOMAIN=\"GLib-GRegex\" \
|
||||||
|
-DSUPPORT_UCP \
|
||||||
|
-DSUPPORT_UTF8 \
|
||||||
|
-DNEWLINE=-1 \
|
||||||
|
-DMATCH_LIMIT=10000000 \
|
||||||
|
-DMATCH_LIMIT_RECURSION=10000000 \
|
||||||
|
-DMAX_NAME_SIZE=32 \
|
||||||
|
-DMAX_NAME_COUNT=10000 \
|
||||||
|
-DMAX_DUPLENGTH=30000 \
|
||||||
|
-DLINK_SIZE=2 \
|
||||||
|
-DEBCDIC=0 \
|
||||||
|
-DPOSIX_MALLOC_THRESHOLD=10 \
|
||||||
|
-I$(top_srcdir) \
|
||||||
|
-I$(srcdir) \
|
||||||
|
-I$(top_srcdir)/glib \
|
||||||
|
@GLIB_DEBUG_FLAGS@ \
|
||||||
|
-DG_DISABLE_DEPRECATED \
|
||||||
|
$(DEPRECATED_FLAGS)\
|
||||||
|
$(WARN_CFLAGS) \
|
||||||
|
$(PCRE_WARN_CFLAGS) \
|
||||||
|
$(DEP_CFLAGS)
|
||||||
|
|
||||||
|
noinst_LTLIBRARIES = libpcre.la
|
||||||
|
|
||||||
|
libpcre_headers =
|
||||||
|
|
||||||
|
libpcre_la_SOURCES = \
|
10
glib/update-pcre/Makefile.am-2
Normal file
10
glib/update-pcre/Makefile.am-2
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
$(libpcre_headers)
|
||||||
|
|
||||||
|
libpcre_la_LIBADD = $(DEP_LIBS)
|
||||||
|
|
||||||
|
libpcre_la_LDFLAGS = -no-undefined
|
||||||
|
|
||||||
|
EXTRA_DIST = \
|
||||||
|
COPYING \
|
||||||
|
makefile.msc
|
||||||
|
|
133
glib/update-pcre/digitab.patch
Normal file
133
glib/update-pcre/digitab.patch
Normal file
@ -0,0 +1,133 @@
|
|||||||
|
--- pcre_compile.c 2006-10-10 12:00:00.000000000 +0200
|
||||||
|
+++ pcre_compile.c 2006-10-10 12:00:00.000000000 +0200
|
||||||
|
@@ -246,130 +246,6 @@ static const char *error_texts[] = {
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
-/* Table to identify digits and hex digits. This is used when compiling
|
||||||
|
-patterns. Note that the tables in chartables are dependent on the locale, and
|
||||||
|
-may mark arbitrary characters as digits - but the PCRE compiling code expects
|
||||||
|
-to handle only 0-9, a-z, and A-Z as digits when compiling. That is why we have
|
||||||
|
-a private table here. It costs 256 bytes, but it is a lot faster than doing
|
||||||
|
-character value tests (at least in some simple cases I timed), and in some
|
||||||
|
-applications one wants PCRE to compile efficiently as well as match
|
||||||
|
-efficiently.
|
||||||
|
-
|
||||||
|
-For convenience, we use the same bit definitions as in chartables:
|
||||||
|
-
|
||||||
|
- 0x04 decimal digit
|
||||||
|
- 0x08 hexadecimal digit
|
||||||
|
-
|
||||||
|
-Then we can use ctype_digit and ctype_xdigit in the code. */
|
||||||
|
-
|
||||||
|
-#if !EBCDIC /* This is the "normal" case, for ASCII systems */
|
||||||
|
-static const unsigned char digitab[] =
|
||||||
|
- {
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 8- 15 */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - ' */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* ( - / */
|
||||||
|
- 0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c, /* 0 - 7 */
|
||||||
|
- 0x0c,0x0c,0x00,0x00,0x00,0x00,0x00,0x00, /* 8 - ? */
|
||||||
|
- 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* @ - G */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* H - O */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* P - W */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* X - _ */
|
||||||
|
- 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* ` - g */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* h - o */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* p - w */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* x -127 */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
|
||||||
|
-
|
||||||
|
-#else /* This is the "abnormal" case, for EBCDIC systems */
|
||||||
|
-static const unsigned char digitab[] =
|
||||||
|
- {
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 0 */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 8- 15 */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 10 */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 32- 39 20 */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 40- 47 */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 48- 55 30 */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 56- 63 */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - 71 40 */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 72- | */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* & - 87 50 */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 88- ¬ */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - -103 60 */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 104- ? */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 112-119 70 */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 120- " */
|
||||||
|
- 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* 128- g 80 */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* h -143 */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144- p 90 */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* q -159 */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160- x A0 */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* y -175 */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* ^ -183 B0 */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
|
||||||
|
- 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* { - G C0 */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* H -207 */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* } - P D0 */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* Q -223 */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* \ - X E0 */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* Y -239 */
|
||||||
|
- 0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c, /* 0 - 7 F0 */
|
||||||
|
- 0x0c,0x0c,0x00,0x00,0x00,0x00,0x00,0x00};/* 8 -255 */
|
||||||
|
-
|
||||||
|
-static const unsigned char ebcdic_chartab[] = { /* chartable partial dup */
|
||||||
|
- 0x80,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /* 0- 7 */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /* 16- 23 */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /* 32- 39 */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 40- 47 */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 48- 55 */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 56- 63 */
|
||||||
|
- 0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - 71 */
|
||||||
|
- 0x00,0x00,0x00,0x80,0x00,0x80,0x80,0x80, /* 72- | */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* & - 87 */
|
||||||
|
- 0x00,0x00,0x00,0x80,0x80,0x80,0x00,0x00, /* 88- ¬ */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - -103 */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x10,0x00,0x80, /* 104- ? */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 112-119 */
|
||||||
|
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 120- " */
|
||||||
|
- 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* 128- g */
|
||||||
|
- 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* h -143 */
|
||||||
|
- 0x00,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* 144- p */
|
||||||
|
- 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* q -159 */
|
||||||
|
- 0x00,0x00,0x12,0x12,0x12,0x12,0x12,0x12, /* 160- x */
|
||||||
|
- 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* y -175 */
|
||||||
|
- 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* ^ -183 */
|
||||||
|
- 0x00,0x00,0x80,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
|
||||||
|
- 0x80,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* { - G */
|
||||||
|
- 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* H -207 */
|
||||||
|
- 0x00,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* } - P */
|
||||||
|
- 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* Q -223 */
|
||||||
|
- 0x00,0x00,0x12,0x12,0x12,0x12,0x12,0x12, /* \ - X */
|
||||||
|
- 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* Y -239 */
|
||||||
|
- 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
|
||||||
|
- 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x00};/* 8 -255 */
|
||||||
|
-#endif
|
||||||
|
-
|
||||||
|
-
|
||||||
|
/* Definition to allow mutual recursion */
|
||||||
|
|
||||||
|
static BOOL
|
87
glib/update-pcre/memory.patch
Normal file
87
glib/update-pcre/memory.patch
Normal file
@ -0,0 +1,87 @@
|
|||||||
|
diff -r 0f4042339eb5 pcre/pcre.h
|
||||||
|
--- pcre/pcre.h Tue Jul 25 22:39:16 2006 +0200
|
||||||
|
+++ pcre/pcre.h Tue Jul 25 22:52:10 2006 +0200
|
||||||
|
@@ -233,25 +233,14 @@ typedef struct pcre_callout_block {
|
||||||
|
/* ------------------------------------------------------------------ */
|
||||||
|
} pcre_callout_block;
|
||||||
|
|
||||||
|
-/* Indirection for store get and free functions. These can be set to
|
||||||
|
-alternative malloc/free functions if required. Special ones are used in the
|
||||||
|
-non-recursive case for "frames". There is also an optional callout function
|
||||||
|
-that is triggered by the (?) regex item. For Virtual Pascal, these definitions
|
||||||
|
-have to take another form. */
|
||||||
|
-
|
||||||
|
-#ifndef VPCOMPAT
|
||||||
|
-PCRE_DATA_SCOPE void *(*pcre_malloc)(size_t);
|
||||||
|
-PCRE_DATA_SCOPE void (*pcre_free)(void *);
|
||||||
|
-PCRE_DATA_SCOPE void *(*pcre_stack_malloc)(size_t);
|
||||||
|
-PCRE_DATA_SCOPE void (*pcre_stack_free)(void *);
|
||||||
|
+#include "glib.h"
|
||||||
|
+#include "galias.h"
|
||||||
|
+
|
||||||
|
+#define pcre_malloc g_try_malloc
|
||||||
|
+#define pcre_free g_free
|
||||||
|
+#define pcre_stack_malloc g_try_malloc
|
||||||
|
+
|
||||||
|
PCRE_DATA_SCOPE int (*pcre_callout)(pcre_callout_block *);
|
||||||
|
-#else /* VPCOMPAT */
|
||||||
|
-PCRE_DATA_SCOPE void *pcre_malloc(size_t);
|
||||||
|
-PCRE_DATA_SCOPE void pcre_free(void *);
|
||||||
|
-PCRE_DATA_SCOPE void *pcre_stack_malloc(size_t);
|
||||||
|
-PCRE_DATA_SCOPE void pcre_stack_free(void *);
|
||||||
|
-PCRE_DATA_SCOPE int pcre_callout(pcre_callout_block *);
|
||||||
|
-#endif /* VPCOMPAT */
|
||||||
|
|
||||||
|
/* Exported PCRE functions */
|
||||||
|
|
||||||
|
diff -r 0f4042339eb5 pcre/pcre_globals.c
|
||||||
|
--- pcre/pcre_globals.c Tue Jul 25 22:39:16 2006 +0200
|
||||||
|
+++ pcre/pcre_globals.c Tue Jul 25 22:52:10 2006 +0200
|
||||||
|
@@ -50,32 +50,9 @@ differently, and global variables are no
|
||||||
|
#include "pcre_internal.h"
|
||||||
|
|
||||||
|
|
||||||
|
-#ifndef VPCOMPAT
|
||||||
|
-
|
||||||
|
-/**************************************************************************
|
||||||
|
-This code used to be here for use when compiling as a C++ library. However,
|
||||||
|
-according to Dair Grant it is not needed: "
|
||||||
|
-
|
||||||
|
- Including 'extern "C"' in the declaration generates an "initialized and
|
||||||
|
- declared `extern'" warning from gcc 4.0.1. Since we include pcre_internal.h,
|
||||||
|
- which includes pcre.h, which declares these prototypes within an extern "C" {}
|
||||||
|
- block, we shouldn't need the prefix here.
|
||||||
|
-
|
||||||
|
-So, from Release 7.0 I have cut this out.
|
||||||
|
-
|
||||||
|
#ifdef __cplusplus
|
||||||
|
-extern "C" void *(*pcre_malloc)(size_t) = malloc;
|
||||||
|
-extern "C" void (*pcre_free)(void *) = free;
|
||||||
|
-extern "C" void *(*pcre_stack_malloc)(size_t) = malloc;
|
||||||
|
-extern "C" void (*pcre_stack_free)(void *) = free;
|
||||||
|
extern "C" int (*pcre_callout)(pcre_callout_block *) = NULL;
|
||||||
|
#else
|
||||||
|
-**************************************************************************/
|
||||||
|
-
|
||||||
|
-void *(*pcre_malloc)(size_t) = malloc;
|
||||||
|
-void (*pcre_free)(void *) = free;
|
||||||
|
-void *(*pcre_stack_malloc)(size_t) = malloc;
|
||||||
|
-void (*pcre_stack_free)(void *) = free;
|
||||||
|
int (*pcre_callout)(pcre_callout_block *) = NULL;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
diff -r 0f4042339eb5 pcre/pcre_internal.h
|
||||||
|
--- pcre/pcre_internal.h Tue Jul 25 22:39:16 2006 +0200
|
||||||
|
+++ pcre/pcre_internal.h Tue Jul 25 22:52:10 2006 +0200
|
||||||
|
@@ -480,10 +480,7 @@ variable-length repeat, or a anything ot
|
||||||
|
|
||||||
|
/* Miscellaneous definitions */
|
||||||
|
|
||||||
|
-typedef int BOOL;
|
||||||
|
-
|
||||||
|
-#define FALSE 0
|
||||||
|
-#define TRUE 1
|
||||||
|
+typedef gboolean BOOL;
|
||||||
|
|
||||||
|
/* Escape items that are just an encoding of a particular data value. */
|
||||||
|
|
126
glib/update-pcre/pcre_ucp_searchfuncs.c
Normal file
126
glib/update-pcre/pcre_ucp_searchfuncs.c
Normal file
@ -0,0 +1,126 @@
|
|||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Written by Philip Hazel
|
||||||
|
Copyright (c) 1997-2006 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* This file has been modified to use glib instead of the internal table
|
||||||
|
* in ucptable.c -- Marco Barisione */
|
||||||
|
|
||||||
|
/* This module contains code for searching the table of Unicode character
|
||||||
|
properties. */
|
||||||
|
|
||||||
|
#include "pcre_internal.h"
|
||||||
|
|
||||||
|
#include "ucp.h" /* Category definitions */
|
||||||
|
#include "ucpinternal.h" /* Internal table details */
|
||||||
|
|
||||||
|
|
||||||
|
/* Table to translate from particular type value to the general value. */
|
||||||
|
|
||||||
|
static int ucp_gentype[] = {
|
||||||
|
ucp_C, ucp_C, ucp_C, ucp_C, ucp_C, /* Cc, Cf, Cn, Co, Cs */
|
||||||
|
ucp_L, ucp_L, ucp_L, ucp_L, ucp_L, /* Ll, Lu, Lm, Lo, Lt */
|
||||||
|
ucp_M, ucp_M, ucp_M, /* Mc, Me, Mn */
|
||||||
|
ucp_N, ucp_N, ucp_N, /* Nd, Nl, No */
|
||||||
|
ucp_P, ucp_P, ucp_P, ucp_P, ucp_P, /* Pc, Pd, Pe, Pf, Pi */
|
||||||
|
ucp_P, ucp_P, /* Ps, Po */
|
||||||
|
ucp_S, ucp_S, ucp_S, ucp_S, /* Sc, Sk, Sm, So */
|
||||||
|
ucp_Z, ucp_Z, ucp_Z /* Zl, Zp, Zs */
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Search table and return type *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* Three values are returned: the category is ucp_C, ucp_L, etc. The detailed
|
||||||
|
character type is ucp_Lu, ucp_Nd, etc. The script is ucp_Latin, etc.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
c the character value
|
||||||
|
type_ptr the detailed character type is returned here
|
||||||
|
script_ptr the script is returned here
|
||||||
|
|
||||||
|
Returns: the character type category
|
||||||
|
*/
|
||||||
|
|
||||||
|
int
|
||||||
|
_pcre_ucp_findprop(const unsigned int c, int *type_ptr, int *script_ptr)
|
||||||
|
{
|
||||||
|
/* Note that the Unicode types have the same values in glib and in
|
||||||
|
* PCRE, so ucp_Ll == G_UNICODE_LOWERCASE_LETTER,
|
||||||
|
* ucp_Zs == G_UNICODE_SPACE_SEPARATOR, and so on. */
|
||||||
|
*type_ptr = g_unichar_type(c);
|
||||||
|
*script_ptr = g_unichar_get_script(c);
|
||||||
|
return ucp_gentype[*type_ptr];
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Search table and return other case *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* If the given character is a letter, and there is another case for the
|
||||||
|
letter, return the other case. Otherwise, return -1.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
c the character value
|
||||||
|
|
||||||
|
Returns: the other case or NOTACHAR if none
|
||||||
|
*/
|
||||||
|
|
||||||
|
unsigned int
|
||||||
|
_pcre_ucp_othercase(const unsigned int c)
|
||||||
|
{
|
||||||
|
int other_case = NOTACHAR;
|
||||||
|
|
||||||
|
if (g_unichar_islower(c))
|
||||||
|
other_case = g_unichar_toupper(c);
|
||||||
|
else if (g_unichar_isupper(c))
|
||||||
|
other_case = g_unichar_tolower(c);
|
||||||
|
|
||||||
|
if (other_case == c)
|
||||||
|
other_case = NOTACHAR;
|
||||||
|
|
||||||
|
return other_case;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* End of pcre_ucp_searchfuncs.c */
|
13
glib/update-pcre/pcre_valid_utf8.c
Normal file
13
glib/update-pcre/pcre_valid_utf8.c
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
#include "pcre_internal.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This function is not needed by GRegex, so print an error and
|
||||||
|
* return always -1, that is the string is a valid UTF-8 encoded
|
||||||
|
* string.
|
||||||
|
*/
|
||||||
|
int
|
||||||
|
_pcre_valid_utf8(const uschar *string, int length)
|
||||||
|
{
|
||||||
|
g_warning ("%s: this function should not be called", G_STRLOC);
|
||||||
|
return -1;
|
||||||
|
}
|
141
glib/update-pcre/ucp.patch
Normal file
141
glib/update-pcre/ucp.patch
Normal file
@ -0,0 +1,141 @@
|
|||||||
|
--- pcre/ucp.h 2006-07-05 13:28:01.000000000 +0200
|
||||||
|
+++ pcre/ucp.h 2006-10-09 16:27:19.000000000 +0200
|
||||||
|
@@ -60,72 +60,72 @@ enum {
|
||||||
|
/* These are the script identifications. */
|
||||||
|
|
||||||
|
enum {
|
||||||
|
- ucp_Arabic,
|
||||||
|
- ucp_Armenian,
|
||||||
|
- ucp_Bengali,
|
||||||
|
- ucp_Bopomofo,
|
||||||
|
- ucp_Braille,
|
||||||
|
- ucp_Buginese,
|
||||||
|
- ucp_Buhid,
|
||||||
|
- ucp_Canadian_Aboriginal,
|
||||||
|
- ucp_Cherokee,
|
||||||
|
- ucp_Common,
|
||||||
|
- ucp_Coptic,
|
||||||
|
- ucp_Cypriot,
|
||||||
|
- ucp_Cyrillic,
|
||||||
|
- ucp_Deseret,
|
||||||
|
- ucp_Devanagari,
|
||||||
|
- ucp_Ethiopic,
|
||||||
|
- ucp_Georgian,
|
||||||
|
- ucp_Glagolitic,
|
||||||
|
- ucp_Gothic,
|
||||||
|
- ucp_Greek,
|
||||||
|
- ucp_Gujarati,
|
||||||
|
- ucp_Gurmukhi,
|
||||||
|
- ucp_Han,
|
||||||
|
- ucp_Hangul,
|
||||||
|
- ucp_Hanunoo,
|
||||||
|
- ucp_Hebrew,
|
||||||
|
- ucp_Hiragana,
|
||||||
|
- ucp_Inherited,
|
||||||
|
- ucp_Kannada,
|
||||||
|
- ucp_Katakana,
|
||||||
|
- ucp_Kharoshthi,
|
||||||
|
- ucp_Khmer,
|
||||||
|
- ucp_Lao,
|
||||||
|
- ucp_Latin,
|
||||||
|
- ucp_Limbu,
|
||||||
|
- ucp_Linear_B,
|
||||||
|
- ucp_Malayalam,
|
||||||
|
- ucp_Mongolian,
|
||||||
|
- ucp_Myanmar,
|
||||||
|
- ucp_New_Tai_Lue,
|
||||||
|
- ucp_Ogham,
|
||||||
|
- ucp_Old_Italic,
|
||||||
|
- ucp_Old_Persian,
|
||||||
|
- ucp_Oriya,
|
||||||
|
- ucp_Osmanya,
|
||||||
|
- ucp_Runic,
|
||||||
|
- ucp_Shavian,
|
||||||
|
- ucp_Sinhala,
|
||||||
|
- ucp_Syloti_Nagri,
|
||||||
|
- ucp_Syriac,
|
||||||
|
- ucp_Tagalog,
|
||||||
|
- ucp_Tagbanwa,
|
||||||
|
- ucp_Tai_Le,
|
||||||
|
- ucp_Tamil,
|
||||||
|
- ucp_Telugu,
|
||||||
|
- ucp_Thaana,
|
||||||
|
- ucp_Thai,
|
||||||
|
- ucp_Tibetan,
|
||||||
|
- ucp_Tifinagh,
|
||||||
|
- ucp_Ugaritic,
|
||||||
|
- ucp_Yi,
|
||||||
|
- ucp_Balinese, /* New for Unicode 5.0.0 */
|
||||||
|
- ucp_Cuneiform, /* New for Unicode 5.0.0 */
|
||||||
|
- ucp_Nko, /* New for Unicode 5.0.0 */
|
||||||
|
- ucp_Phags_Pa, /* New for Unicode 5.0.0 */
|
||||||
|
- ucp_Phoenician /* New for Unicode 5.0.0 */
|
||||||
|
+ ucp_Arabic = G_UNICODE_SCRIPT_ARABIC,
|
||||||
|
+ ucp_Armenian = G_UNICODE_SCRIPT_ARMENIAN,
|
||||||
|
+ ucp_Bengali = G_UNICODE_SCRIPT_BENGALI,
|
||||||
|
+ ucp_Bopomofo = G_UNICODE_SCRIPT_BOPOMOFO,
|
||||||
|
+ ucp_Braille = G_UNICODE_SCRIPT_BRAILLE,
|
||||||
|
+ ucp_Buginese = G_UNICODE_SCRIPT_BUGINESE,
|
||||||
|
+ ucp_Buhid = G_UNICODE_SCRIPT_BUHID,
|
||||||
|
+ ucp_Canadian_Aboriginal = G_UNICODE_SCRIPT_CANADIAN_ABORIGINAL,
|
||||||
|
+ ucp_Cherokee = G_UNICODE_SCRIPT_CHEROKEE,
|
||||||
|
+ ucp_Common = G_UNICODE_SCRIPT_COMMON,
|
||||||
|
+ ucp_Coptic = G_UNICODE_SCRIPT_COPTIC,
|
||||||
|
+ ucp_Cypriot = G_UNICODE_SCRIPT_CYPRIOT,
|
||||||
|
+ ucp_Cyrillic = G_UNICODE_SCRIPT_CYRILLIC,
|
||||||
|
+ ucp_Deseret = G_UNICODE_SCRIPT_DESERET,
|
||||||
|
+ ucp_Devanagari = G_UNICODE_SCRIPT_DEVANAGARI,
|
||||||
|
+ ucp_Ethiopic = G_UNICODE_SCRIPT_ETHIOPIC,
|
||||||
|
+ ucp_Georgian = G_UNICODE_SCRIPT_GEORGIAN,
|
||||||
|
+ ucp_Glagolitic = G_UNICODE_SCRIPT_GLAGOLITIC,
|
||||||
|
+ ucp_Gothic = G_UNICODE_SCRIPT_GOTHIC,
|
||||||
|
+ ucp_Greek = G_UNICODE_SCRIPT_GREEK,
|
||||||
|
+ ucp_Gujarati = G_UNICODE_SCRIPT_GUJARATI,
|
||||||
|
+ ucp_Gurmukhi = G_UNICODE_SCRIPT_GURMUKHI,
|
||||||
|
+ ucp_Han = G_UNICODE_SCRIPT_HAN,
|
||||||
|
+ ucp_Hangul = G_UNICODE_SCRIPT_HANGUL,
|
||||||
|
+ ucp_Hanunoo = G_UNICODE_SCRIPT_HANUNOO,
|
||||||
|
+ ucp_Hebrew = G_UNICODE_SCRIPT_HEBREW,
|
||||||
|
+ ucp_Hiragana = G_UNICODE_SCRIPT_HIRAGANA,
|
||||||
|
+ ucp_Inherited = G_UNICODE_SCRIPT_INHERITED,
|
||||||
|
+ ucp_Kannada = G_UNICODE_SCRIPT_KANNADA,
|
||||||
|
+ ucp_Katakana = G_UNICODE_SCRIPT_KATAKANA,
|
||||||
|
+ ucp_Kharoshthi = G_UNICODE_SCRIPT_KHAROSHTHI,
|
||||||
|
+ ucp_Khmer = G_UNICODE_SCRIPT_KHMER,
|
||||||
|
+ ucp_Lao = G_UNICODE_SCRIPT_LAO,
|
||||||
|
+ ucp_Latin = G_UNICODE_SCRIPT_LATIN,
|
||||||
|
+ ucp_Limbu = G_UNICODE_SCRIPT_LIMBU,
|
||||||
|
+ ucp_Linear_B = G_UNICODE_SCRIPT_LINEAR_B,
|
||||||
|
+ ucp_Malayalam = G_UNICODE_SCRIPT_MALAYALAM,
|
||||||
|
+ ucp_Mongolian = G_UNICODE_SCRIPT_MONGOLIAN,
|
||||||
|
+ ucp_Myanmar = G_UNICODE_SCRIPT_MYANMAR,
|
||||||
|
+ ucp_New_Tai_Lue = G_UNICODE_SCRIPT_NEW_TAI_LUE,
|
||||||
|
+ ucp_Ogham = G_UNICODE_SCRIPT_OGHAM,
|
||||||
|
+ ucp_Old_Italic = G_UNICODE_SCRIPT_OLD_ITALIC,
|
||||||
|
+ ucp_Old_Persian = G_UNICODE_SCRIPT_OLD_PERSIAN,
|
||||||
|
+ ucp_Oriya = G_UNICODE_SCRIPT_ORIYA,
|
||||||
|
+ ucp_Osmanya = G_UNICODE_SCRIPT_OSMANYA,
|
||||||
|
+ ucp_Runic = G_UNICODE_SCRIPT_RUNIC,
|
||||||
|
+ ucp_Shavian = G_UNICODE_SCRIPT_SHAVIAN,
|
||||||
|
+ ucp_Sinhala = G_UNICODE_SCRIPT_SINHALA,
|
||||||
|
+ ucp_Syloti_Nagri = G_UNICODE_SCRIPT_SYLOTI_NAGRI,
|
||||||
|
+ ucp_Syriac = G_UNICODE_SCRIPT_SYRIAC,
|
||||||
|
+ ucp_Tagalog = G_UNICODE_SCRIPT_TAGALOG,
|
||||||
|
+ ucp_Tagbanwa = G_UNICODE_SCRIPT_TAGBANWA,
|
||||||
|
+ ucp_Tai_Le = G_UNICODE_SCRIPT_TAI_LE,
|
||||||
|
+ ucp_Tamil = G_UNICODE_SCRIPT_TAMIL,
|
||||||
|
+ ucp_Telugu = G_UNICODE_SCRIPT_TELUGU,
|
||||||
|
+ ucp_Thaana = G_UNICODE_SCRIPT_THAANA,
|
||||||
|
+ ucp_Thai = G_UNICODE_SCRIPT_THAI,
|
||||||
|
+ ucp_Tibetan = G_UNICODE_SCRIPT_TIBETAN,
|
||||||
|
+ ucp_Tifinagh = G_UNICODE_SCRIPT_TIFINAGH,
|
||||||
|
+ ucp_Ugaritic = G_UNICODE_SCRIPT_UGARITIC,
|
||||||
|
+ ucp_Yi = G_UNICODE_SCRIPT_YI,
|
||||||
|
+ ucp_Balinese = G_UNICODE_SCRIPT_BALINESE, /* New for Unicode 5.0.0 */
|
||||||
|
+ ucp_Cuneiform = G_UNICODE_SCRIPT_CUNEIFORM, /* New for Unicode 5.0.0 */
|
||||||
|
+ ucp_Nko = G_UNICODE_SCRIPT_NKO, /* New for Unicode 5.0.0 */
|
||||||
|
+ ucp_Phags_Pa = G_UNICODE_SCRIPT_PHAGS_PA, /* New for Unicode 5.0.0 */
|
||||||
|
+ ucp_Phoenician = G_UNICODE_SCRIPT_PHOENICIAN /* New for Unicode 5.0.0 */
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
159
glib/update-pcre/update.sh
Normal file
159
glib/update-pcre/update.sh
Normal file
@ -0,0 +1,159 @@
|
|||||||
|
#! /bin/sh
|
||||||
|
|
||||||
|
# Directory with the helper files (patches, Makefile.am fragments). The path
# is relative to the pcre/ directory, which the script enters further down.
IN="../update-pcre"
# Directory containing the PCRE sources to import (first argument).
PCRE=$1

# Without an argument, or when help is requested, print the usage and stop.
if [ "x$PCRE" = x ] || [ "x$PCRE" = x--help ] || [ "x$PCRE" = x-h ]; then
    cat >&2 << EOF

$0 PCRE-DIR

Updates the local PCRE copy with a different version of the library,
contained in the directory PCRE-DIR.

This will delete the content of the local pcre directory, copy the
necessary files from PCRE-DIR, and generate other needed files, such
as Makefile.am
EOF
    exit
fi

# The script works on ./pcre, so it has to be started from the directory
# that holds the GRegex sources. Diagnostics go to stderr.
if [ ! -f gregex.h ]; then
    echo "This script should be executed from the directory containing gregex.c." >&2
    exit 1
fi

# Sanity check on the directory given by the user.
if [ ! -f "$PCRE/Makefile.in" ] || [ ! -f "$PCRE/pcre_compile.c" ]; then
    echo "'$PCRE' does not contain a valid PCRE version." >&2
    exit 1
fi

# The rest of the script changes directory before using $PCRE, so turn a
# relative PCRE-DIR into an absolute path while it still resolves.
PCRE=`cd "$PCRE" && pwd` || exit 1
|
||||||
|
|
||||||
|
|
||||||
|
echo "Deleting old PCRE library"
|
||||||
|
mv pcre/.svn tmp-pcre-svn
|
||||||
|
rm -R pcre 2> /dev/null
|
||||||
|
mkdir pcre
|
||||||
|
cd pcre
|
||||||
|
|
||||||
|
# pcre_chartables.c is generated by dfatables.
|
||||||
|
# We do not want to compile and execute dfatables.c every time, because
|
||||||
|
# this could be a problem (e.g. when cross-compiling), so now generate
|
||||||
|
# the file and then distribute it with GRegex.
|
||||||
|
echo "Generating pcre_chartables.c"
|
||||||
|
cp -R $PCRE tmp-build
|
||||||
|
cd tmp-build
|
||||||
|
./configure --enable-utf8 --enable-unicode-properties --disable-cpp > /dev/null
|
||||||
|
make pcre_chartables.c > /dev/null
|
||||||
|
cat > ../pcre_chartables.c << \EOF
|
||||||
|
/* This file is autogenerated by ../update-pcre/update.sh during
|
||||||
|
* the update of the local copy of PCRE.
|
||||||
|
*/
|
||||||
|
EOF
|
||||||
|
cat pcre_chartables.c >> ../pcre_chartables.c
|
||||||
|
cd ..
|
||||||
|
rm -R tmp-build
|
||||||
|
|
||||||
|
# Compiled C files.
|
||||||
|
echo "Generating makefiles"
|
||||||
|
all_files=`awk '/^OBJ = /, /^\\s*$/ \
|
||||||
|
{ \
|
||||||
|
sub("^OBJ = ", ""); \
|
||||||
|
sub(".@OBJEXT@[[:blank:]]*\\\\\\\\", ""); \
|
||||||
|
sub("\\\\$\\\\(POSIX_OBJ\\\\)", ""); \
|
||||||
|
print; \
|
||||||
|
}' \
|
||||||
|
$PCRE/Makefile.in`
|
||||||
|
|
||||||
|
# Headers.
|
||||||
|
included_files="pcre.h pcre_internal.h ucp.h ucpinternal.h"
|
||||||
|
|
||||||
|
# Generate Makefile.am.
|
||||||
|
cat $IN/Makefile.am-1 > Makefile.am
|
||||||
|
for name in $all_files; do
|
||||||
|
echo " $name.c \\" >> Makefile.am
|
||||||
|
if [ $name != pcre_chartables ]; then
|
||||||
|
# pcre_chartables.c is a generated file.
|
||||||
|
cp $PCRE/$name.c .
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
for f in $included_files; do
|
||||||
|
echo " $f \\" >> Makefile.am
|
||||||
|
cp $PCRE/$f .
|
||||||
|
done
|
||||||
|
cat $IN/Makefile.am-2 >> Makefile.am
|
||||||
|
|
||||||
|
# Generate makefile.msc
|
||||||
|
cat > makefile.msc << EOF
|
||||||
|
!IFDEF DEBUG
|
||||||
|
CRT=-MDd
|
||||||
|
!ELSE
|
||||||
|
CRT=-MD
|
||||||
|
!ENDIF
|
||||||
|
|
||||||
|
CFLAGS = \\
|
||||||
|
-I ..\\.. \\
|
||||||
|
-DHAVE_CONFIG_H \\
|
||||||
|
-DHAVE_LONG_LONG_FORMAT \\
|
||||||
|
-DSUPPORT_UCP \\
|
||||||
|
-DSUPPORT_UTF8 \\
|
||||||
|
-DNEWLINE=10 \\
|
||||||
|
-DMATCH_LIMIT=10000000 \\
|
||||||
|
-DMATCH_LIMIT_RECURSION=10000000 \\
|
||||||
|
-DMAX_NAME_SIZE=32 \\
|
||||||
|
-DMAX_NAME_COUNT=10000 \\
|
||||||
|
-DMAX_DUPLENGTH=30000 \\
|
||||||
|
-DLINK_SIZE=2 \\
|
||||||
|
-DEBCDIC=0 \\
|
||||||
|
-DPOSIX_MALLOC_THRESHOLD=10
|
||||||
|
|
||||||
|
OBJECTS = \\
|
||||||
|
`
|
||||||
|
for f in $all_files; do
|
||||||
|
echo " $f.obj \\\\"
|
||||||
|
done
|
||||||
|
`
|
||||||
|
|
||||||
|
pcre.lib : \$(OBJECTS)
|
||||||
|
lib -out:pcre.lib \$(OBJECTS)
|
||||||
|
|
||||||
|
.c.obj:
|
||||||
|
\$(CC) \$(CRT) \$(CFLAGS) -Ox -GD -c $<
|
||||||
|
EOF
|
||||||
|
|
||||||
|
echo "Patching PCRE"
|
||||||
|
|
||||||
|
# Copy the license.
|
||||||
|
cp $PCRE/COPYING .
|
||||||
|
|
||||||
|
# Use glib for memory allocation.
|
||||||
|
patch > /dev/null < $IN/memory.patch
|
||||||
|
|
||||||
|
# Copy the modified version of pcre_valid_utf8.c.
|
||||||
|
cp $IN/pcre_valid_utf8.c .
|
||||||
|
|
||||||
|
# Copy the modified version of pcre_ucp_searchfuncs.c that uses glib
|
||||||
|
# for Unicode properties.
|
||||||
|
cp $IN/pcre_ucp_searchfuncs.c .
|
||||||
|
patch > /dev/null < $IN/ucp.patch
|
||||||
|
|
||||||
|
# Remove the digitab array in pcre_compile.c.
|
||||||
|
patch > /dev/null < $IN/digitab.patch
|
||||||
|
sed -i -e 's/(digitab\[\(.*\)\] & ctype_digit)/g_ascii_isdigit(\1)/' pcre_compile.c
|
||||||
|
sed -i -e 's/(digitab\[\(.*\)\] & ctype_xdigit)/g_ascii_isxdigit(\1)/' pcre_compile.c
|
||||||
|
|
||||||
|
# Reduce the number of relocations.
|
||||||
|
$IN/make_utt.py
|
||||||
|
patch > /dev/null < $IN/utt.patch
|
||||||
|
patch > /dev/null < $IN/table-reduction.patch
|
||||||
|
|
||||||
|
# Copy back the old SVN directory.
|
||||||
|
mv ../tmp-pcre-svn .svn
|
||||||
|
|
||||||
|
|
||||||
|
cat << EOF
|
||||||
|
|
||||||
|
Update completed. You now should check that everything is working.
|
||||||
|
Remember to update the regex syntax doc with the new features
|
||||||
|
(docs/reference/glib/regex-syntax.sgml) and to run the tests.
|
||||||
|
EOF
|
||||||
|
|
@ -1,6 +1,12 @@
|
|||||||
SUBDIRS=gobject refcount
|
SUBDIRS=gobject refcount
|
||||||
|
|
||||||
INCLUDES = -g -I$(top_srcdir) -I$(top_srcdir)/glib -I$(top_srcdir)/gmodule $(GLIB_DEBUG_FLAGS)
|
if ENABLE_REGEX
|
||||||
|
enable_regex = -DENABLE_REGEX
|
||||||
|
else
|
||||||
|
enable_regex =
|
||||||
|
endif
|
||||||
|
|
||||||
|
INCLUDES = -g -I$(top_srcdir) -I$(top_srcdir)/glib -I$(top_srcdir)/gmodule $(GLIB_DEBUG_FLAGS) $(enable_regex)
|
||||||
|
|
||||||
EFENCE=
|
EFENCE=
|
||||||
|
|
||||||
@ -112,7 +118,8 @@ test_programs = \
|
|||||||
unicode-encoding \
|
unicode-encoding \
|
||||||
utf8-validate \
|
utf8-validate \
|
||||||
utf8-pointer \
|
utf8-pointer \
|
||||||
uri-test
|
uri-test \
|
||||||
|
regex-test
|
||||||
|
|
||||||
test_scripts = run-markup-tests.sh run-collate-tests.sh run-bookmark-test.sh
|
test_scripts = run-markup-tests.sh run-collate-tests.sh run-bookmark-test.sh
|
||||||
|
|
||||||
@ -183,6 +190,7 @@ unicode_collate_LDADD = $(progs_ldadd)
|
|||||||
utf8_validate_LDADD = $(progs_ldadd)
|
utf8_validate_LDADD = $(progs_ldadd)
|
||||||
utf8_pointer_LDADD = $(progs_ldadd)
|
utf8_pointer_LDADD = $(progs_ldadd)
|
||||||
uri_test_LDADD = $(progs_ldadd)
|
uri_test_LDADD = $(progs_ldadd)
|
||||||
|
regex_test_LDADD = $(progs_ldadd)
|
||||||
|
|
||||||
lib_LTLIBRARIES = libmoduletestplugin_a.la libmoduletestplugin_b.la
|
lib_LTLIBRARIES = libmoduletestplugin_a.la libmoduletestplugin_b.la
|
||||||
|
|
||||||
|
2607
tests/regex-test.c
Normal file
2607
tests/regex-test.c
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user