diff -pru --unidirectional-new-file Makefile.in regex/irssi-1.0.2/Makefile.in --- Makefile.in 2017-03-10 17:43:21.000000000 +0100 +++ regex/irssi-1.0.2/Makefile.in 2017-03-11 21:57:27.000000000 +0100 @@ -207,7 +207,7 @@ am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/src/perl/textui/Makefile.PL.in \ $(top_srcdir)/src/perl/ui/Makefile.PL.in AUTHORS COPYING \ ChangeLog INSTALL NEWS TODO build-aux/compile \ - build-aux/config.guess build-aux/config.sub \ + build-aux/config.guess build-aux/config.sub build-aux/depcomp \ build-aux/install-sh build-aux/ltmain.sh build-aux/missing DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) distdir = $(PACKAGE)-$(VERSION) diff -pru --unidirectional-new-file configure regex/irssi-1.0.2/configure --- configure 2017-03-10 17:43:20.000000000 +0100 +++ regex/irssi-1.0.2/configure 2017-03-11 21:57:27.000000000 +0100 @@ -638,6 +638,8 @@ COMMON_NOUI_LIBS irc_MODULES CHAT_MODULES PROG_LIBS +USE_GREGEX_FALSE +USE_GREGEX_TRUE HAVE_PERL_FALSE HAVE_PERL_TRUE BUILD_IRSSIPROXY_FALSE @@ -13834,6 +13836,14 @@ else HAVE_PERL_FALSE= fi + if test "x$want_gregex" = "xyes"; then + USE_GREGEX_TRUE= + USE_GREGEX_FALSE='#' +else + USE_GREGEX_TRUE='#' + USE_GREGEX_FALSE= +fi + # move LIBS to PROG_LIBS so they're not tried to be used when linking eg. perl libraries PROG_LIBS=$LIBS @@ -14086,6 +14096,10 @@ if test -z "${HAVE_PERL_TRUE}" && test - as_fn_error $? "conditional \"HAVE_PERL\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi +if test -z "${USE_GREGEX_TRUE}" && test -z "${USE_GREGEX_FALSE}"; then + as_fn_error $? "conditional \"USE_GREGEX\" was never defined. +Usually this means the macro was only invoked conditionally." 
"$LINENO" 5 +fi : "${CONFIG_STATUS=./config.status}" ac_write_fail=0 diff -pru --unidirectional-new-file configure.ac regex/irssi-1.0.2/configure.ac --- configure.ac 2017-03-10 17:31:03.000000000 +0100 +++ regex/irssi-1.0.2/configure.ac 2017-03-11 21:57:00.000000000 +0100 @@ -458,6 +458,7 @@ AM_CONDITIONAL(BUILD_TEXTUI, test "$want AM_CONDITIONAL(BUILD_IRSSIBOT, test "$want_irssibot" = "yes") AM_CONDITIONAL(BUILD_IRSSIPROXY, test "$want_irssiproxy" = "yes") AM_CONDITIONAL(HAVE_PERL, test "$want_perl" != "no") +AM_CONDITIONAL(USE_GREGEX, test "x$want_gregex" = "xyes") # move LIBS to PROG_LIBS so they're not tried to be used when linking eg. perl libraries PROG_LIBS=$LIBS diff -pru --unidirectional-new-file src/core/Makefile.am regex/irssi-1.0.2/src/core/Makefile.am --- src/core/Makefile.am 2017-03-04 21:30:38.000000000 +0100 +++ regex/irssi-1.0.2/src/core/Makefile.am 2017-03-11 21:57:00.000000000 +0100 @@ -7,6 +7,12 @@ AM_CPPFLAGS = \ -DSYSCONFDIR=\""$(sysconfdir)"\" \ -DMODULEDIR=\""$(libdir)/irssi/modules"\" +if USE_GREGEX +regex_impl=iregex-gregex.c +else +regex_impl=iregex-regexh.c +endif + libcore_a_SOURCES = \ args.c \ channels.c \ @@ -45,6 +51,7 @@ libcore_a_SOURCES = \ signals.c \ special-vars.c \ utf8.c \ + $(regex_impl) \ wcwidth.c \ tls.c \ write-buffer.c @@ -97,6 +104,7 @@ pkginc_core_HEADERS = \ signals.h \ special-vars.h \ utf8.h \ + iregex.h \ window-item-def.h \ tls.h \ write-buffer.h \ diff -pru --unidirectional-new-file src/core/Makefile.in regex/irssi-1.0.2/src/core/Makefile.in --- src/core/Makefile.in 2017-03-10 17:43:21.000000000 +0100 +++ regex/irssi-1.0.2/src/core/Makefile.in 2017-03-11 21:57:27.000000000 +0100 @@ -111,6 +111,17 @@ am__v_AR_0 = @echo " AR " $@; am__v_AR_1 = libcore_a_AR = $(AR) $(ARFLAGS) libcore_a_LIBADD = +am__libcore_a_SOURCES_DIST = args.c channels.c channels-setup.c \ + commands.c chat-commands.c chat-protocols.c chatnets.c core.c \ + expandos.c ignore.c levels.c line-split.c log.c log-away.c \ + masks.c misc.c modules.c 
modules-load.c net-disconnect.c \ + net-nonblock.c net-sendbuffer.c network.c network-openssl.c \ + nicklist.c nickmatch-cache.c pidwait.c queries.c rawlog.c \ + recode.c servers.c servers-reconnect.c servers-setup.c \ + session.c settings.c signals.c special-vars.c utf8.c \ + iregex-regexh.c iregex-gregex.c wcwidth.c tls.c write-buffer.c +@USE_GREGEX_FALSE@am__objects_1 = iregex-regexh.$(OBJEXT) +@USE_GREGEX_TRUE@am__objects_1 = iregex-gregex.$(OBJEXT) am_libcore_a_OBJECTS = args.$(OBJEXT) channels.$(OBJEXT) \ channels-setup.$(OBJEXT) commands.$(OBJEXT) \ chat-commands.$(OBJEXT) chat-protocols.$(OBJEXT) \ @@ -125,8 +136,8 @@ am_libcore_a_OBJECTS = args.$(OBJEXT) ch rawlog.$(OBJEXT) recode.$(OBJEXT) servers.$(OBJEXT) \ servers-reconnect.$(OBJEXT) servers-setup.$(OBJEXT) \ session.$(OBJEXT) settings.$(OBJEXT) signals.$(OBJEXT) \ - special-vars.$(OBJEXT) utf8.$(OBJEXT) wcwidth.$(OBJEXT) \ - tls.$(OBJEXT) write-buffer.$(OBJEXT) + special-vars.$(OBJEXT) utf8.$(OBJEXT) $(am__objects_1) \ + wcwidth.$(OBJEXT) tls.$(OBJEXT) write-buffer.$(OBJEXT) libcore_a_OBJECTS = $(am_libcore_a_OBJECTS) AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) @@ -167,7 +178,7 @@ am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V am__v_CCLD_0 = @echo " CCLD " $@; am__v_CCLD_1 = SOURCES = $(libcore_a_SOURCES) -DIST_SOURCES = $(libcore_a_SOURCES) +DIST_SOURCES = $(am__libcore_a_SOURCES_DIST) am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ @@ -378,6 +389,8 @@ AM_CPPFLAGS = \ -DSYSCONFDIR=\""$(sysconfdir)"\" \ -DMODULEDIR=\""$(libdir)/irssi/modules"\" +@USE_GREGEX_FALSE@regex_impl = iregex-regexh.c +@USE_GREGEX_TRUE@regex_impl = iregex-gregex.c libcore_a_SOURCES = \ args.c \ channels.c \ @@ -416,6 +429,7 @@ libcore_a_SOURCES = \ signals.c \ special-vars.c \ utf8.c \ + $(regex_impl) \ wcwidth.c \ tls.c \ write-buffer.c @@ -468,6 +482,7 @@ pkginc_core_HEADERS = \ signals.h \ special-vars.h \ utf8.h \ + iregex.h \ window-item-def.h \ tls.h \ write-buffer.h \ @@ -531,6 
+546,8 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/core.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/expandos.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ignore.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iregex-gregex.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iregex-regexh.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/levels.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/line-split.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/log-away.Po@am__quote@ diff -pru --unidirectional-new-file src/core/ignore.c regex/irssi-1.0.2/src/core/ignore.c --- src/core/ignore.c 2017-03-04 21:30:38.000000000 +0100 +++ regex/irssi-1.0.2/src/core/ignore.c 2017-03-11 21:57:00.000000000 +0100 @@ -24,6 +24,7 @@ #include "levels.h" #include "lib-config/iconfig.h" #include "settings.h" +#include "iregex.h" #include "masks.h" #include "servers.h" @@ -67,13 +68,8 @@ static int ignore_match_pattern(IGNORE_R return FALSE; if (rec->regexp) { -#ifdef USE_GREGEX return rec->preg != NULL && - g_regex_match(rec->preg, text, 0, NULL); -#else - return rec->regexp_compiled && - regexec(&rec->preg, text, 0, NULL, 0) == 0; -#endif + i_regex_match(rec->preg, text, 0, NULL, NULL); } return rec->fullword ? 
@@ -327,41 +323,19 @@ static void ignore_remove_config(IGNORE_ static void ignore_init_rec(IGNORE_REC *rec) { -#ifdef USE_GREGEX if (rec->preg != NULL) - g_regex_unref(rec->preg); + i_regex_unref(rec->preg); if (rec->regexp && rec->pattern != NULL) { GError *re_error = NULL; - rec->preg = g_regex_new(rec->pattern, G_REGEX_OPTIMIZE | G_REGEX_RAW | G_REGEX_CASELESS, 0, &re_error); + rec->preg = i_regex_new(rec->pattern, G_REGEX_OPTIMIZE | G_REGEX_CASELESS, 0, &re_error); if (rec->preg == NULL) { g_warning("Failed to compile regexp '%s': %s", rec->pattern, re_error->message); g_error_free(re_error); } } -#else - char *errbuf; - int errcode, errbuf_len; - - if (rec->regexp_compiled) regfree(&rec->preg); - rec->regexp_compiled = FALSE; - - if (rec->regexp && rec->pattern != NULL) { - errcode = regcomp(&rec->preg, rec->pattern, - REG_EXTENDED|REG_ICASE|REG_NOSUB); - if (errcode != 0) { - errbuf_len = regerror(errcode, &rec->preg, 0, 0); - errbuf = g_malloc(errbuf_len); - regerror(errcode, &rec->preg, errbuf, errbuf_len); - g_warning("Failed to compile regexp '%s': %s", rec->pattern, errbuf); - g_free(errbuf); - } else { - rec->regexp_compiled = TRUE; - } - } -#endif } void ignore_add_rec(IGNORE_REC *rec) @@ -381,11 +355,7 @@ static void ignore_destroy(IGNORE_REC *r if (send_signal) signal_emit("ignore destroyed", 1, rec); -#ifdef USE_GREGEX - if (rec->preg != NULL) g_regex_unref(rec->preg); -#else - if (rec->regexp_compiled) regfree(&rec->preg); -#endif + if (rec->preg != NULL) i_regex_unref(rec->preg); if (rec->channels != NULL) g_strfreev(rec->channels); g_free_not_null(rec->mask); g_free_not_null(rec->servertag); diff -pru --unidirectional-new-file src/core/ignore.h regex/irssi-1.0.2/src/core/ignore.h --- src/core/ignore.h 2017-03-04 21:30:38.000000000 +0100 +++ regex/irssi-1.0.2/src/core/ignore.h 2017-03-11 21:57:00.000000000 +0100 @@ -1,9 +1,7 @@ #ifndef __IGNORE_H #define __IGNORE_H -#ifndef USE_GREGEX -# include <regex.h> -#endif +#include "iregex.h" typedef struct 
_IGNORE_REC IGNORE_REC; @@ -20,12 +18,7 @@ struct _IGNORE_REC { unsigned int regexp:1; unsigned int fullword:1; unsigned int replies:1; /* ignore replies to nick in channel */ -#ifdef USE_GREGEX - GRegex *preg; -#else - unsigned int regexp_compiled:1; /* should always be TRUE, unless regexp is invalid */ - regex_t preg; -#endif + Regex *preg; }; extern GSList *ignores; diff -pru --unidirectional-new-file src/core/iregex-gregex.c regex/irssi-1.0.2/src/core/iregex-gregex.c --- src/core/iregex-gregex.c 1970-01-01 01:00:00.000000000 +0100 +++ regex/irssi-1.0.2/src/core/iregex-gregex.c 2017-03-11 21:57:00.000000000 +0100 @@ -0,0 +1,138 @@ +#include <string.h> + +#include "iregex.h" + +const gchar * +make_valid_utf8(const gchar *text, gboolean *free_ret) +{ + GString *str; + const gchar *ptr; + if (g_utf8_validate(text, -1, NULL)) { + if (free_ret) + *free_ret = FALSE; + return text; + } + + str = g_string_sized_new(strlen(text) + 12); + + ptr = text; + while (*ptr) { + gunichar c = g_utf8_get_char_validated(ptr, -1); + /* the unicode is invalid */ + if (c == (gunichar)-1 || c == (gunichar)-2) { + /* encode the byte into PUA-A */ + g_string_append_unichar(str, (gunichar) (0xfff00 | (*ptr & 0xff))); + ptr++; + } else { + g_string_append_unichar(str, c); + ptr = g_utf8_next_char(ptr); + } + } + + if (free_ret) + *free_ret = TRUE; + return g_string_free(str, FALSE); +} + +Regex * +i_regex_new (const gchar *pattern, + GRegexCompileFlags compile_options, + GRegexMatchFlags match_options, + GError **error) +{ + const gchar *valid_pattern; + gboolean free_valid_pattern; + Regex *ret = NULL; + + valid_pattern = make_valid_utf8(pattern, &free_valid_pattern); + ret = g_regex_new(valid_pattern, compile_options, match_options, error); + + if (free_valid_pattern) + g_free_not_null((gchar *)valid_pattern); + + return ret; +} + +void +i_regex_unref (Regex *regex) +{ + g_regex_unref(regex); +} + +/* if new_string is present, the caller must free new_string. 
+ otherwise, g_match_info_get_string must not be used. */ +gboolean +i_regex_match (const Regex *regex, + const gchar *string, + GRegexMatchFlags match_options, + MatchInfo **match_info, + const gchar **new_string) +{ + gboolean ret; + gboolean free_valid_string; + const gchar *valid_string = make_valid_utf8(string, &free_valid_string); + + ret = g_regex_match(regex, valid_string, match_options, match_info); + if (free_valid_string) { + if (new_string) + *new_string = valid_string; + else + g_free_not_null((gchar *)valid_string); + } + return ret; +} + +gsize +strlen_pua_oddly(const char *str) +{ + const gchar *ptr; + gsize ret = 0; + ptr = str; + + while (*ptr) { + const gchar *old; + gunichar c = g_utf8_get_char(ptr); + old = ptr; + ptr = g_utf8_next_char(ptr); + + /* it is our PUA encoded byte */ + if ((c & 0xfff00) == 0xfff00) + ret++; + else + ret += ptr - old; + } + + return ret; +} + +gboolean +i_match_info_fetch_pos (const MatchInfo *match_info, + gint match_num, + gint *start_pos, + gint *end_pos, + const gchar *new_string) +{ + gint tmp_start, tmp_end, new_start_pos; + gboolean ret; + + if (!new_string || (!start_pos && !end_pos)) + return g_match_info_fetch_pos(match_info, match_num, start_pos, end_pos); + + ret = g_match_info_fetch_pos(match_info, match_num, &tmp_start, &tmp_end); + if (start_pos || end_pos) { + gchar *to_start = g_strndup(new_string, tmp_start); + new_start_pos = strlen_pua_oddly(to_start); + g_free_not_null(to_start); + + if (start_pos) + *start_pos = new_start_pos; + + if (end_pos) { + gchar *to_end = g_strndup(new_string + tmp_start, tmp_end - tmp_start); + *end_pos = new_start_pos + strlen_pua_oddly(to_end); + g_free_not_null(to_end); + } + } + return ret; +} + diff -pru --unidirectional-new-file src/core/iregex.h regex/irssi-1.0.2/src/core/iregex.h --- src/core/iregex.h 1970-01-01 01:00:00.000000000 +0100 +++ regex/irssi-1.0.2/src/core/iregex.h 2017-03-11 21:57:00.000000000 +0100 @@ -0,0 +1,42 @@ +#ifndef __REGEX_H +#define 
__REGEX_H + +#include "common.h" + +#ifdef USE_GREGEX + +#include <glib.h> +typedef GRegex Regex; +typedef GMatchInfo MatchInfo; + +#else + +#include <regex.h> +typedef regex_t Regex; + +#endif + +Regex * +i_regex_new (const gchar *pattern, + GRegexCompileFlags compile_options, + GRegexMatchFlags match_options, + GError **error); + +void +i_regex_unref (Regex *regex); + +gboolean +i_regex_match (const Regex *regex, + const gchar *string, + GRegexMatchFlags match_options, + MatchInfo **match_info, + const gchar **new_string); + +gboolean +i_match_info_fetch_pos (const MatchInfo *match_info, + gint match_num, + gint *start_pos, + gint *end_pos, + const gchar *new_string); + +#endif diff -pru --unidirectional-new-file src/fe-common/core/hilight-text.c regex/irssi-1.0.2/src/fe-common/core/hilight-text.c --- src/fe-common/core/hilight-text.c 2017-03-10 17:31:03.000000000 +0100 +++ regex/irssi-1.0.2/src/fe-common/core/hilight-text.c 2017-03-11 21:57:00.000000000 +0100 @@ -26,6 +26,7 @@ #include "misc.h" #include "lib-config/iconfig.h" #include "settings.h" +#include "iregex.h" #include "servers.h" #include "channels.h" @@ -101,11 +102,7 @@ static void hilight_destroy(HILIGHT_REC { g_return_if_fail(rec != NULL); -#ifdef USE_GREGEX - if (rec->preg != NULL) g_regex_unref(rec->preg); -#else - if (rec->regexp_compiled) regfree(&rec->preg); -#endif + if (rec->preg != NULL) i_regex_unref(rec->preg); if (rec->channels != NULL) g_strfreev(rec->channels); g_free_not_null(rec->color); g_free_not_null(rec->act_color); @@ -122,19 +119,10 @@ static void hilights_destroy_all(void) static void hilight_init_rec(HILIGHT_REC *rec) { -#ifdef USE_GREGEX if (rec->preg != NULL) - g_regex_unref(rec->preg); + i_regex_unref(rec->preg); - rec->preg = g_regex_new(rec->text, G_REGEX_OPTIMIZE | G_REGEX_RAW | G_REGEX_CASELESS, 0, NULL); -#else - if (rec->regexp_compiled) regfree(&rec->preg); - if (!rec->regexp) - rec->regexp_compiled = FALSE; - else - rec->regexp_compiled = regcomp(&rec->preg, rec->text, - 
rec->case_sensitive ? REG_EXTENDED : (REG_EXTENDED|REG_ICASE)) == 0; -#endif + rec->preg = i_regex_new(rec->text, G_REGEX_OPTIMIZE | G_REGEX_CASELESS, 0, NULL); } void hilight_create(HILIGHT_REC *rec) @@ -207,30 +195,18 @@ static gboolean hilight_match_text(HILIG gboolean ret = FALSE; if (rec->regexp) { -#ifdef USE_GREGEX if (rec->preg != NULL) { GMatchInfo *match; + const char *new_text = NULL; - g_regex_match (rec->preg, text, 0, &match); + i_regex_match(rec->preg, text, 0, &match, &new_text); if (g_match_info_matches(match)) - ret = g_match_info_fetch_pos(match, 0, match_beg, match_end); + ret = i_match_info_fetch_pos(match, 0, match_beg, match_end, new_text); g_match_info_free(match); + g_free_not_null((char *)new_text); } -#else - regmatch_t rmatch[1]; - - if (rec->regexp_compiled && - regexec(&rec->preg, text, 1, rmatch, 0) == 0) { - if (rmatch[0].rm_so > 0 && - match_beg != NULL && match_end != NULL) { - *match_beg = rmatch[0].rm_so; - *match_end = rmatch[0].rm_eo; - } - ret = TRUE; - } -#endif } else { char *match; @@ -529,13 +505,8 @@ static void hilight_print(int index, HIL if (rec->case_sensitive) g_string_append(options, "-matchcase "); if (rec->regexp) { g_string_append(options, "-regexp "); -#ifdef USE_GREGEX if (rec->preg == NULL) g_string_append(options, "[INVALID!] "); -#else - if (!rec->regexp_compiled) - g_string_append(options, "[INVALID!] 
"); -#endif } if (rec->priority != 0) diff -pru --unidirectional-new-file src/fe-text/textbuffer.c regex/irssi-1.0.2/src/fe-text/textbuffer.c --- src/fe-text/textbuffer.c 2017-03-10 17:31:03.000000000 +0100 +++ regex/irssi-1.0.2/src/fe-text/textbuffer.c 2017-03-11 21:57:00.000000000 +0100 @@ -24,6 +24,7 @@ #include "misc.h" #include "formats.h" #include "utf8.h" +#include "iregex.h" #include "textbuffer.h" @@ -537,11 +538,7 @@ GList *textbuffer_find_text(TEXT_BUFFER_ int before, int after, int regexp, int fullword, int case_sensitive) { -#ifdef USE_GREGEX - GRegex *preg; -#else - regex_t preg; -#endif + Regex *preg; LINE_REC *line, *pre_line; GList *matches; GString *str; @@ -551,23 +548,14 @@ GList *textbuffer_find_text(TEXT_BUFFER_ g_return_val_if_fail(buffer != NULL, NULL); g_return_val_if_fail(text != NULL, NULL); -#ifdef USE_GREGEX preg = NULL; if (regexp) { - preg = g_regex_new(text, G_REGEX_RAW | (case_sensitive ? 0 : G_REGEX_CASELESS), 0, NULL); + preg = i_regex_new(text, case_sensitive ? 0 : G_REGEX_CASELESS, 0, NULL); if (preg == NULL) return NULL; } -#else - if (regexp) { - int flags = REG_EXTENDED | REG_NOSUB | - (case_sensitive ? 0 : REG_ICASE); - if (regcomp(&preg, text, flags) != 0) - return NULL; - } -#endif matches = NULL; match_after = 0; str = g_string_new(NULL); @@ -584,17 +572,16 @@ GList *textbuffer_find_text(TEXT_BUFFER_ (line->info.level & nolevel) == 0; if (*text != '\0') { + const char *tmp = NULL; textbuffer_line2text(line, FALSE, str); if (line_matched) { line_matched = regexp ? 
-#ifdef USE_GREGEX - g_regex_match(preg, str->str, 0, NULL) -#else - regexec(&preg, str->str, 0, NULL, 0) == 0 -#endif + i_regex_match(preg, str->str, 0, NULL, &tmp) : match_func(str->str, text) != NULL; } + if (tmp && tmp != str->str) + g_free_not_null((char *)tmp); } if (line_matched) { @@ -623,12 +610,8 @@ GList *textbuffer_find_text(TEXT_BUFFER_ } } -#ifdef USE_GREGEX if (preg != NULL) - g_regex_unref(preg); -#else - if (regexp) regfree(&preg); -#endif + i_regex_unref(preg); g_string_free(str, TRUE); return matches; }