SHA256
1
0
forked from pool/grep
grep/grep-i18n-speedup.diff

154 lines
3.7 KiB
Diff

--- src/grep.c
+++ src/grep.c
@@ -157,6 +157,9 @@
int match_words;
int match_lines;
unsigned char eolbyte;
+#ifdef MBS_SUPPORT
+int convert_mbstr = 0;
+#endif
/* For error messages. */
/* The name the program was run with, stripped of any leading path. */
@@ -1723,10 +1726,22 @@
}
else
{
+ wchar_t wc2;
if (iswupper ((wint_t) wc))
{
- wc = towlower ((wint_t) wc);
- wcrtomb (keys + i, wc, &cur_state);
+ wc2 = towlower ((wint_t) wc);
+ wcrtomb (keys + i, wc2, &cur_state);
+ if (mbclen > 1 && wc != wc2)
+ convert_mbstr = 1;
+ }
+ else
+ {
+ if (mbclen > 1)
+ {
+ wc2 = towupper ((wint_t) wc);
+ if (wc != wc2)
+ convert_mbstr = 1;
+ }
}
}
i += mbclen;
--- src/search.c
+++ src/search.c
@@ -72,6 +72,7 @@
#if defined(MBS_SUPPORT)
static char* check_multibyte_string PARAMS ((char const *buf, size_t size));
+extern int convert_mbstr;
#endif
static void kwsinit PARAMS ((void));
static void kwsmusts PARAMS ((void));
@@ -180,6 +181,26 @@
return mb_properties;
}
+
+/* Return nonzero iff BUF + POS is a character boundary when BUF (SIZE bytes) is scanned from its start. */
+static int
+check_valid_multibyte (const char *buf, size_t pos, size_t size)
+{
+ size_t i;
+ mbstate_t cur_state;
+
+ memset (&cur_state, 0, sizeof (mbstate_t)); /* Begin in the initial conversion state. */
+ for (i = 0; i < pos; ) /* Step one character at a time until POS is reached or passed. */
+ {
+ size_t mbclen;
+ mbclen = mbrlen (buf + i, size - i, &cur_state); /* SIZE - I bytes remain from BUF + I. */
+ if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)
+ mbclen = 1; /* Invalid, incomplete or NUL: advance by a single byte. */
+ i += mbclen;
+ }
+
+ return i == pos; /* Overshooting POS means POS lies inside a multibyte character. */
+}
#endif
static void
@@ -344,7 +365,7 @@
static size_t
EGexecute (char const *buf, size_t size, size_t *match_size, int exact)
{
- register char const *buflim, *beg, *end;
+ register char const *buflim, *beg, *end, *oldbeg;
char eol = eolbyte;
int backref, start, len;
struct kwsmatch kwsm;
@@ -358,9 +379,9 @@
char *case_buf = xmalloc (size);
memcpy (case_buf, buf, size);
buf = case_buf;
- }
- if (kwset)
- mb_properties = check_multibyte_string (buf, size);
+ if (kwset && convert_mbstr)
+ mb_properties = check_multibyte_string (buf, size);
+ }
}
#endif /* MBS_SUPPORT */
@@ -386,14 +407,24 @@
#endif /* MBS_SUPPORT */
return (size_t)-1;
}
+ oldbeg = beg;
beg += offset;
/* Narrow down to the line containing the candidate, and
run it through DFA. */
end = memchr(beg, eol, buflim - beg);
end++;
#ifdef MBS_SUPPORT
- if (MB_CUR_MAX > 1 && mb_properties[beg - buf] == 0)
- continue;
+ if (MB_CUR_MAX > 1)
+ {
+ if (mb_properties)
+ {
+ if (mb_properties[beg - buf] == 0)
+ continue;
+ }
+ else if (! check_valid_multibyte (oldbeg, offset,
+ end - oldbeg))
+ continue;
+ }
#endif
while (beg > buf && beg[-1] != eol)
--beg;
@@ -546,8 +577,9 @@
char *case_buf = xmalloc (size);
memcpy (case_buf, buf, size);
buf = case_buf;
+ if (convert_mbstr)
+ mb_properties = check_multibyte_string (buf, size);
}
- mb_properties = check_multibyte_string (buf, size);
}
#endif /* MBS_SUPPORT */
@@ -567,8 +599,16 @@
return offset;
}
#ifdef MBS_SUPPORT
- if (MB_CUR_MAX > 1 && mb_properties[offset+beg-buf] == 0)
- continue; /* It is a part of multibyte character. */
+ if (MB_CUR_MAX > 1) /* Reject keyword hits that start inside a multibyte character. */
+ {
+ if (mb_properties) /* Per-byte table was precomputed for the whole buffer. */
+ {
+ if (mb_properties[offset+beg-buf] == 0)
+ continue; /* It is a part of multibyte character. */
+ }
+ else if (! check_valid_multibyte (beg, offset, buf + size - beg)) /* Third argument: bytes available from BEG. */
+ continue; /* No table: rescan from BEG; the hit is not on a character boundary. */
+ }
#endif /* MBS_SUPPORT */
beg += offset;
len = kwsmatch.size[0];