154 lines
3.7 KiB
Diff
154 lines
3.7 KiB
Diff
--- src/grep.c
|
|
+++ src/grep.c
|
|
@@ -157,6 +157,9 @@
|
|
int match_words;
|
|
int match_lines;
|
|
unsigned char eolbyte;
|
|
+#ifdef MBS_SUPPORT
|
|
+int convert_mbstr = 0; /* Set to 1 by the key case-folding code when a multibyte (mbclen > 1) key character differs from its towlower/towupper counterpart; search.c tests it before calling check_multibyte_string. */
|
|
+#endif
|
|
|
|
/* For error messages. */
|
|
/* The name the program was run with, stripped of any leading path. */
|
|
@@ -1723,10 +1726,22 @@
|
|
}
|
|
else
|
|
{
|
|
+ wchar_t wc2;
|
|
if (iswupper ((wint_t) wc))
|
|
{
|
|
- wc = towlower ((wint_t) wc);
|
|
- wcrtomb (keys + i, wc, &cur_state);
|
|
+ wc2 = towlower ((wint_t) wc);
|
|
+ wcrtomb (keys + i, wc2, &cur_state);
|
|
+ if (mbclen > 1 && wc != wc2)
|
|
+ convert_mbstr = 1;
|
|
+ }
|
|
+ else
|
|
+ {
|
|
+ if (mbclen > 1)
|
|
+ {
|
|
+ wc2 = towupper ((wint_t) wc);
|
|
+ if (wc != wc2)
|
|
+ convert_mbstr = 1;
|
|
+ }
|
|
}
|
|
}
|
|
i += mbclen;
|
|
--- src/search.c
|
|
+++ src/search.c
|
|
@@ -72,6 +72,7 @@
|
|
|
|
#if defined(MBS_SUPPORT)
|
|
static char* check_multibyte_string PARAMS ((char const *buf, size_t size));
|
|
+extern int convert_mbstr;
|
|
#endif
|
|
static void kwsinit PARAMS ((void));
|
|
static void kwsmusts PARAMS ((void));
|
|
@@ -180,6 +181,26 @@
|
|
|
|
return mb_properties;
|
|
}
|
|
+
|
|
+/* Return nonzero if offset POS in BUF is a character boundary, i.e. a character-by-character walk from BUF[0] using mbrlen lands exactly on POS rather than stepping over it. SIZE bounds how many bytes of BUF mbrlen may examine. */
|
|
+static int
|
|
+check_valid_multibyte (const char *buf, size_t pos, size_t size)
|
|
+{
|
|
+ size_t i;
|
|
+ mbstate_t cur_state;
|
|
+
|
|
+ memset (&cur_state, 0, sizeof (mbstate_t)); /* begin the scan in the initial conversion state */
|
|
+ for (i = 0; i < pos; ) /* no increment here: i advances by one character length per pass */
|
|
+ {
|
|
+ size_t mbclen;
|
|
+ mbclen = mbrlen (buf + i, size - i, &cur_state); /* size - i is unsigned: it wraps to a huge value if SIZE < POS, so callers must pass SIZE >= POS */
|
|
+ if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)
|
|
+ mbclen = 1; /* invalid (-1), incomplete (-2) or NUL (0): step over a single byte so the walk always progresses */
|
|
+ i += mbclen;
|
|
+ }
|
|
+
|
|
+ return i == pos; /* i > pos means the last character started before POS and extends past it */
|
|
+}
|
|
#endif
|
|
|
|
static void
|
|
@@ -344,7 +365,7 @@
|
|
static size_t
|
|
EGexecute (char const *buf, size_t size, size_t *match_size, int exact)
|
|
{
|
|
- register char const *buflim, *beg, *end;
|
|
+ register char const *buflim, *beg, *end, *oldbeg;
|
|
char eol = eolbyte;
|
|
int backref, start, len;
|
|
struct kwsmatch kwsm;
|
|
@@ -358,9 +379,9 @@
|
|
char *case_buf = xmalloc (size);
|
|
memcpy (case_buf, buf, size);
|
|
buf = case_buf;
|
|
- }
|
|
- if (kwset)
|
|
- mb_properties = check_multibyte_string (buf, size);
|
|
+ if (kwset && convert_mbstr)
|
|
+ mb_properties = check_multibyte_string (buf, size);
|
|
+ }
|
|
}
|
|
#endif /* MBS_SUPPORT */
|
|
|
|
@@ -386,14 +407,24 @@
|
|
#endif /* MBS_SUPPORT */
|
|
return (size_t)-1;
|
|
}
|
|
+ oldbeg = beg;
|
|
beg += offset;
|
|
/* Narrow down to the line containing the candidate, and
|
|
run it through DFA. */
|
|
end = memchr(beg, eol, buflim - beg);
|
|
end++;
|
|
#ifdef MBS_SUPPORT
|
|
- if (MB_CUR_MAX > 1 && mb_properties[beg - buf] == 0)
|
|
- continue;
|
|
+ if (MB_CUR_MAX > 1)
|
|
+ {
|
|
+ if (mb_properties)
|
|
+ {
|
|
+ if (mb_properties[beg - buf] == 0)
|
|
+ continue;
|
|
+ }
|
|
+ else if (! check_valid_multibyte (oldbeg, offset,
|
|
+ end - oldbeg))
|
|
+ continue;
|
|
+ }
|
|
#endif
|
|
while (beg > buf && beg[-1] != eol)
|
|
--beg;
|
|
@@ -546,8 +577,9 @@
|
|
char *case_buf = xmalloc (size);
|
|
memcpy (case_buf, buf, size);
|
|
buf = case_buf;
|
|
+ if (convert_mbstr)
|
|
+ mb_properties = check_multibyte_string (buf, size);
|
|
}
|
|
- mb_properties = check_multibyte_string (buf, size);
|
|
}
|
|
#endif /* MBS_SUPPORT */
|
|
|
|
@@ -567,8 +599,16 @@
|
|
return offset;
|
|
}
|
|
#ifdef MBS_SUPPORT
|
|
- if (MB_CUR_MAX > 1 && mb_properties[offset+beg-buf] == 0)
|
|
- continue; /* It is a part of multibyte character. */
|
|
+ if (MB_CUR_MAX > 1)
|
|
+ {
|
|
+ if (mb_properties)
|
|
+ {
|
|
+ if (mb_properties[offset+beg-buf] == 0)
|
|
+ continue; /* It is a part of multibyte character. */
|
|
+ }
|
|
+ else if (! check_valid_multibyte (beg, offset, size - offset))
|
|
+ continue;
|
|
+ }
|
|
#endif /* MBS_SUPPORT */
|
|
beg += offset;
|
|
len = kwsmatch.size[0];
|