diff --git a/grep.changes b/grep.changes index 5c24492..693b21e 100644 --- a/grep.changes +++ b/grep.changes @@ -1,3 +1,8 @@ +------------------------------------------------------------------- +Thu Mar 22 18:18:33 CET 2007 - schwab@suse.de + +- Add a variant of the mbcache patch. + ------------------------------------------------------------------- Mon Jul 17 18:56:15 CEST 2006 - schwab@suse.de diff --git a/grep.spec b/grep.spec index a619ed9..269d513 100644 --- a/grep.spec +++ b/grep.spec @@ -1,7 +1,7 @@ # # spec file for package grep (Version 2.5.1a) # -# Copyright (c) 2006 SUSE LINUX Products GmbH, Nuernberg, Germany. +# Copyright (c) 2007 SUSE LINUX Products GmbH, Nuernberg, Germany. # This file and all modifications and additions to the pristine # package are under the same license as the package itself. # @@ -13,13 +13,13 @@ Name: grep BuildRequires: pcre-devel URL: http://www.gnu.org/software/grep/ -License: GPL +License: GNU General Public License (GPL) Group: Productivity/Text/Utilities Provides: base:/usr/bin/grep Autoreqprov: on PreReq: %{install_info_prereq} Version: 2.5.1a -Release: 21 +Release: 48 Summary: Print lines matching a pattern Source: grep-%{version}.tar.bz2 Patch: grep-%{version}.diff @@ -35,6 +35,7 @@ Patch9: grep-2.5.1a-mbcset.diff Patch10: skip-devices.diff Patch11: pcre-execute.diff Patch12: kwset.diff +Patch13: mbcache.diff BuildRoot: %{_tmppath}/%{name}-%{version}-build %description @@ -67,6 +68,7 @@ Authors: %patch10 %patch11 %patch12 +%patch13 rename no nb po/no.* %build @@ -108,7 +110,9 @@ ln -sf ../../bin/grep $RPM_BUILD_ROOT/usr/bin/grep %doc %{_infodir}/grep*.gz /usr/share/locale/*/LC_MESSAGES/grep.mo -%changelog -n grep +%changelog +* Thu Mar 22 2007 - schwab@suse.de +- Add a variant of the mbcache patch. * Mon Jul 17 2006 - schwab@suse.de - Fix matching in unsafe encodings [#192390]. * Sun Jun 11 2006 - schwab@suse.de @@ -174,8 +178,8 @@ ln -sf ../../bin/grep $RPM_BUILD_ROOT/usr/bin/grep - Add i18n patch. * Tue Sep 04 2001 - schwab@suse.de - Update to grep 2.4.2: -- New option --binary-files -- Final newline silently provided. + - New option --binary-files + - Final newline silently provided. * Thu Mar 22 2001 - ro@suse.de - added split-aliases as provides * Wed Mar 07 2001 - schwab@suse.de diff --git a/mbcache.diff b/mbcache.diff new file mode 100644 index 0000000..33b071d --- /dev/null +++ b/mbcache.diff @@ -0,0 +1,584 @@ +--- src/dfa.c ++++ src/dfa.c +@@ -2755,7 +2755,8 @@ + match needs to be verified by a backtracking matcher. Otherwise + we store a 0 in *backref. */ + size_t +-dfaexec (struct dfa *d, char const *begin, size_t size, int *backref) ++dfaexec (struct dfa *d, char const *begin, size_t size, int *backref, ++ struct mb_cache *mb_cache) + { + register int s; /* Current state. */ + register unsigned char const *p; /* Current input character. */ +@@ -2787,43 +2788,77 @@ + #ifdef MBS_SUPPORT + if (MB_CUR_MAX > 1) + { +- int remain_bytes, i; + buf_begin = begin; + buf_end = end; +- +- /* initialize mblen_buf, and inputwcs. */ +- MALLOC(mblen_buf, unsigned char, end - (unsigned char const *)begin + 2); +- MALLOC(inputwcs, wchar_t, end - (unsigned char const *)begin + 2); +- memset(&mbs, 0, sizeof(mbstate_t)); +- remain_bytes = 0; +- for (i = 0; i < end - (unsigned char const *)begin + 1; i++) ++ if (mb_cache && mb_cache->mblen_buf && mb_cache->wcs_buf ++ && begin > mb_cache->orig_buf ++ && begin + size <= mb_cache->orig_buf + mb_cache->len) ++ { ++ /* The cache can help us. */ ++ MALLOC (mblen_buf, unsigned char, size + 2); ++ MALLOC (inputwcs, wchar_t, size + 2); ++ memcpy (mblen_buf, ++ mb_cache->mblen_buf + (begin - mb_cache->orig_buf), ++ (size + 2) * sizeof (unsigned char)); ++ memcpy (inputwcs, ++ mb_cache->wcs_buf + (begin - mb_cache->orig_buf), ++ (size + 2) * sizeof (wchar_t)); ++ mblen_buf[size + 1] = 0; ++ inputwcs[size + 1] = 0; ++ } ++ else + { +- if (remain_bytes == 0) ++ int remain_bytes, i; ++ ++ /* initialize mblen_buf, and inputwcs. */ ++ MALLOC(mblen_buf, unsigned char, end - (unsigned char const *)begin + 2); ++ MALLOC(inputwcs, wchar_t, end - (unsigned char const *)begin + 2); ++ memset(&mbs, 0, sizeof(mbstate_t)); ++ remain_bytes = 0; ++ for (i = 0; i < end - (unsigned char const *)begin + 1; i++) + { +- remain_bytes +- = mbrtowc(inputwcs + i, begin + i, +- end - (unsigned char const *)begin - i + 1, &mbs); +- if (remain_bytes <= 1) ++ if (remain_bytes == 0) + { +- remain_bytes = 0; +- inputwcs[i] = (wchar_t)begin[i]; +- mblen_buf[i] = 0; ++ remain_bytes ++ = mbrtowc(inputwcs + i, begin + i, ++ end - (unsigned char const *)begin - i + 1, &mbs); ++ if (remain_bytes <= 1) ++ { ++ remain_bytes = 0; ++ inputwcs[i] = (wchar_t)begin[i]; ++ mblen_buf[i] = 0; ++ } ++ else ++ { ++ mblen_buf[i] = remain_bytes; ++ remain_bytes--; ++ } + } + else + { + mblen_buf[i] = remain_bytes; ++ inputwcs[i] = 0; + remain_bytes--; + } + } +- else ++ mblen_buf[i] = 0; ++ inputwcs[i] = 0; /* sentinel */ ++ ++ if (mb_cache) + { +- mblen_buf[i] = remain_bytes; +- inputwcs[i] = 0; +- remain_bytes--; ++ /* Populate the cache. */ ++ mb_cache->len = size; ++ mb_cache->orig_buf = begin; ++ free (mb_cache->mblen_buf); ++ free (mb_cache->wcs_buf); ++ MALLOC (mb_cache->mblen_buf, unsigned char, size + 2); ++ MALLOC (mb_cache->wcs_buf, wchar_t, size + 2); ++ memcpy (mb_cache->mblen_buf, mblen_buf, ++ (size + 2) * sizeof (unsigned char)); ++ memcpy (mb_cache->wcs_buf, inputwcs, ++ (size + 2) * sizeof (wchar_t)); + } + } +- mblen_buf[i] = 0; +- inputwcs[i] = 0; /* sentinel */ + } + #endif /* MBS_SUPPORT */ + +--- src/dfa.h ++++ src/dfa.h +@@ -22,6 +22,8 @@ + In addition to clobbering modularity, we eat up valuable + name space. */ + ++#include "mbcache.h" ++ + #ifdef __STDC__ + # ifndef _PTR_T + # define _PTR_T +@@ -403,7 +405,8 @@ + order to verify backreferencing; otherwise the flag will be cleared. + Returns (size_t) -1 if no match is found, or the offset of the first + character after the first & shortest matching string in the buffer. */ +-extern size_t dfaexec PARAMS ((struct dfa *, char const *, size_t, int *)); ++extern size_t dfaexec PARAMS ((struct dfa *, char const *, size_t, int *, ++ struct mb_cache *)); + + /* Free the storage held by the components of a struct dfa. */ + extern void dfafree PARAMS ((struct dfa *)); +--- src/grep.c ++++ src/grep.c +@@ -189,7 +189,8 @@ + + /* Functions we'll use to search. */ + static void (*compile) PARAMS ((char const *, size_t)); +-static size_t (*execute) PARAMS ((char const *, size_t, size_t *, int)); ++static size_t (*execute) PARAMS ((char const *, size_t, struct mb_cache *, ++ size_t *, int)); + + /* Like error, but suppress the diagnostic if requested. */ + static void +@@ -506,7 +507,7 @@ + } + + static void +-prline (char const *beg, char const *lim, int sep) ++prline (char const *beg, char const *lim, int sep, struct mb_cache *mb_cache) + { + if (out_file) + printf ("%s%c", filename, sep & filename_mask); +@@ -529,7 +530,8 @@ + { + size_t match_size; + size_t match_offset; +- while ((match_offset = (*execute) (beg, lim - beg, &match_size, 1)) ++ while ((match_offset = (*execute) (beg, lim - beg, mb_cache, ++ &match_size, 1)) + != (size_t) -1) + { + char const *b = beg + match_offset; +@@ -563,7 +565,8 @@ + int i; + for (i = 0; i < lim - beg; i++) + ibeg[i] = tolower (beg[i]); +- while ((match_offset = (*execute) (ibeg, ilim-ibeg, &match_size, 1)) ++ while ((match_offset = (*execute) (ibeg, ilim-ibeg, mb_cache, ++ &match_size, 1)) + != (size_t) -1) + { + char const *b = beg + match_offset; +@@ -581,7 +584,8 @@ + lastout = lim; + return; + } +- while (lim-beg && (match_offset = (*execute) (beg, lim - beg, &match_size, 1)) ++ while (lim-beg && (match_offset = (*execute) (beg, lim - beg, mb_cache, ++ &match_size, 1)) + != (size_t) -1) + { + char const *b = beg + match_offset; +@@ -609,7 +613,7 @@ + /* Print pending lines of trailing context prior to LIM. Trailing context ends + at the next matching line when OUTLEFT is 0. */ + static void +-prpending (char const *lim) ++prpending (char const *lim, struct mb_cache *mb_cache) + { + if (!lastout) + lastout = bufbeg; +@@ -619,9 +623,10 @@ + size_t match_size; + --pending; + if (outleft +- || (((*execute) (lastout, nl - lastout, &match_size, 0) == (size_t) -1) ++ || (((*execute) (lastout, nl - lastout, mb_cache, ++ &match_size, 0) == (size_t) -1) + == !out_invert)) +- prline (lastout, nl + 1, '-'); ++ prline (lastout, nl + 1, '-', mb_cache); + else + pending = 0; + } +@@ -630,7 +635,8 @@ + /* Print the lines between BEG and LIM. Deal with context crap. + If NLINESP is non-null, store a count of lines between BEG and LIM. */ + static void +-prtext (char const *beg, char const *lim, int *nlinesp) ++prtext (char const *beg, char const *lim, int *nlinesp, ++ struct mb_cache *mb_cache) + { + static int used; /* avoid printing "--" before any output */ + char const *bp, *p; +@@ -638,7 +644,7 @@ + int i, n; + + if (!out_quiet && pending > 0) +- prpending (beg); ++ prpending (beg, mb_cache); + + p = beg; + +@@ -662,7 +668,7 @@ + { + char const *nl = memchr (p, eol, beg - p); + nl++; +- prline (p, nl, '-'); ++ prline (p, nl, '-', mb_cache); + p = nl; + } + } +@@ -675,7 +681,7 @@ + char const *nl = memchr (p, eol, lim - p); + nl++; + if (!out_quiet) +- prline (p, nl, ':'); ++ prline (p, nl, ':', mb_cache); + p = nl; + } + *nlinesp = n; +@@ -685,7 +691,7 @@ + } + else + if (!out_quiet) +- prline (beg, lim, ':'); ++ prline (beg, lim, ':', mb_cache); + + pending = out_quiet ? 0 : out_after; + used = 1; +@@ -695,7 +701,7 @@ + between matching lines if OUT_INVERT is true). Return a count of + lines printed. */ + static int +-grepbuf (char const *beg, char const *lim) ++grepbuf (char const *beg, char const *lim, struct mb_cache *mb_cache) + { + int nlines, n; + register char const *p; +@@ -704,7 +710,8 @@ + + nlines = 0; + p = beg; +- while ((match_offset = (*execute) (p, lim - p, &match_size, 0)) != (size_t) -1) ++ while ((match_offset = (*execute) (p, lim - p, mb_cache, ++ &match_size, 0)) != (size_t) -1) + { + char const *b = p + match_offset; + char const *endp = b + match_size; +@@ -713,7 +720,7 @@ + break; + if (!out_invert) + { +- prtext (b, endp, (int *) 0); ++ prtext (b, endp, (int *) 0, mb_cache); + nlines++; + outleft--; + if (!outleft || done_on_match) +@@ -726,7 +733,7 @@ + } + else if (p < b) + { +- prtext (p, b, &n); ++ prtext (p, b, &n, mb_cache); + nlines += n; + outleft -= n; + if (!outleft) +@@ -736,7 +743,7 @@ + } + if (out_invert && p < lim) + { +- prtext (p, lim, &n); ++ prtext (p, lim, &n, mb_cache); + nlines += n; + outleft -= n; + } +@@ -756,7 +763,16 @@ + char *beg; + char *lim; + char eol = eolbyte; ++ struct mb_cache *mb_cache; ++#ifdef MBS_SUPPORT ++ struct mb_cache mb_cache_inst; + ++ mb_cache = &mb_cache_inst; ++ mb_cache->mblen_buf = 0; ++ mb_cache->wcs_buf = 0; ++#else ++ mb_cache = 0; ++#endif + if (!reset (fd, file, stats)) + return 0; + +@@ -823,9 +839,9 @@ + if (beg < lim) + { + if (outleft) +- nlines += grepbuf (beg, lim); ++ nlines += grepbuf (beg, lim, mb_cache); + if (pending) +- prpending (lim); ++ prpending (lim, mb_cache); + if((!outleft && !pending) || (nlines && done_on_match && !out_invert)) + goto finish_grep; + } +@@ -853,6 +869,12 @@ + totalcc = add_count (totalcc, buflim - bufbeg - save); + if (out_line) + nlscan (beg); ++#ifdef MBS_SUPPORT ++ free (mb_cache->wcs_buf); ++ free (mb_cache->mblen_buf); ++ mb_cache->wcs_buf = 0; ++ mb_cache->mblen_buf = 0; ++#endif + if (! fillbuf (save, stats)) + { + if (! is_EISDIR (errno, file)) +@@ -864,9 +886,9 @@ + { + *buflim++ = eol; + if (outleft) +- nlines += grepbuf (bufbeg + save - residue, buflim); ++ nlines += grepbuf (bufbeg + save - residue, buflim, mb_cache); + if (pending) +- prpending (buflim); ++ prpending (buflim, mb_cache); + } + + finish_grep: +@@ -874,6 +896,11 @@ + out_quiet -= not_text; + if ((not_text & ~out_quiet) && nlines != 0) + printf (_("Binary file %s matches\n"), filename); ++ ++#ifdef MBS_SUPPORT ++ free (mb_cache->wcs_buf); ++ free (mb_cache->mblen_buf); ++#endif + return nlines; + } + +--- src/grep.h ++++ src/grep.h +@@ -20,6 +20,8 @@ + # define __attribute__(x) + #endif + ++#include "mbcache.h" ++ + /* Grep.c expects the matchers vector to be terminated + by an entry with a NULL compile, and to contain at least + an entry named "default". */ +@@ -28,7 +30,8 @@ + { + char name[8]; + void (*compile) PARAMS ((char const *, size_t)); +- size_t (*execute) PARAMS ((char const *, size_t, size_t *, int)); ++ size_t (*execute) PARAMS ((char const *, size_t, struct mb_cache *, ++ size_t *, int)); + } const matchers[]; + + /* Exported from fgrepmat.c, egrepmat.c, grepmat.c. */ +--- src/mbcache.h ++++ src/mbcache.h +@@ -0,0 +1,14 @@ ++#ifndef MB_CACHE_DEFINED ++#define MB_CACHE_DEFINED ++#ifdef MBS_SUPPORT ++struct mb_cache ++{ ++ size_t len; ++ const char *orig_buf; /* not the only reference; do not free */ ++ wchar_t *wcs_buf; ++ unsigned char *mblen_buf; ++}; ++#else ++struct mb_cache; ++#endif ++#endif +--- src/search.c ++++ src/search.c +@@ -71,18 +71,23 @@ + static int kwset_exact_matches; + + #if defined(MBS_SUPPORT) +-static char* check_multibyte_string PARAMS ((char const *buf, size_t size)); ++static char* check_multibyte_string PARAMS ((char const *buf, size_t size, ++ struct mb_cache *, ++ char const *orig_buf)); + extern int convert_mbstr; + #endif + static void kwsinit PARAMS ((void)); + static void kwsmusts PARAMS ((void)); + static void Gcompile PARAMS ((char const *, size_t)); + static void Ecompile PARAMS ((char const *, size_t)); +-static size_t EGexecute PARAMS ((char const *, size_t, size_t *, int )); ++static size_t EGexecute PARAMS ((char const *, size_t, struct mb_cache *, ++ size_t *, int )); + static void Fcompile PARAMS ((char const *, size_t)); +-static size_t Fexecute PARAMS ((char const *, size_t, size_t *, int)); ++static size_t Fexecute PARAMS ((char const *, size_t, struct mb_cache *, ++ size_t *, int)); + static void Pcompile PARAMS ((char const *, size_t )); +-static size_t Pexecute PARAMS ((char const *, size_t, size_t *, int)); ++static size_t Pexecute PARAMS ((char const *, size_t, struct mb_cache *, ++ size_t *, int)); + + void + dfaerror (char const *mesg) +@@ -148,35 +153,61 @@ + are not singlebyte character nor the first byte of a multibyte + character. Caller must free the array. */ + static char* +-check_multibyte_string(char const *buf, size_t size) ++check_multibyte_string(char const *buf, size_t size, struct mb_cache *mb_cache, ++ char const *orig_buf) + { + char *mb_properties = xmalloc(size); + mbstate_t cur_state; + wchar_t wc; + int i; + memset(&cur_state, 0, sizeof(mbstate_t)); +- memset(mb_properties, 0, sizeof(char)*size); +- for (i = 0; i < size ;) +- { +- size_t mbclen; +- mbclen = mbrtowc (&wc, buf + i, size - i, &cur_state); ++ if (mb_cache && mb_cache->mblen_buf ++ && orig_buf > mb_cache->orig_buf ++ && orig_buf + size <= mb_cache->orig_buf + mb_cache->len) ++ { ++ /* The cache can help us. */ ++ memcpy (mb_properties, ++ mb_cache->mblen_buf + (orig_buf - mb_cache->orig_buf), ++ size); + +- if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0) +- { +- /* An invalid sequence, or a truncated multibyte character. +- We treat it as a singlebyte character. */ +- mbclen = 1; +- } +- else if (match_icase) ++ } ++ else ++ { ++ memset(mb_properties, 0, sizeof(char)*size); ++ for (i = 0; i < size ;) + { +- if (iswupper ((wint_t) wc)) +- { +- wc = towlower ((wint_t) wc); +- wcrtomb (buf + i, wc, &cur_state); +- } ++ size_t mbclen; ++ mbclen = mbrtowc(&wc, buf + i, size - i, &cur_state); ++ ++ if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0) ++ { ++ /* An invalid sequence, or a truncated multibyte character. ++ We treat it as a singlebyte character. */ ++ mbclen = 1; ++ } ++ else if (match_icase) ++ { ++ if (iswupper((wint_t)wc)) ++ { ++ wc = towlower((wint_t)wc); ++ wcrtomb(buf + i, wc, &cur_state); ++ } ++ } ++ mb_properties[i] = mbclen; ++ i += mbclen; ++ } ++ ++ /* Now populate the cache. */ ++ if (mb_cache) ++ { ++ free (mb_cache->wcs_buf); ++ mb_cache->wcs_buf = NULL; ++ free (mb_cache->mblen_buf); ++ mb_cache->len = size; ++ mb_cache->orig_buf = orig_buf; ++ mb_cache->mblen_buf = xmalloc (size); ++ memcpy (mb_cache->mblen_buf, mb_properties, size); + } +- mb_properties[i] = mbclen; +- i += mbclen; + } + + return mb_properties; +@@ -363,9 +394,11 @@ + } + + static size_t +-EGexecute (char const *buf, size_t size, size_t *match_size, int exact) ++EGexecute (char const *buf, size_t size, struct mb_cache *mb_cache, ++ size_t *match_size, int exact) + { + register char const *buflim, *beg, *end; ++ char const *orig_buf = buf; + char eol = eolbyte; + int backref, start, len; + struct kwsmatch kwsm; +@@ -380,7 +413,7 @@ + memcpy (case_buf, buf, size); + buf = case_buf; + if (kwset && convert_mbstr) +- mb_properties = check_multibyte_string (buf, size); ++ mb_properties = check_multibyte_string (buf, size, mb_cache, orig_buf); + } + } + #endif /* MBS_SUPPORT */ +@@ -416,13 +449,13 @@ + --beg; + if (kwsm.index < kwset_exact_matches) + goto success; +- if (dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1) ++ if (dfaexec (&dfa, beg, end - beg, &backref, mb_cache) == (size_t) -1) + continue; + } + else + { + /* No good fixed strings; start with DFA. */ +- size_t offset = dfaexec (&dfa, beg, buflim - beg, &backref); ++ size_t offset = dfaexec (&dfa, beg, buflim - beg, &backref, mb_cache); + if (offset == (size_t) -1) + break; + /* Narrow down to the line we've found. */ +@@ -547,9 +580,11 @@ + } + + static size_t +-Fexecute (char const *buf, size_t size, size_t *match_size, int exact) ++Fexecute (char const *buf, size_t size, struct mb_cache *mb_cache, ++ size_t *match_size, int exact) + { + register char const *beg, *try, *end; ++ char const *orig_buf = buf; + register size_t len; + char eol = eolbyte; + struct kwsmatch kwsmatch; +@@ -564,7 +599,7 @@ + memcpy (case_buf, buf, size); + buf = case_buf; + if (convert_mbstr) +- mb_properties = check_multibyte_string (buf, size); ++ mb_properties = check_multibyte_string (buf, size, mb_cache, orig_buf); + } + } + #endif /* MBS_SUPPORT */ +@@ -755,7 +790,8 @@ + } + + static size_t +-Pexecute (char const *buf, size_t size, size_t *match_size, int exact) ++Pexecute (char const *buf, size_t size, struct mb_cache *mb_cache, ++ size_t *match_size, int exact) + { + #if !HAVE_LIBPCRE + abort ();