revert accidental commit
OBS-URL: https://build.opensuse.org/package/show/Base:System/file?expand=0&rev=236
This commit is contained in:
parent
47aed56704
commit
9797584eba
@ -1,346 +0,0 @@
|
|||||||
From 1957db8212e9c74e5d626de3023e49d0bb502052 Mon Sep 17 00:00:00 2001
|
|
||||||
From: =?UTF-8?q?Dirk=20M=C3=BCller?= <dirk@dmllr.de>
|
|
||||||
Date: Fri, 11 Mar 2022 23:51:55 +0100
|
|
||||||
Subject: [PATCH] Cache compiled regexps between magic matches
|
|
||||||
|
|
||||||
regcomp() is relatively expensive compared to regexec() for matching,
|
|
||||||
so it helps to only compile once and then reuse the compiled version
|
|
||||||
for future matches of the same magic.
|
|
||||||
|
|
||||||
When doing the equivalent of `find | xargs file`, this provides a massive
|
|
||||||
speedup, between factor 2 and 4 depending on how heavy the magic
|
|
||||||
is on regexp usage.
|
|
||||||
|
|
||||||
The memory overhead is moderate (~200 kB) and it compiles regexps
|
|
||||||
lazily, so it doesn't add significant overhead to single-match use cases.
|
|
||||||
---
|
|
||||||
src/apprentice.c | 26 +++++++++++++++++++----
|
|
||||||
src/file.h | 40 ++++++++++++++++++-----------------
|
|
||||||
src/softmagic.c | 54 +++++++++++++++++++++++++++---------------------
|
|
||||||
3 files changed, 73 insertions(+), 47 deletions(-)
|
|
||||||
|
|
||||||
Index: file-5.41/src/apprentice.c
|
|
||||||
===================================================================
|
|
||||||
--- file-5.41.orig/src/apprentice.c
|
|
||||||
+++ file-5.41/src/apprentice.c
|
|
||||||
@@ -425,7 +425,14 @@ add_mlist(struct mlist *mlp, struct magi
|
|
||||||
ml->map = idx == 0 ? map : NULL;
|
|
||||||
ml->magic = map->magic[idx];
|
|
||||||
ml->nmagic = map->nmagic[idx];
|
|
||||||
-
|
|
||||||
+ ml->magic_rxcomp = NULL;
|
|
||||||
+ if (ml->nmagic) {
|
|
||||||
+ ml->magic_rxcomp = CAST(file_regex_t**, calloc(ml->nmagic, sizeof(file_regex_t*)));
|
|
||||||
+ if (ml->magic_rxcomp == NULL) {
|
|
||||||
+ free(ml);
|
|
||||||
+ return -1;
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
mlp->prev->next = ml;
|
|
||||||
ml->prev = mlp->prev;
|
|
||||||
ml->next = mlp;
|
|
||||||
@@ -610,8 +617,19 @@ mlist_free_all(struct magic_set *ms)
|
|
||||||
private void
|
|
||||||
mlist_free_one(struct mlist *ml)
|
|
||||||
{
|
|
||||||
+ size_t i;
|
|
||||||
+
|
|
||||||
if (ml->map)
|
|
||||||
apprentice_unmap(CAST(struct magic_map *, ml->map));
|
|
||||||
+
|
|
||||||
+ for (i = 0; i < ml->nmagic; ++i) {
|
|
||||||
+ if (ml->magic_rxcomp[i]) {
|
|
||||||
+ file_regfree(ml->magic_rxcomp[i]);
|
|
||||||
+ free(ml->magic_rxcomp[i]);
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ free(ml->magic_rxcomp);
|
|
||||||
+ ml->magic_rxcomp = NULL;
|
|
||||||
free(ml);
|
|
||||||
}
|
|
||||||
|
|
||||||
@@ -3548,16 +3566,16 @@ file_magicfind(struct magic_set *ms, con
|
|
||||||
|
|
||||||
for (ml = mlist->next; ml != mlist; ml = ml->next) {
|
|
||||||
struct magic *ma = ml->magic;
|
|
||||||
- uint32_t nma = ml->nmagic;
|
|
||||||
- for (i = 0; i < nma; i++) {
|
|
||||||
+ for (i = 0; i < ml->nmagic; i++) {
|
|
||||||
if (ma[i].type != FILE_NAME)
|
|
||||||
continue;
|
|
||||||
if (strcmp(ma[i].value.s, name) == 0) {
|
|
||||||
v->magic = &ma[i];
|
|
||||||
- for (j = i + 1; j < nma; j++)
|
|
||||||
+ for (j = i + 1; j < ml->nmagic; j++)
|
|
||||||
if (ma[j].cont_level == 0)
|
|
||||||
break;
|
|
||||||
v->nmagic = j - i;
|
|
||||||
+ v->magic_rxcomp = ml->magic_rxcomp;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Index: file-5.41/src/file.h
|
|
||||||
===================================================================
|
|
||||||
--- file-5.41.orig/src/file.h
|
|
||||||
+++ file-5.41/src/file.h
|
|
||||||
@@ -88,6 +88,10 @@
|
|
||||||
/* Do this here and now, because struct stat gets re-defined on solaris */
|
|
||||||
#include <sys/stat.h>
|
|
||||||
#include <stdarg.h>
|
|
||||||
+#include <locale.h>
|
|
||||||
+#if defined(HAVE_XLOCALE_H)
|
|
||||||
+#include <xlocale.h>
|
|
||||||
+#endif
|
|
||||||
|
|
||||||
#define ENABLE_CONDITIONALS
|
|
||||||
|
|
||||||
@@ -167,6 +171,19 @@
|
|
||||||
#define FILE_COMPILE 2
|
|
||||||
#define FILE_LIST 3
|
|
||||||
|
|
||||||
+typedef struct {
|
|
||||||
+ const char *pat;
|
|
||||||
+#if defined(HAVE_NEWLOCALE) && defined(HAVE_USELOCALE) && defined(HAVE_FREELOCALE)
|
|
||||||
+#define USE_C_LOCALE
|
|
||||||
+ locale_t old_lc_ctype;
|
|
||||||
+ locale_t c_lc_ctype;
|
|
||||||
+#else
|
|
||||||
+ char *old_lc_ctype;
|
|
||||||
+#endif
|
|
||||||
+ int rc;
|
|
||||||
+ regex_t rx;
|
|
||||||
+} file_regex_t;
|
|
||||||
+
|
|
||||||
struct buffer {
|
|
||||||
int fd;
|
|
||||||
struct stat st;
|
|
||||||
@@ -397,9 +414,10 @@ struct magic {
|
|
||||||
|
|
||||||
/* list of magic entries */
|
|
||||||
struct mlist {
|
|
||||||
- struct magic *magic; /* array of magic entries */
|
|
||||||
- uint32_t nmagic; /* number of entries in array */
|
|
||||||
- void *map; /* internal resources used by entry */
|
|
||||||
+ struct magic *magic; /* array of magic entries */
|
|
||||||
+ file_regex_t **magic_rxcomp; /* array of compiled regexps */
|
|
||||||
+ size_t nmagic; /* number of entries in array */
|
|
||||||
+ void *map; /* internal resources used by entry */
|
|
||||||
struct mlist *next, *prev;
|
|
||||||
};
|
|
||||||
|
|
||||||
@@ -568,23 +586,7 @@ protected void buffer_init(struct buffer
|
|
||||||
protected void buffer_fini(struct buffer *);
|
|
||||||
protected int buffer_fill(const struct buffer *);
|
|
||||||
|
|
||||||
-#include <locale.h>
|
|
||||||
-#if defined(HAVE_XLOCALE_H)
|
|
||||||
-#include <xlocale.h>
|
|
||||||
-#endif
|
|
||||||
|
|
||||||
-typedef struct {
|
|
||||||
- const char *pat;
|
|
||||||
-#if defined(HAVE_NEWLOCALE) && defined(HAVE_USELOCALE) && defined(HAVE_FREELOCALE)
|
|
||||||
-#define USE_C_LOCALE
|
|
||||||
- locale_t old_lc_ctype;
|
|
||||||
- locale_t c_lc_ctype;
|
|
||||||
-#else
|
|
||||||
- char *old_lc_ctype;
|
|
||||||
-#endif
|
|
||||||
- int rc;
|
|
||||||
- regex_t rx;
|
|
||||||
-} file_regex_t;
|
|
||||||
|
|
||||||
protected int file_regcomp(file_regex_t *, const char *, int);
|
|
||||||
protected int file_regexec(file_regex_t *, const char *, size_t, regmatch_t *,
|
|
||||||
Index: file-5.41/src/softmagic.c
|
|
||||||
===================================================================
|
|
||||||
--- file-5.41.orig/src/softmagic.c
|
|
||||||
+++ file-5.41/src/softmagic.c
|
|
||||||
@@ -43,7 +43,7 @@ FILE_RCSID("@(#)$File: softmagic.c,v 1.3
|
|
||||||
#include <time.h>
|
|
||||||
#include "der.h"
|
|
||||||
|
|
||||||
-private int match(struct magic_set *, struct magic *, uint32_t,
|
|
||||||
+private int match(struct magic_set *, struct magic *, file_regex_t **, uint32_t,
|
|
||||||
const struct buffer *, size_t, int, int, int, uint16_t *,
|
|
||||||
uint16_t *, int *, int *, int *, int *);
|
|
||||||
private int mget(struct magic_set *, struct magic *, const struct buffer *,
|
|
||||||
@@ -52,7 +52,7 @@ private int mget(struct magic_set *, str
|
|
||||||
uint16_t *, int *, int *, int *, int *);
|
|
||||||
private int msetoffset(struct magic_set *, struct magic *, struct buffer *,
|
|
||||||
const struct buffer *, size_t, unsigned int);
|
|
||||||
-private int magiccheck(struct magic_set *, struct magic *);
|
|
||||||
+private int magiccheck(struct magic_set *, struct magic *, file_regex_t **);
|
|
||||||
private int32_t mprint(struct magic_set *, struct magic *);
|
|
||||||
private int moffset(struct magic_set *, struct magic *, const struct buffer *,
|
|
||||||
int32_t *);
|
|
||||||
@@ -131,7 +131,7 @@ file_softmagic(struct magic_set *ms, con
|
|
||||||
}
|
|
||||||
|
|
||||||
for (ml = ms->mlist[0]->next; ml != ms->mlist[0]; ml = ml->next)
|
|
||||||
- if ((rv = match(ms, ml->magic, ml->nmagic, b, 0, mode,
|
|
||||||
+ if ((rv = match(ms, ml->magic, ml->magic_rxcomp, ml->nmagic, b, 0, mode,
|
|
||||||
text, 0, indir_count, name_count,
|
|
||||||
&printed_something, &need_separator, NULL, NULL)) != 0)
|
|
||||||
return rv;
|
|
||||||
@@ -191,7 +191,7 @@ file_fmtcheck(struct magic_set *ms, cons
|
|
||||||
* so that higher-level continuations are processed.
|
|
||||||
*/
|
|
||||||
private int
|
|
||||||
-match(struct magic_set *ms, struct magic *magic, uint32_t nmagic,
|
|
||||||
+match(struct magic_set *ms, struct magic *magic, file_regex_t **magic_rxcomp, uint32_t nmagic,
|
|
||||||
const struct buffer *b, size_t offset, int mode, int text,
|
|
||||||
int flip, uint16_t *indir_count, uint16_t *name_count,
|
|
||||||
int *printed_something, int *need_separator, int *returnval,
|
|
||||||
@@ -220,6 +220,7 @@ match(struct magic_set *ms, struct magic
|
|
||||||
for (magindex = 0; magindex < nmagic; magindex++) {
|
|
||||||
int flush = 0;
|
|
||||||
struct magic *m = &magic[magindex];
|
|
||||||
+ file_regex_t** m_rxcomp = &magic_rxcomp[magindex];
|
|
||||||
|
|
||||||
if (m->type != FILE_NAME)
|
|
||||||
if ((IS_STRING(m->type) &&
|
|
||||||
@@ -257,7 +258,7 @@ flush:
|
|
||||||
*returnval = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
- switch (magiccheck(ms, m)) {
|
|
||||||
+ switch (magiccheck(ms, m, m_rxcomp)) {
|
|
||||||
case -1:
|
|
||||||
return -1;
|
|
||||||
case 0:
|
|
||||||
@@ -318,6 +319,7 @@ flush:
|
|
||||||
while (magindex + 1 < nmagic &&
|
|
||||||
magic[magindex + 1].cont_level != 0) {
|
|
||||||
m = &magic[++magindex];
|
|
||||||
+ m_rxcomp = &magic_rxcomp[magindex];
|
|
||||||
ms->line = m->lineno; /* for messages */
|
|
||||||
|
|
||||||
if (cont_level < m->cont_level)
|
|
||||||
@@ -371,7 +373,7 @@ flush:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
- switch (flush ? 1 : magiccheck(ms, m)) {
|
|
||||||
+ switch (flush ? 1 : magiccheck(ms, m, m_rxcomp)) {
|
|
||||||
case -1:
|
|
||||||
return -1;
|
|
||||||
case 0:
|
|
||||||
@@ -655,7 +657,7 @@ mprint(struct magic_set *ms, struct magi
|
|
||||||
|
|
||||||
if (m->str_flags & STRING_TRIM)
|
|
||||||
str = file_strtrim(str);
|
|
||||||
-
|
|
||||||
+
|
|
||||||
if (file_printf(ms, F(ms, desc, "%s"),
|
|
||||||
file_printable(ms, sbuf, sizeof(sbuf), str,
|
|
||||||
sizeof(p->s) - (str - p->s))) == -1)
|
|
||||||
@@ -770,7 +772,7 @@ mprint(struct magic_set *ms, struct magi
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
scp = (m->str_flags & STRING_TRIM) ? file_strtrim(cp) : cp;
|
|
||||||
-
|
|
||||||
+
|
|
||||||
rval = file_printf(ms, F(ms, desc, "%s"), file_printable(ms,
|
|
||||||
sbuf, sizeof(sbuf), scp, ms->search.rm_len));
|
|
||||||
free(cp);
|
|
||||||
@@ -1822,7 +1824,7 @@ mget(struct magic_set *ms, struct magic
|
|
||||||
for (mlp = ms->mlist[0]->next; mlp != ms->mlist[0];
|
|
||||||
mlp = mlp->next)
|
|
||||||
{
|
|
||||||
- if ((rv = match(ms, mlp->magic, mlp->nmagic, &bb, 0,
|
|
||||||
+ if ((rv = match(ms, mlp->magic, mlp->magic_rxcomp, mlp->nmagic, &bb, 0,
|
|
||||||
BINTEST, text, 0, indir_count, name_count,
|
|
||||||
printed_something, need_separator, NULL,
|
|
||||||
NULL)) != 0)
|
|
||||||
@@ -1875,7 +1877,7 @@ mget(struct magic_set *ms, struct magic
|
|
||||||
nfound_match = 0;
|
|
||||||
(*name_count)++;
|
|
||||||
eoffset = ms->eoffset;
|
|
||||||
- rv = match(ms, ml.magic, ml.nmagic, b, offset + o,
|
|
||||||
+ rv = match(ms, ml.magic, ml.magic_rxcomp, ml.nmagic, b, offset + o,
|
|
||||||
mode, text, flip, indir_count, name_count,
|
|
||||||
printed_something, need_separator, returnval,
|
|
||||||
&nfound_match);
|
|
||||||
@@ -1999,7 +2001,7 @@ file_strncmp16(const char *a, const char
|
|
||||||
}
|
|
||||||
|
|
||||||
private int
|
|
||||||
-magiccheck(struct magic_set *ms, struct magic *m)
|
|
||||||
+magiccheck(struct magic_set *ms, struct magic *m, file_regex_t** m_cache)
|
|
||||||
{
|
|
||||||
uint64_t l = m->value.q;
|
|
||||||
uint64_t v;
|
|
||||||
@@ -2182,27 +2184,32 @@ magiccheck(struct magic_set *ms, struct
|
|
||||||
}
|
|
||||||
case FILE_REGEX: {
|
|
||||||
int rc;
|
|
||||||
- file_regex_t rx;
|
|
||||||
+ file_regex_t *rx = *m_cache;
|
|
||||||
const char *search;
|
|
||||||
|
|
||||||
if (ms->search.s == NULL)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
+ if (rx == NULL) {
|
|
||||||
+ rx = *m_cache = CAST(file_regex_t*, malloc(sizeof(file_regex_t)));
|
|
||||||
+ rc = file_regcomp(rx, m->value.s,
|
|
||||||
+ REG_EXTENDED|REG_NEWLINE|
|
|
||||||
+ ((m->str_flags & STRING_IGNORE_CASE) ? REG_ICASE : 0));
|
|
||||||
+ if (rc) {
|
|
||||||
+ file_regerror(rx, rc, ms);
|
|
||||||
+ file_regfree(rx);
|
|
||||||
+ v = CAST(uint64_t, -1);
|
|
||||||
+ break;
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
l = 0;
|
|
||||||
- rc = file_regcomp(&rx, m->value.s,
|
|
||||||
- REG_EXTENDED|REG_NEWLINE|
|
|
||||||
- ((m->str_flags & STRING_IGNORE_CASE) ? REG_ICASE : 0));
|
|
||||||
- if (rc) {
|
|
||||||
- file_regerror(&rx, rc, ms);
|
|
||||||
- v = CAST(uint64_t, -1);
|
|
||||||
- } else {
|
|
||||||
+ {
|
|
||||||
regmatch_t pmatch;
|
|
||||||
size_t slen = ms->search.s_len;
|
|
||||||
char *copy;
|
|
||||||
if (slen != 0) {
|
|
||||||
copy = CAST(char *, malloc(slen));
|
|
||||||
if (copy == NULL) {
|
|
||||||
- file_regfree(&rx);
|
|
||||||
file_error(ms, errno,
|
|
||||||
"can't allocate %" SIZE_T_FORMAT "u bytes",
|
|
||||||
slen);
|
|
||||||
@@ -2215,14 +2222,14 @@ magiccheck(struct magic_set *ms, struct
|
|
||||||
search = CCAST(char *, "");
|
|
||||||
copy = NULL;
|
|
||||||
}
|
|
||||||
- rc = file_regexec(&rx, RCAST(const char *, search),
|
|
||||||
+ rc = file_regexec(rx, RCAST(const char *, search),
|
|
||||||
1, &pmatch, 0);
|
|
||||||
free(copy);
|
|
||||||
switch (rc) {
|
|
||||||
case 0:
|
|
||||||
ms->search.s += CAST(int, pmatch.rm_so);
|
|
||||||
ms->search.offset += CAST(size_t, pmatch.rm_so);
|
|
||||||
- ms->search.rm_len = CAST(size_t,
|
|
||||||
+ ms->search.rm_len = CAST(size_t,
|
|
||||||
pmatch.rm_eo - pmatch.rm_so);
|
|
||||||
v = 0;
|
|
||||||
break;
|
|
||||||
@@ -2232,12 +2239,11 @@ magiccheck(struct magic_set *ms, struct
|
|
||||||
break;
|
|
||||||
|
|
||||||
default:
|
|
||||||
- file_regerror(&rx, rc, ms);
|
|
||||||
+ file_regerror(rx, rc, ms);
|
|
||||||
v = CAST(uint64_t, -1);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
- file_regfree(&rx);
|
|
||||||
if (v == CAST(uint64_t, -1))
|
|
||||||
return -1;
|
|
||||||
break;
|
|
@ -45,7 +45,6 @@ Source4: ftp://ftp.astron.com/pub/file/file-%{version}.tar.gz.asc
|
|||||||
Source5: file.keyring
|
Source5: file.keyring
|
||||||
Patch: file-5.41.dif
|
Patch: file-5.41.dif
|
||||||
Patch1: file-5.19-misc.dif
|
Patch1: file-5.19-misc.dif
|
||||||
Patch2: 0001-Cache-compiled-regexps-between-magic-matches.patch
|
|
||||||
Patch4: file-4.24-autoconf.dif
|
Patch4: file-4.24-autoconf.dif
|
||||||
Patch5: file-5.14-tex.dif
|
Patch5: file-5.14-tex.dif
|
||||||
Patch7: file-4.20-ssd.dif
|
Patch7: file-4.20-ssd.dif
|
||||||
@ -109,7 +108,6 @@ to develop applications that require the magic "file" interface.
|
|||||||
%prep
|
%prep
|
||||||
%setup -q -n file-%{version}
|
%setup -q -n file-%{version}
|
||||||
%patch1 -p0 -b .misc
|
%patch1 -p0 -b .misc
|
||||||
%patch2 -p1 -b .cache
|
|
||||||
%patch4 -p0 -b .conf
|
%patch4 -p0 -b .conf
|
||||||
%patch5 -p0 -b .tex
|
%patch5 -p0 -b .tex
|
||||||
%patch7 -p0 -b .ssd
|
%patch7 -p0 -b .ssd
|
||||||
@ -140,7 +138,7 @@ rm -f ltcf-c.sh ltconfig ltmain.sh
|
|||||||
autoreconf -fiv
|
autoreconf -fiv
|
||||||
export CFLAGS="%{optflags} -DHOWMANY=69632 -fPIE $(pkg-config libseccomp --cflags)"
|
export CFLAGS="%{optflags} -DHOWMANY=69632 -fPIE $(pkg-config libseccomp --cflags)"
|
||||||
%configure --disable-silent-rules --datadir=%{_miscdir} \
|
%configure --disable-silent-rules --datadir=%{_miscdir} \
|
||||||
--disable-static --disable-libseccomp \
|
--disable-static \
|
||||||
--enable-fsect-man5
|
--enable-fsect-man5
|
||||||
make %{?_smp_mflags} pkgdatadir='$(datadir)' LDFLAGS="-pie"
|
make %{?_smp_mflags} pkgdatadir='$(datadir)' LDFLAGS="-pie"
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user