diff --git a/0001-Cache-compiled-regexps-between-magic-matches.patch b/0001-Cache-compiled-regexps-between-magic-matches.patch index 098119d..473a0f4 100644 --- a/0001-Cache-compiled-regexps-between-magic-matches.patch +++ b/0001-Cache-compiled-regexps-between-magic-matches.patch @@ -1,346 +0,0 @@ -From 1957db8212e9c74e5d626de3023e49d0bb502052 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Dirk=20M=C3=BCller?= -Date: Fri, 11 Mar 2022 23:51:55 +0100 -Subject: [PATCH] Cache compiled regexps between magic matches - -regcomp() is relatively expensive compared to regexec() for matching, -so it helps to only compile once and then reuse the compiled version -for future matches of the same magic. - -when doing equivalent of `find | xargs file` this provides a massive -speedup, between factor 2 and 4 depending on how heavy the magic -is on regexp usage. - -The memory overhead is mediocre (~ 200kb ) and it compiles regexps -lazy, so it doesn't add significant overhead to single match usecases. ---- - src/apprentice.c | 26 +++++++++++++++++++---- - src/file.h | 40 ++++++++++++++++++----------------- - src/softmagic.c | 54 +++++++++++++++++++++++++++--------------------- - 3 files changed, 73 insertions(+), 47 deletions(-) - -Index: file-5.41/src/apprentice.c -=================================================================== ---- file-5.41.orig/src/apprentice.c -+++ file-5.41/src/apprentice.c -@@ -425,7 +425,14 @@ add_mlist(struct mlist *mlp, struct magi - ml->map = idx == 0 ? map : NULL; - ml->magic = map->magic[idx]; - ml->nmagic = map->nmagic[idx]; -- -+ ml->magic_rxcomp = NULL; -+ if (ml->nmagic) { -+ ml->magic_rxcomp = CAST(file_regex_t**, calloc(ml->nmagic, sizeof(file_regex_t*))); -+ if (ml->magic_rxcomp == NULL) { -+ free(ml); -+ return -1; -+ } -+ } - mlp->prev->next = ml; - ml->prev = mlp->prev; - ml->next = mlp; -@@ -610,8 +617,19 @@ mlist_free_all(struct magic_set *ms) - private void - mlist_free_one(struct mlist *ml) - { -+ size_t i; -+ - if (ml->map) - apprentice_unmap(CAST(struct magic_map *, ml->map)); -+ -+ for (i = 0; i < ml->nmagic; ++i) { -+ if (ml->magic_rxcomp[i]) { -+ file_regfree(ml->magic_rxcomp[i]); -+ free(ml->magic_rxcomp[i]); -+ } -+ } -+ free(ml->magic_rxcomp); -+ ml->magic_rxcomp = NULL; - free(ml); - } - -@@ -3548,16 +3566,16 @@ file_magicfind(struct magic_set *ms, con - - for (ml = mlist->next; ml != mlist; ml = ml->next) { - struct magic *ma = ml->magic; -- uint32_t nma = ml->nmagic; -- for (i = 0; i < nma; i++) { -+ for (i = 0; i < ml->nmagic; i++) { - if (ma[i].type != FILE_NAME) - continue; - if (strcmp(ma[i].value.s, name) == 0) { - v->magic = &ma[i]; -- for (j = i + 1; j < nma; j++) -+ for (j = i + 1; j < ml->nmagic; j++) - if (ma[j].cont_level == 0) - break; - v->nmagic = j - i; -+ v->magic_rxcomp = ml->magic_rxcomp; - return 0; - } - } -Index: file-5.41/src/file.h -=================================================================== ---- file-5.41.orig/src/file.h -+++ file-5.41/src/file.h -@@ -88,6 +88,10 @@ - /* Do this here and now, because struct stat gets re-defined on solaris */ - #include - #include -+#include -+#if defined(HAVE_XLOCALE_H) -+#include -+#endif - - #define ENABLE_CONDITIONALS - -@@ -167,6 +171,19 @@ - #define FILE_COMPILE 2 - #define FILE_LIST 3 - -+typedef struct { -+ const char *pat; -+#if defined(HAVE_NEWLOCALE) && defined(HAVE_USELOCALE) && defined(HAVE_FREELOCALE) -+#define USE_C_LOCALE -+ locale_t old_lc_ctype; -+ locale_t c_lc_ctype; -+#else -+ char *old_lc_ctype; -+#endif -+ int rc; -+ regex_t rx; -+} file_regex_t; -+ - struct buffer { - int fd; - struct stat st; -@@ -397,9 +414,10 @@ struct magic { - - /* list of magic entries */ - struct mlist { -- struct magic *magic; /* array of magic entries */ -- uint32_t nmagic; /* number of entries in array */ -- void *map; /* internal resources used by entry */ -+ struct magic *magic; /* array of magic entries */ -+ file_regex_t **magic_rxcomp; /* array of compiled regexps */ -+ size_t nmagic; /* number of entries in array */ -+ void *map; /* internal resources used by entry */ - struct mlist *next, *prev; - }; - -@@ -568,23 +586,7 @@ protected void buffer_init(struct buffer - protected void buffer_fini(struct buffer *); - protected int buffer_fill(const struct buffer *); - --#include --#if defined(HAVE_XLOCALE_H) --#include --#endif - --typedef struct { -- const char *pat; --#if defined(HAVE_NEWLOCALE) && defined(HAVE_USELOCALE) && defined(HAVE_FREELOCALE) --#define USE_C_LOCALE -- locale_t old_lc_ctype; -- locale_t c_lc_ctype; --#else -- char *old_lc_ctype; --#endif -- int rc; -- regex_t rx; --} file_regex_t; - - protected int file_regcomp(file_regex_t *, const char *, int); - protected int file_regexec(file_regex_t *, const char *, size_t, regmatch_t *, -Index: file-5.41/src/softmagic.c -=================================================================== ---- file-5.41.orig/src/softmagic.c -+++ file-5.41/src/softmagic.c -@@ -43,7 +43,7 @@ FILE_RCSID("@(#)$File: softmagic.c,v 1.3 - #include - #include "der.h" - --private int match(struct magic_set *, struct magic *, uint32_t, -+private int match(struct magic_set *, struct magic *, file_regex_t **, uint32_t, - const struct buffer *, size_t, int, int, int, uint16_t *, - uint16_t *, int *, int *, int *, int *); - private int mget(struct magic_set *, struct magic *, const struct buffer *, -@@ -52,7 +52,7 @@ private int mget(struct magic_set *, str - uint16_t *, int *, int *, int *, int *); - private int msetoffset(struct magic_set *, struct magic *, struct buffer *, - const struct buffer *, size_t, unsigned int); --private int magiccheck(struct magic_set *, struct magic *); -+private int magiccheck(struct magic_set *, struct magic *, file_regex_t **); - private int32_t mprint(struct magic_set *, struct magic *); - private int moffset(struct magic_set *, struct magic *, const struct buffer *, - int32_t *); -@@ -131,7 +131,7 @@ file_softmagic(struct magic_set *ms, con - } - - for (ml = ms->mlist[0]->next; ml != ms->mlist[0]; ml = ml->next) -- if ((rv = match(ms, ml->magic, ml->nmagic, b, 0, mode, -+ if ((rv = match(ms, ml->magic, ml->magic_rxcomp, ml->nmagic, b, 0, mode, - text, 0, indir_count, name_count, - &printed_something, &need_separator, NULL, NULL)) != 0) - return rv; -@@ -191,7 +191,7 @@ file_fmtcheck(struct magic_set *ms, cons - * so that higher-level continuations are processed. - */ - private int --match(struct magic_set *ms, struct magic *magic, uint32_t nmagic, -+match(struct magic_set *ms, struct magic *magic, file_regex_t **magic_rxcomp, uint32_t nmagic, - const struct buffer *b, size_t offset, int mode, int text, - int flip, uint16_t *indir_count, uint16_t *name_count, - int *printed_something, int *need_separator, int *returnval, -@@ -220,6 +220,7 @@ match(struct magic_set *ms, struct magic - for (magindex = 0; magindex < nmagic; magindex++) { - int flush = 0; - struct magic *m = &magic[magindex]; -+ file_regex_t** m_rxcomp = &magic_rxcomp[magindex]; - - if (m->type != FILE_NAME) - if ((IS_STRING(m->type) && -@@ -257,7 +258,7 @@ flush: - *returnval = 1; - } - -- switch (magiccheck(ms, m)) { -+ switch (magiccheck(ms, m, m_rxcomp)) { - case -1: - return -1; - case 0: -@@ -318,6 +319,7 @@ flush: - while (magindex + 1 < nmagic && - magic[magindex + 1].cont_level != 0) { - m = &magic[++magindex]; -+ m_rxcomp = &magic_rxcomp[magindex]; - ms->line = m->lineno; /* for messages */ - - if (cont_level < m->cont_level) -@@ -371,7 +373,7 @@ flush: - break; - } - -- switch (flush ? 1 : magiccheck(ms, m)) { -+ switch (flush ? 1 : magiccheck(ms, m, m_rxcomp)) { - case -1: - return -1; - case 0: -@@ -655,7 +657,7 @@ mprint(struct magic_set *ms, struct magi - - if (m->str_flags & STRING_TRIM) - str = file_strtrim(str); -- -+ - if (file_printf(ms, F(ms, desc, "%s"), - file_printable(ms, sbuf, sizeof(sbuf), str, - sizeof(p->s) - (str - p->s))) == -1) -@@ -770,7 +772,7 @@ mprint(struct magic_set *ms, struct magi - return -1; - } - scp = (m->str_flags & STRING_TRIM) ? file_strtrim(cp) : cp; -- -+ - rval = file_printf(ms, F(ms, desc, "%s"), file_printable(ms, - sbuf, sizeof(sbuf), scp, ms->search.rm_len)); - free(cp); -@@ -1822,7 +1824,7 @@ mget(struct magic_set *ms, struct magic - for (mlp = ms->mlist[0]->next; mlp != ms->mlist[0]; - mlp = mlp->next) - { -- if ((rv = match(ms, mlp->magic, mlp->nmagic, &bb, 0, -+ if ((rv = match(ms, mlp->magic, mlp->magic_rxcomp, mlp->nmagic, &bb, 0, - BINTEST, text, 0, indir_count, name_count, - printed_something, need_separator, NULL, - NULL)) != 0) -@@ -1875,7 +1877,7 @@ mget(struct magic_set *ms, struct magic - nfound_match = 0; - (*name_count)++; - eoffset = ms->eoffset; -- rv = match(ms, ml.magic, ml.nmagic, b, offset + o, -+ rv = match(ms, ml.magic, ml.magic_rxcomp, ml.nmagic, b, offset + o, - mode, text, flip, indir_count, name_count, - printed_something, need_separator, returnval, - &nfound_match); -@@ -1999,7 +2001,7 @@ file_strncmp16(const char *a, const char - } - - private int --magiccheck(struct magic_set *ms, struct magic *m) -+magiccheck(struct magic_set *ms, struct magic *m, file_regex_t** m_cache) - { - uint64_t l = m->value.q; - uint64_t v; -@@ -2182,27 +2184,32 @@ magiccheck(struct magic_set *ms, struct - } - case FILE_REGEX: { - int rc; -- file_regex_t rx; -+ file_regex_t *rx = *m_cache; - const char *search; - - if (ms->search.s == NULL) - return 0; - -+ if (rx == NULL) { -+ rx = *m_cache = CAST(file_regex_t*, malloc(sizeof(file_regex_t))); -+ rc = file_regcomp(rx, m->value.s, -+ REG_EXTENDED|REG_NEWLINE| -+ ((m->str_flags & STRING_IGNORE_CASE) ? REG_ICASE : 0)); -+ if (rc) { -+ file_regerror(rx, rc, ms); -+ file_regfree(rx); -+ v = CAST(uint64_t, -1); -+ break; -+ } -+ } - l = 0; -- rc = file_regcomp(&rx, m->value.s, -- REG_EXTENDED|REG_NEWLINE| -- ((m->str_flags & STRING_IGNORE_CASE) ? REG_ICASE : 0)); -- if (rc) { -- file_regerror(&rx, rc, ms); -- v = CAST(uint64_t, -1); -- } else { -+ { - regmatch_t pmatch; - size_t slen = ms->search.s_len; - char *copy; - if (slen != 0) { - copy = CAST(char *, malloc(slen)); - if (copy == NULL) { -- file_regfree(&rx); - file_error(ms, errno, - "can't allocate %" SIZE_T_FORMAT "u bytes", - slen); -@@ -2215,14 +2222,14 @@ magiccheck(struct magic_set *ms, struct - search = CCAST(char *, ""); - copy = NULL; - } -- rc = file_regexec(&rx, RCAST(const char *, search), -+ rc = file_regexec(rx, RCAST(const char *, search), - 1, &pmatch, 0); - free(copy); - switch (rc) { - case 0: - ms->search.s += CAST(int, pmatch.rm_so); - ms->search.offset += CAST(size_t, pmatch.rm_so); -- ms->search.rm_len = CAST(size_t, -+ ms->search.rm_len = CAST(size_t, - pmatch.rm_eo - pmatch.rm_so); - v = 0; - break; -@@ -2232,12 +2239,11 @@ magiccheck(struct magic_set *ms, struct - break; - - default: -- file_regerror(&rx, rc, ms); -+ file_regerror(rx, rc, ms); - v = CAST(uint64_t, -1); - break; - } - } -- file_regfree(&rx); - if (v == CAST(uint64_t, -1)) - return -1; - break; diff --git a/file.spec b/file.spec index b6549e3..d63dcfc 100644 --- a/file.spec +++ b/file.spec @@ -45,7 +45,6 @@ Source4: ftp://ftp.astron.com/pub/file/file-%{version}.tar.gz.asc Source5: file.keyring Patch: file-5.41.dif Patch1: file-5.19-misc.dif -Patch2: 0001-Cache-compiled-regexps-between-magic-matches.patch Patch4: file-4.24-autoconf.dif Patch5: file-5.14-tex.dif Patch7: file-4.20-ssd.dif @@ -109,7 +108,6 @@ to develop applications that require the magic "file" interface. %prep %setup -q -n file-%{version} %patch1 -p0 -b .misc -%patch2 -p1 -b .cache %patch4 -p0 -b .conf %patch5 -p0 -b .tex %patch7 -p0 -b .ssd @@ -140,7 +138,7 @@ rm -f ltcf-c.sh ltconfig ltmain.sh autoreconf -fiv export CFLAGS="%{optflags} -DHOWMANY=69632 -fPIE $(pkg-config libseccomp --cflags)" %configure --disable-silent-rules --datadir=%{_miscdir} \ - --disable-static --disable-libseccomp \ + --disable-static \ --enable-fsect-man5 make %{?_smp_mflags} pkgdatadir='$(datadir)' LDFLAGS="-pie"