SHA256
1
0
forked from pool/file

Accepting request 963483 from home:dirkmueller:Factory

- add file-5.41-cache-regexps.patch to cache regexp lookups 

- spec-cleaner run

OBS-URL: https://build.opensuse.org/request/show/963483
OBS-URL: https://build.opensuse.org/package/show/Base:System/file?expand=0&rev=237
This commit is contained in:
Dr. Werner Fink 2022-03-21 09:18:32 +00:00 committed by Git OBS Bridge
parent d36175d596
commit 97ba010f1a
6 changed files with 366 additions and 11 deletions

View File

@ -0,0 +1,346 @@
From 1957db8212e9c74e5d626de3023e49d0bb502052 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dirk=20M=C3=BCller?= <dirk@dmllr.de>
Date: Fri, 11 Mar 2022 23:51:55 +0100
Subject: [PATCH] Cache compiled regexps between magic matches
regcomp() is relatively expensive compared to regexec() for matching,
so it helps to only compile once and then reuse the compiled version
for future matches of the same magic.
When doing the equivalent of `find | xargs file`, this provides a massive
speedup, between a factor of 2 and 4 depending on how heavily the magic
relies on regexps.
The memory overhead is moderate (~200 kB) and regexps are compiled
lazily, so it doesn't add significant overhead to single-match use cases.
---
src/apprentice.c | 26 +++++++++++++++++++----
src/file.h | 40 ++++++++++++++++++-----------------
src/softmagic.c | 54 +++++++++++++++++++++++++++---------------------
3 files changed, 73 insertions(+), 47 deletions(-)
Index: file-5.41/src/apprentice.c
===================================================================
--- file-5.41.orig/src/apprentice.c
+++ file-5.41/src/apprentice.c
@@ -427,7 +427,14 @@ add_mlist(struct mlist *mlp, struct magi
ml->map = idx == 0 ? map : NULL;
ml->magic = map->magic[idx];
ml->nmagic = map->nmagic[idx];
-
+ ml->magic_rxcomp = NULL;
+ if (ml->nmagic) {
+ ml->magic_rxcomp = CAST(file_regex_t**, calloc(ml->nmagic, sizeof(file_regex_t*)));
+ if (ml->magic_rxcomp == NULL) {
+ free(ml);
+ return -1;
+ }
+ }
mlp->prev->next = ml;
ml->prev = mlp->prev;
ml->next = mlp;
@@ -612,8 +619,19 @@ mlist_free_all(struct magic_set *ms)
private void
mlist_free_one(struct mlist *ml)
{
+ size_t i;
+
if (ml->map)
apprentice_unmap(CAST(struct magic_map *, ml->map));
+
+ for (i = 0; i < ml->nmagic; ++i) {
+ if (ml->magic_rxcomp[i]) {
+ file_regfree(ml->magic_rxcomp[i]);
+ free(ml->magic_rxcomp[i]);
+ }
+ }
+ free(ml->magic_rxcomp);
+ ml->magic_rxcomp = NULL;
free(ml);
}
@@ -3489,16 +3507,16 @@ file_magicfind(struct magic_set *ms, con
for (ml = mlist->next; ml != mlist; ml = ml->next) {
struct magic *ma = ml->magic;
- uint32_t nma = ml->nmagic;
- for (i = 0; i < nma; i++) {
+ for (i = 0; i < ml->nmagic; i++) {
if (ma[i].type != FILE_NAME)
continue;
if (strcmp(ma[i].value.s, name) == 0) {
v->magic = &ma[i];
- for (j = i + 1; j < nma; j++)
+ for (j = i + 1; j < ml->nmagic; j++)
if (ma[j].cont_level == 0)
break;
v->nmagic = j - i;
+ v->magic_rxcomp = ml->magic_rxcomp;
return 0;
}
}
Index: file-5.41/src/file.h
===================================================================
--- file-5.41.orig/src/file.h
+++ file-5.41/src/file.h
@@ -88,6 +88,10 @@
/* Do this here and now, because struct stat gets re-defined on solaris */
#include <sys/stat.h>
#include <stdarg.h>
+#include <locale.h>
+#if defined(HAVE_XLOCALE_H)
+#include <xlocale.h>
+#endif
#define ENABLE_CONDITIONALS
@@ -167,6 +171,19 @@
#define FILE_COMPILE 2
#define FILE_LIST 3
+typedef struct {
+ const char *pat;
+#if defined(HAVE_NEWLOCALE) && defined(HAVE_USELOCALE) && defined(HAVE_FREELOCALE)
+#define USE_C_LOCALE
+ locale_t old_lc_ctype;
+ locale_t c_lc_ctype;
+#else
+ char *old_lc_ctype;
+#endif
+ int rc;
+ regex_t rx;
+} file_regex_t;
+
struct buffer {
int fd;
struct stat st;
@@ -397,9 +414,10 @@ struct magic {
/* list of magic entries */
struct mlist {
- struct magic *magic; /* array of magic entries */
- uint32_t nmagic; /* number of entries in array */
- void *map; /* internal resources used by entry */
+ struct magic *magic; /* array of magic entries */
+ file_regex_t **magic_rxcomp; /* array of compiled regexps */
+ size_t nmagic; /* number of entries in array */
+ void *map; /* internal resources used by entry */
struct mlist *next, *prev;
};
@@ -568,23 +586,7 @@ protected void buffer_init(struct buffer
protected void buffer_fini(struct buffer *);
protected int buffer_fill(const struct buffer *);
-#include <locale.h>
-#if defined(HAVE_XLOCALE_H)
-#include <xlocale.h>
-#endif
-typedef struct {
- const char *pat;
-#if defined(HAVE_NEWLOCALE) && defined(HAVE_USELOCALE) && defined(HAVE_FREELOCALE)
-#define USE_C_LOCALE
- locale_t old_lc_ctype;
- locale_t c_lc_ctype;
-#else
- char *old_lc_ctype;
-#endif
- int rc;
- regex_t rx;
-} file_regex_t;
protected int file_regcomp(file_regex_t *, const char *, int);
protected int file_regexec(file_regex_t *, const char *, size_t, regmatch_t *,
Index: file-5.41/src/softmagic.c
===================================================================
--- file-5.41.orig/src/softmagic.c
+++ file-5.41/src/softmagic.c
@@ -43,7 +43,7 @@ FILE_RCSID("@(#)$File: softmagic.c,v 1.3
#include <time.h>
#include "der.h"
-private int match(struct magic_set *, struct magic *, uint32_t,
+private int match(struct magic_set *, struct magic *, file_regex_t **, uint32_t,
const struct buffer *, size_t, int, int, int, uint16_t *,
uint16_t *, int *, int *, int *, int *);
private int mget(struct magic_set *, struct magic *, const struct buffer *,
@@ -52,7 +52,7 @@ private int mget(struct magic_set *, str
uint16_t *, int *, int *, int *, int *);
private int msetoffset(struct magic_set *, struct magic *, struct buffer *,
const struct buffer *, size_t, unsigned int);
-private int magiccheck(struct magic_set *, struct magic *);
+private int magiccheck(struct magic_set *, struct magic *, file_regex_t **);
private int32_t mprint(struct magic_set *, struct magic *);
private int moffset(struct magic_set *, struct magic *, const struct buffer *,
int32_t *);
@@ -131,7 +131,7 @@ file_softmagic(struct magic_set *ms, con
}
for (ml = ms->mlist[0]->next; ml != ms->mlist[0]; ml = ml->next)
- if ((rv = match(ms, ml->magic, ml->nmagic, b, 0, mode,
+ if ((rv = match(ms, ml->magic, ml->magic_rxcomp, ml->nmagic, b, 0, mode,
text, 0, indir_count, name_count,
&printed_something, &need_separator, NULL, NULL)) != 0)
return rv;
@@ -191,7 +191,7 @@ file_fmtcheck(struct magic_set *ms, cons
* so that higher-level continuations are processed.
*/
private int
-match(struct magic_set *ms, struct magic *magic, uint32_t nmagic,
+match(struct magic_set *ms, struct magic *magic, file_regex_t **magic_rxcomp, uint32_t nmagic,
const struct buffer *b, size_t offset, int mode, int text,
int flip, uint16_t *indir_count, uint16_t *name_count,
int *printed_something, int *need_separator, int *returnval,
@@ -220,6 +220,7 @@ match(struct magic_set *ms, struct magic
for (magindex = 0; magindex < nmagic; magindex++) {
int flush = 0;
struct magic *m = &magic[magindex];
+ file_regex_t** m_rxcomp = &magic_rxcomp[magindex];
if (m->type != FILE_NAME)
if ((IS_STRING(m->type) &&
@@ -257,7 +258,7 @@ flush:
*returnval = 1;
}
- switch (magiccheck(ms, m)) {
+ switch (magiccheck(ms, m, m_rxcomp)) {
case -1:
return -1;
case 0:
@@ -318,6 +319,7 @@ flush:
while (magindex + 1 < nmagic &&
magic[magindex + 1].cont_level != 0) {
m = &magic[++magindex];
+ m_rxcomp = &magic_rxcomp[magindex];
ms->line = m->lineno; /* for messages */
if (cont_level < m->cont_level)
@@ -371,7 +373,7 @@ flush:
break;
}
- switch (flush ? 1 : magiccheck(ms, m)) {
+ switch (flush ? 1 : magiccheck(ms, m, m_rxcomp)) {
case -1:
return -1;
case 0:
@@ -655,7 +657,7 @@ mprint(struct magic_set *ms, struct magi
if (m->str_flags & STRING_TRIM)
str = file_strtrim(str);
-
+
if (file_printf(ms, F(ms, desc, "%s"),
file_printable(ms, sbuf, sizeof(sbuf), str,
sizeof(p->s) - (str - p->s))) == -1)
@@ -770,7 +772,7 @@ mprint(struct magic_set *ms, struct magi
return -1;
}
scp = (m->str_flags & STRING_TRIM) ? file_strtrim(cp) : cp;
-
+
rval = file_printf(ms, F(ms, desc, "%s"), file_printable(ms,
sbuf, sizeof(sbuf), scp, ms->search.rm_len));
free(cp);
@@ -1822,7 +1824,7 @@ mget(struct magic_set *ms, struct magic
for (mlp = ms->mlist[0]->next; mlp != ms->mlist[0];
mlp = mlp->next)
{
- if ((rv = match(ms, mlp->magic, mlp->nmagic, &bb, 0,
+ if ((rv = match(ms, mlp->magic, mlp->magic_rxcomp, mlp->nmagic, &bb, 0,
BINTEST, text, 0, indir_count, name_count,
printed_something, need_separator, NULL,
NULL)) != 0)
@@ -1875,7 +1877,7 @@ mget(struct magic_set *ms, struct magic
nfound_match = 0;
(*name_count)++;
eoffset = ms->eoffset;
- rv = match(ms, ml.magic, ml.nmagic, b, offset + o,
+ rv = match(ms, ml.magic, ml.magic_rxcomp, ml.nmagic, b, offset + o,
mode, text, flip, indir_count, name_count,
printed_something, need_separator, returnval,
&nfound_match);
@@ -1999,7 +2001,7 @@ file_strncmp16(const char *a, const char
}
private int
-magiccheck(struct magic_set *ms, struct magic *m)
+magiccheck(struct magic_set *ms, struct magic *m, file_regex_t** m_cache)
{
uint64_t l = m->value.q;
uint64_t v;
@@ -2182,27 +2184,32 @@ magiccheck(struct magic_set *ms, struct
}
case FILE_REGEX: {
int rc;
- file_regex_t rx;
+ file_regex_t *rx = *m_cache;
const char *search;
if (ms->search.s == NULL)
return 0;
+ if (rx == NULL) {
+ rx = *m_cache = CAST(file_regex_t*, malloc(sizeof(file_regex_t)));
+ rc = file_regcomp(rx, m->value.s,
+ REG_EXTENDED|REG_NEWLINE|
+ ((m->str_flags & STRING_IGNORE_CASE) ? REG_ICASE : 0));
+ if (rc) {
+ file_regerror(rx, rc, ms);
+ file_regfree(rx);
+ v = CAST(uint64_t, -1);
+ break;
+ }
+ }
l = 0;
- rc = file_regcomp(&rx, m->value.s,
- REG_EXTENDED|REG_NEWLINE|
- ((m->str_flags & STRING_IGNORE_CASE) ? REG_ICASE : 0));
- if (rc) {
- file_regerror(&rx, rc, ms);
- v = CAST(uint64_t, -1);
- } else {
+ {
regmatch_t pmatch;
size_t slen = ms->search.s_len;
char *copy;
if (slen != 0) {
copy = CAST(char *, malloc(slen));
if (copy == NULL) {
- file_regfree(&rx);
file_error(ms, errno,
"can't allocate %" SIZE_T_FORMAT "u bytes",
slen);
@@ -2215,14 +2222,14 @@ magiccheck(struct magic_set *ms, struct
search = CCAST(char *, "");
copy = NULL;
}
- rc = file_regexec(&rx, RCAST(const char *, search),
+ rc = file_regexec(rx, RCAST(const char *, search),
1, &pmatch, 0);
free(copy);
switch (rc) {
case 0:
ms->search.s += CAST(int, pmatch.rm_so);
ms->search.offset += CAST(size_t, pmatch.rm_so);
- ms->search.rm_len = CAST(size_t,
+ ms->search.rm_len = CAST(size_t,
pmatch.rm_eo - pmatch.rm_so);
v = 0;
break;
@@ -2232,12 +2239,11 @@ magiccheck(struct magic_set *ms, struct
break;
default:
- file_regerror(&rx, rc, ms);
+ file_regerror(rx, rc, ms);
v = CAST(uint64_t, -1);
break;
}
}
- file_regfree(&rx);
if (v == CAST(uint64_t, -1))
return -1;
break;

View File

@ -1,3 +1,8 @@
-------------------------------------------------------------------
Sat Mar 19 18:00:32 UTC 2022 - Dirk Müller <dmueller@suse.com>
- add file-5.41-cache-regexps.patch to cache regexp lookups
-------------------------------------------------------------------
Thu Feb 24 10:05:17 UTC 2022 - Dr. Werner Fink <werner@suse.de>

View File

@ -62,6 +62,7 @@ Patch31: file-5.19-biorad.dif
Patch32: file-5.19-clicfs.dif
Patch34: file-5.23-endian.patch
Patch37: file-secure_getenv.patch
Patch38: file-5.41-cache-regexps.patch
Patch39: file-5.28-btrfs-image.dif
# Upstream commits as patches
BuildRoot: %{_tmppath}/%{name}-%{version}-build
@ -125,6 +126,7 @@ to develop applications that require the magic "file" interface.
%patch32 -p0 -b .clicfs
%patch34 -p0 -b .endian
%patch37 -p1 -b .getenv
%patch38 -p1 -b .regexp
%patch39 -p1 -b .btrfs
%patch -b .0
test -s src/magic.h.in || cp -p src/magic.h src/magic.h.in

View File

@ -1,3 +1,8 @@
-------------------------------------------------------------------
Sat Mar 19 18:01:52 UTC 2022 - Dirk Müller <dmueller@suse.com>
- spec-cleaner run
-------------------------------------------------------------------
Tue Oct 19 09:55:47 UTC 2021 - Dr. Werner Fink <werner@suse.de>

View File

@ -19,25 +19,23 @@
# PyPI package name is file-magic. Version is taken from setup.py
%define file_magic_version 0.3.0
%{?!python_module:%define python_module() python-%{**} python3-%{**}}
%global _miscdir %{_datadir}/misc
Name: python-magic
BuildRequires: %{python_module setuptools}
BuildRequires: findutils
BuildRequires: libtool
BuildRequires: python-rpm-macros
BuildRequires: zlib-devel
URL: http://www.darwinsys.com/file/
Version: 5.41
Release: 0
Summary: Python module to use libmagic
License: BSD-3-Clause AND BSD-4-Clause
Group: Development/Languages/Python
%{expand:%(sed -n -e '/^Source0\?:/,/^BuildRoot:/p' <%{_sourcedir}/file.spec)}
URL: https://www.darwinsys.com/file/
Source99: file.spec
BuildRequires: %{python_module setuptools}
BuildRequires: findutils
BuildRequires: libtool
BuildRequires: python-rpm-macros
BuildRequires: zlib-devel
Requires: libmagic1
Provides: python-file-magic = %{file_magic_version}
%global _miscdir %{_datadir}/misc
%{expand:%(sed -n -e '/^Source0\?:/,/^BuildRoot:/p' <%{_sourcedir}/file.spec)}
%python_subpackages
%description
@ -58,7 +56,6 @@ pushd python
popd
%files %{python_files}
%defattr(-,root,root)
%doc python/README python/example.py
%{python_sitelib}/magic.py*
%pycache_only %{python_sitelib}/__pycache__