diff --git a/file-5.41-cache-regexps-locale-restore.patch b/file-5.41-cache-regexps-locale-restore.patch index 3012443..65120b6 100644 --- a/file-5.41-cache-regexps-locale-restore.patch +++ b/file-5.41-cache-regexps-locale-restore.patch @@ -1,101 +1,236 @@ -From c25329eabeaba048cb6ef1448d1ee040c62c415f Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Dirk=20M=C3=BCller?= -Date: Tue, 22 Mar 2022 22:28:14 +0100 -Subject: [PATCH] Restore locale handling after regex caching +From 7d438e28c16773e28a3707935c8e5d9927a515a7 Mon Sep 17 00:00:00 2001 +From: Christos Zoulas +Date: Sat, 19 Mar 2022 19:52:09 +0000 +Subject: [PATCH] Now that we are cacheing regex's we cannot assume that we + always do regcomp->regexec->regfree, so this causes memory corruption (and + increased memory use with all the locale copies) in xlocale systems. Instead + save and restore locales in regcomp and regexec as needed. -file_regcomp/file_regfree had the side effect of setting and restoring -locale C_TYPE to C to have predictable regex matching. With regcomp -caching file_regfree has been changed to be only called at destruction -time, which means the library changed the locale setting for anything -else as well. Restore old behavior by splitting save/restore into -separate functions which are surrounding regcomp() and regexec() only. --- - src/funcs.c | 39 ++++++++++++++++++++++++++++++++------- - 1 file changed, 32 insertions(+), 7 deletions(-) + src/apprentice.c | 13 +++++++++-- + src/file.h | 18 +++++++-------- + src/funcs.c | 57 +++++++++++++++++++++++++++++------------------- + src/softmagic.c | 11 +++++----- + 4 files changed, 59 insertions(+), 40 deletions(-) -diff --git a/src/funcs.c b/src/funcs.c -index dcfd352d..7ecaff33 100644 ---- a/src/funcs.c -+++ b/src/funcs.c -@@ -658,35 +658,62 @@ out: - return rv; +Index: file-5.41/src/apprentice.c +=================================================================== +--- file-5.41.orig/src/apprentice.c ++++ file-5.41/src/apprentice.c +@@ -516,6 +516,9 @@ file_ms_free(struct magic_set *ms) + free(ms->o.pbuf); + free(ms->o.buf); + free(ms->c.li); ++#ifdef USE_C_LOCALE ++ freelocale(ms->c_lc_ctype); ++#endif + free(ms); } -+static void -+file_reg_set_ctype(file_regex_t *rx) -+{ +@@ -555,6 +558,10 @@ file_ms_alloc(int flags) + ms->regex_max = FILE_REGEX_MAX; + ms->bytes_max = FILE_BYTES_MAX; + ms->encoding_max = FILE_ENCODING_MAX; +#ifdef USE_C_LOCALE -+ rx->old_lc_ctype = uselocale(rx->c_lc_ctype); -+ assert(rx->old_lc_ctype != NULL); -+#else -+ (void)setlocale(LC_CTYPE, "C"); ++ ms->c_lc_ctype = newlocale(LC_CTYPE_MASK, "C", 0); ++ assert(ms->c_lc_ctype != NULL); +#endif -+} -+ -+static void -+file_reg_restore_ctype(file_regex_t *rx) -+{ -+#ifdef USE_C_LOCALE -+ (void)uselocale(rx->old_lc_ctype); -+#else -+ (void)setlocale(LC_CTYPE, rx->old_lc_ctype); + return ms; + free: + free(ms); +@@ -628,6 +635,7 @@ mlist_free_one(struct mlist *ml) + if (ml->magic_rxcomp[i]) { + file_regfree(ml->magic_rxcomp[i]); + free(ml->magic_rxcomp[i]); ++ ml->magic_rxcomp[i] = NULL; + } + } + free(ml->magic_rxcomp); +@@ -2741,7 +2749,8 @@ getvalue(struct magic_set *ms, struct ma + } + if (m->type == FILE_REGEX) { + file_regex_t rx; +- int rc = file_regcomp(&rx, m->value.s, REG_EXTENDED); ++ int rc = file_regcomp(ms, &rx, m->value.s, ++ REG_EXTENDED); + if (rc) { + if (ms->flags & MAGIC_CHECK) + file_regerror(&rx, rc, ms); +Index: file-5.41/src/file.h +=================================================================== +--- file-5.41.orig/src/file.h ++++ file-5.41/src/file.h +@@ -173,13 +173,6 @@ + + typedef struct { + const char *pat; +-#if defined(HAVE_NEWLOCALE) && defined(HAVE_USELOCALE) && defined(HAVE_FREELOCALE) +-#define USE_C_LOCALE +- locale_t old_lc_ctype; +- locale_t c_lc_ctype; +-#else +- char *old_lc_ctype; +-#endif + int rc; + regex_t rx; + } file_regex_t; +@@ -495,6 +488,10 @@ struct magic_set { + #define FILE_NAME_MAX 50 + #define FILE_REGEX_MAX 8192 + #define FILE_ENCODING_MAX (64 * 1024) ++#if defined(HAVE_NEWLOCALE) && defined(HAVE_USELOCALE) && defined(HAVE_FREELOCALE) ++#define USE_C_LOCALE ++ locale_t c_lc_ctype; +#endif -+} -+ + }; + + /* Type for Unicode characters */ +@@ -588,9 +585,10 @@ protected int buffer_fill(const struct b + + + +-protected int file_regcomp(file_regex_t *, const char *, int); +-protected int file_regexec(file_regex_t *, const char *, size_t, regmatch_t *, ++protected int file_regcomp(struct magic_set *, file_regex_t *, const char *, + int); ++protected int file_regexec(struct magic_set *, file_regex_t *, const char *, ++ size_t, regmatch_t *, int); + protected void file_regfree(file_regex_t *); + protected void file_regerror(file_regex_t *, int, struct magic_set *); + +Index: file-5.41/src/funcs.c +=================================================================== +--- file-5.41.orig/src/funcs.c ++++ file-5.41/src/funcs.c +@@ -634,13 +634,13 @@ file_replace(struct magic_set *ms, const + file_regex_t rx; + int rc, rv = -1; + +- rc = file_regcomp(&rx, pat, REG_EXTENDED); ++ rc = file_regcomp(ms, &rx, pat, REG_EXTENDED); + if (rc) { + file_regerror(&rx, rc, ms); + } else { + regmatch_t rm; + int nm = 0; +- while (file_regexec(&rx, ms->o.buf, 1, &rm, 0) == 0) { ++ while (file_regexec(ms, &rx, ms->o.buf, 1, &rm, 0) == 0) { + ms->o.buf[rm.rm_so] = '\0'; + if (file_printf(ms, "%s%s", rep, + rm.rm_eo != 0 ? ms->o.buf + rm.rm_eo : "") == -1) +@@ -655,34 +655,52 @@ out: + } + protected int - file_regcomp(file_regex_t *rx, const char *pat, int flags) +-file_regcomp(file_regex_t *rx, const char *pat, int flags) ++file_regcomp(struct magic_set *ms, file_regex_t *rx, const char *pat, int flags) { #ifdef USE_C_LOCALE - rx->c_lc_ctype = newlocale(LC_CTYPE_MASK, "C", 0); - assert(rx->c_lc_ctype != NULL); +- rx->c_lc_ctype = newlocale(LC_CTYPE_MASK, "C", 0); +- assert(rx->c_lc_ctype != NULL); - rx->old_lc_ctype = uselocale(rx->c_lc_ctype); - assert(rx->old_lc_ctype != NULL); ++ locale_t old = uselocale(ms->c_lc_ctype); ++ assert(old != NULL); #else - rx->old_lc_ctype = setlocale(LC_CTYPE, NULL); - assert(rx->old_lc_ctype != NULL); - rx->old_lc_ctype = strdup(rx->old_lc_ctype); - assert(rx->old_lc_ctype != NULL); -- (void)setlocale(LC_CTYPE, "C"); +- rx->old_lc_ctype = setlocale(LC_CTYPE, NULL); +- assert(rx->old_lc_ctype != NULL); +- rx->old_lc_ctype = strdup(rx->old_lc_ctype); +- assert(rx->old_lc_ctype != NULL); ++ char old[1024]; ++ strlcpy(old, setlocale(LC_CTYPE, NULL), sizeof(old)); + (void)setlocale(LC_CTYPE, "C"); #endif rx->pat = pat; - return rx->rc = regcomp(&rx->rx, pat, flags); -+ file_reg_set_ctype(rx); + rx->rc = regcomp(&rx->rx, pat, flags); -+ file_reg_restore_ctype(rx); + ++#ifdef USE_C_LOCALE ++ uselocale(old); ++#else ++ (void)setlocale(LC_CTYPE, old); ++#endif + return rx->rc; } protected int - file_regexec(file_regex_t *rx, const char *str, size_t nmatch, - regmatch_t* pmatch, int eflags) +-file_regexec(file_regex_t *rx, const char *str, size_t nmatch, +- regmatch_t* pmatch, int eflags) ++file_regexec(struct magic_set *ms, file_regex_t *rx, const char *str, ++ size_t nmatch, regmatch_t* pmatch, int eflags) { ++#ifdef USE_C_LOCALE ++ locale_t old = uselocale(ms->c_lc_ctype); ++ assert(old != NULL); ++#else ++ char old[1024]; ++ strlcpy(old, setlocale(LC_CTYPE, NULL), sizeof(old)); ++ (void)setlocale(LC_CTYPE, "C"); ++#endif + int rc; assert(rx->rc == 0); /* XXX: force initialization because glibc does not always do this */ if (nmatch != 0) memset(pmatch, 0, nmatch * sizeof(*pmatch)); - return regexec(&rx->rx, str, nmatch, pmatch, eflags); -+ file_reg_set_ctype(rx); + rc = regexec(&rx->rx, str, nmatch, pmatch, eflags); -+ file_reg_restore_ctype(rx); -+ ++#ifdef USE_C_LOCALE ++ uselocale(old); ++#else ++ (void)setlocale(LC_CTYPE, old); ++#endif + return rc; } protected void -@@ -695,10 +722,8 @@ file_regfree(file_regex_t *rx) +@@ -690,13 +708,6 @@ file_regfree(file_regex_t *rx) + { if (rx->rc == 0) regfree(&rx->rx); - #ifdef USE_C_LOCALE +-#ifdef USE_C_LOCALE - (void)uselocale(rx->old_lc_ctype); - freelocale(rx->c_lc_ctype); - #else +- freelocale(rx->c_lc_ctype); +-#else - (void)setlocale(LC_CTYPE, rx->old_lc_ctype); - free(rx->old_lc_ctype); - #endif +- free(rx->old_lc_ctype); +-#endif } --- -2.35.1 - + + protected void +Index: file-5.41/src/softmagic.c +=================================================================== +--- file-5.41.orig/src/softmagic.c ++++ file-5.41/src/softmagic.c +@@ -479,11 +479,11 @@ check_fmt(struct magic_set *ms, const ch + if (strchr(fmt, '%') == NULL) + return 0; + +- rc = file_regcomp(&rx, "%[-0-9\\.]*s", REG_EXTENDED|REG_NOSUB); ++ rc = file_regcomp(ms, &rx, "%[-0-9\\.]*s", REG_EXTENDED|REG_NOSUB); + if (rc) { + file_regerror(&rx, rc, ms); + } else { +- rc = file_regexec(&rx, fmt, 0, 0, 0); ++ rc = file_regexec(ms, &rx, fmt, 0, 0, 0); + rv = !rc; + } + file_regfree(&rx); +@@ -2192,7 +2192,7 @@ magiccheck(struct magic_set *ms, struct + + if (rx == NULL) { + rx = *m_cache = CAST(file_regex_t*, malloc(sizeof(file_regex_t))); +- rc = file_regcomp(rx, m->value.s, ++ rc = file_regcomp(ms, rx, m->value.s, + REG_EXTENDED|REG_NEWLINE| + ((m->str_flags & STRING_IGNORE_CASE) ? REG_ICASE : 0)); + if (rc) { +@@ -2222,7 +2222,7 @@ magiccheck(struct magic_set *ms, struct + search = CCAST(char *, ""); + copy = NULL; + } +- rc = file_regexec(rx, RCAST(const char *, search), ++ rc = file_regexec(ms, rx, RCAST(const char *, search), + 1, &pmatch, 0); + free(copy); + switch (rc) {