From 7d438e28c16773e28a3707935c8e5d9927a515a7 Mon Sep 17 00:00:00 2001 From: Christos Zoulas Date: Sat, 19 Mar 2022 19:52:09 +0000 Subject: [PATCH] Now that we are cacheing regex's we cannot assume that we always do regcomp->regexec->regfree, so this causes memory corruption (and increased memory use with all the locale copies) in xlocale systems. Instead save and restore locales in regcomp and regexec as needed. --- src/apprentice.c | 13 +++++++++-- src/file.h | 18 +++++++-------- src/funcs.c | 57 +++++++++++++++++++++++++++++------------------- src/softmagic.c | 11 +++++----- 4 files changed, 59 insertions(+), 40 deletions(-) Index: file-5.41/src/apprentice.c =================================================================== --- file-5.41.orig/src/apprentice.c +++ file-5.41/src/apprentice.c @@ -516,6 +516,9 @@ file_ms_free(struct magic_set *ms) free(ms->o.pbuf); free(ms->o.buf); free(ms->c.li); +#ifdef USE_C_LOCALE + freelocale(ms->c_lc_ctype); +#endif free(ms); } @@ -555,6 +558,10 @@ file_ms_alloc(int flags) ms->regex_max = FILE_REGEX_MAX; ms->bytes_max = FILE_BYTES_MAX; ms->encoding_max = FILE_ENCODING_MAX; +#ifdef USE_C_LOCALE + ms->c_lc_ctype = newlocale(LC_CTYPE_MASK, "C", 0); + assert(ms->c_lc_ctype != NULL); +#endif return ms; free: free(ms); @@ -628,6 +635,7 @@ mlist_free_one(struct mlist *ml) if (ml->magic_rxcomp[i]) { file_regfree(ml->magic_rxcomp[i]); free(ml->magic_rxcomp[i]); + ml->magic_rxcomp[i] = NULL; } } free(ml->magic_rxcomp); @@ -2741,7 +2749,8 @@ getvalue(struct magic_set *ms, struct ma } if (m->type == FILE_REGEX) { file_regex_t rx; - int rc = file_regcomp(&rx, m->value.s, REG_EXTENDED); + int rc = file_regcomp(ms, &rx, m->value.s, + REG_EXTENDED); if (rc) { if (ms->flags & MAGIC_CHECK) file_regerror(&rx, rc, ms); Index: file-5.41/src/file.h =================================================================== --- file-5.41.orig/src/file.h +++ file-5.41/src/file.h @@ -173,13 +173,6 @@ typedef struct { const char *pat; -#if defined(HAVE_NEWLOCALE) && defined(HAVE_USELOCALE) && defined(HAVE_FREELOCALE) -#define USE_C_LOCALE - locale_t old_lc_ctype; - locale_t c_lc_ctype; -#else - char *old_lc_ctype; -#endif int rc; regex_t rx; } file_regex_t; @@ -495,6 +488,10 @@ struct magic_set { #define FILE_NAME_MAX 50 #define FILE_REGEX_MAX 8192 #define FILE_ENCODING_MAX (64 * 1024) +#if defined(HAVE_NEWLOCALE) && defined(HAVE_USELOCALE) && defined(HAVE_FREELOCALE) +#define USE_C_LOCALE + locale_t c_lc_ctype; +#endif }; /* Type for Unicode characters */ @@ -588,9 +585,10 @@ protected int buffer_fill(const struct b -protected int file_regcomp(file_regex_t *, const char *, int); -protected int file_regexec(file_regex_t *, const char *, size_t, regmatch_t *, +protected int file_regcomp(struct magic_set *, file_regex_t *, const char *, int); +protected int file_regexec(struct magic_set *, file_regex_t *, const char *, + size_t, regmatch_t *, int); protected void file_regfree(file_regex_t *); protected void file_regerror(file_regex_t *, int, struct magic_set *); Index: file-5.41/src/funcs.c =================================================================== --- file-5.41.orig/src/funcs.c +++ file-5.41/src/funcs.c @@ -634,13 +634,13 @@ file_replace(struct magic_set *ms, const file_regex_t rx; int rc, rv = -1; - rc = file_regcomp(&rx, pat, REG_EXTENDED); + rc = file_regcomp(ms, &rx, pat, REG_EXTENDED); if (rc) { file_regerror(&rx, rc, ms); } else { regmatch_t rm; int nm = 0; - while (file_regexec(&rx, ms->o.buf, 1, &rm, 0) == 0) { + while (file_regexec(ms, &rx, ms->o.buf, 1, &rm, 0) == 0) { ms->o.buf[rm.rm_so] = '\0'; if (file_printf(ms, "%s%s", rep, rm.rm_eo != 0 ? ms->o.buf + rm.rm_eo : "") == -1) @@ -655,34 +655,52 @@ out: } protected int -file_regcomp(file_regex_t *rx, const char *pat, int flags) +file_regcomp(struct magic_set *ms, file_regex_t *rx, const char *pat, int flags) { #ifdef USE_C_LOCALE - rx->c_lc_ctype = newlocale(LC_CTYPE_MASK, "C", 0); - assert(rx->c_lc_ctype != NULL); - rx->old_lc_ctype = uselocale(rx->c_lc_ctype); - assert(rx->old_lc_ctype != NULL); + locale_t old = uselocale(ms->c_lc_ctype); + assert(old != NULL); #else - rx->old_lc_ctype = setlocale(LC_CTYPE, NULL); - assert(rx->old_lc_ctype != NULL); - rx->old_lc_ctype = strdup(rx->old_lc_ctype); - assert(rx->old_lc_ctype != NULL); + char old[1024]; + strlcpy(old, setlocale(LC_CTYPE, NULL), sizeof(old)); (void)setlocale(LC_CTYPE, "C"); #endif rx->pat = pat; - return rx->rc = regcomp(&rx->rx, pat, flags); + rx->rc = regcomp(&rx->rx, pat, flags); + +#ifdef USE_C_LOCALE + uselocale(old); +#else + (void)setlocale(LC_CTYPE, old); +#endif + return rx->rc; } protected int -file_regexec(file_regex_t *rx, const char *str, size_t nmatch, - regmatch_t* pmatch, int eflags) +file_regexec(struct magic_set *ms, file_regex_t *rx, const char *str, + size_t nmatch, regmatch_t* pmatch, int eflags) { +#ifdef USE_C_LOCALE + locale_t old = uselocale(ms->c_lc_ctype); + assert(old != NULL); +#else + char old[1024]; + strlcpy(old, setlocale(LC_CTYPE, NULL), sizeof(old)); + (void)setlocale(LC_CTYPE, "C"); +#endif + int rc; assert(rx->rc == 0); /* XXX: force initialization because glibc does not always do this */ if (nmatch != 0) memset(pmatch, 0, nmatch * sizeof(*pmatch)); - return regexec(&rx->rx, str, nmatch, pmatch, eflags); + rc = regexec(&rx->rx, str, nmatch, pmatch, eflags); +#ifdef USE_C_LOCALE + uselocale(old); +#else + (void)setlocale(LC_CTYPE, old); +#endif + return rc; } protected void @@ -690,13 +708,6 @@ file_regfree(file_regex_t *rx) { if (rx->rc == 0) regfree(&rx->rx); -#ifdef USE_C_LOCALE - (void)uselocale(rx->old_lc_ctype); - freelocale(rx->c_lc_ctype); -#else - (void)setlocale(LC_CTYPE, rx->old_lc_ctype); - free(rx->old_lc_ctype); -#endif } protected void Index: file-5.41/src/softmagic.c =================================================================== --- file-5.41.orig/src/softmagic.c +++ file-5.41/src/softmagic.c @@ -479,11 +479,11 @@ check_fmt(struct magic_set *ms, const ch if (strchr(fmt, '%') == NULL) return 0; - rc = file_regcomp(&rx, "%[-0-9\\.]*s", REG_EXTENDED|REG_NOSUB); + rc = file_regcomp(ms, &rx, "%[-0-9\\.]*s", REG_EXTENDED|REG_NOSUB); if (rc) { file_regerror(&rx, rc, ms); } else { - rc = file_regexec(&rx, fmt, 0, 0, 0); + rc = file_regexec(ms, &rx, fmt, 0, 0, 0); rv = !rc; } file_regfree(&rx); @@ -2192,7 +2192,7 @@ magiccheck(struct magic_set *ms, struct if (rx == NULL) { rx = *m_cache = CAST(file_regex_t*, malloc(sizeof(file_regex_t))); - rc = file_regcomp(rx, m->value.s, + rc = file_regcomp(ms, rx, m->value.s, REG_EXTENDED|REG_NEWLINE| ((m->str_flags & STRING_IGNORE_CASE) ? REG_ICASE : 0)); if (rc) { @@ -2222,7 +2222,7 @@ magiccheck(struct magic_set *ms, struct search = CCAST(char *, ""); copy = NULL; } - rc = file_regexec(rx, RCAST(const char *, search), + rc = file_regexec(ms, rx, RCAST(const char *, search), 1, &pmatch, 0); free(copy); switch (rc) {