From c80065fe6900be5e794941e29b32440e9969b1c3 Mon Sep 17 00:00:00 2001 From: Christos Zoulas Date: Mon, 4 Jul 2022 19:44:35 +0000 Subject: [PATCH 1/5] PR/362: ro-ee: fix wide char printing --- src/file.c | 88 +++++++++++++++++++++++++++++++++++++++++------------ src/file.h | 4 +-- src/funcs.c | 4 +-- 3 files changed, 73 insertions(+), 23 deletions(-) diff --git src/file.c src/file.c index bb058ce1..5e89137d 100644 --- src/file.c +++ src/file.c @@ -32,7 +32,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: file.c,v 1.196 2022/07/04 17:00:51 christos Exp $") +FILE_RCSID("@(#)$File: file.c,v 1.197 2022/07/04 19:44:35 christos Exp $") #endif /* lint */ #include "magic.h" @@ -60,6 +60,12 @@ FILE_RCSID("@(#)$File: file.c,v 1.196 2022/07/04 17:00:51 christos Exp $") #ifdef HAVE_WCTYPE_H #include <wctype.h> #endif +#if defined(HAVE_WCHAR_H) && defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH) && \ + defined(HAVE_WCTYPE_H) +#define FILE_WIDE_SUPPORT +#else +#include <ctype.h> +#endif #if defined(HAVE_GETOPT_H) && defined(HAVE_STRUCT_OPTION) # include <getopt.h> @@ -550,6 +556,55 @@ out: file_err(EXIT_FAILURE, "Cannot allocate memory for file list"); return e; } +private void +file_octal(unsigned char c) +{ + putc('\\', stdout); + putc(((c >> 6) & 7) + '0', stdout); + putc(((c >> 3) & 7) + '0', stdout); + putc(((c >> 0) & 7) + '0', stdout); +} + +private void +fname_print(const char *inname) +{ + size_t n = strlen(inname); +#ifdef FILE_WIDE_SUPPORT + mbstate_t state; + wchar_t nextchar; + size_t bytesconsumed; + + + (void)mbrlen(NULL, 0, &state); + while (n > 0) { + bytesconsumed = mbrtowc(&nextchar, inname, n, &state); + if (bytesconsumed == CAST(size_t, -1) || + bytesconsumed == CAST(size_t, -2)) { + nextchar = *inname; + bytesconsumed = 1; + } + inname += bytesconsumed; + n -= bytesconsumed; + if (iswprint(nextchar)) { + putwc(nextchar, stdout); + continue; + } + /* XXX: What if it is > 255? 
*/ + file_octal(CAST(unsigned char, nextchar)); + } +#else + size_t i; + for (i = 0; i < n; i++) { + unsigned char c = CAST(unsigned char, inname[i]); + if (isprint(c)) { + putc(c, stdout); + continue; + } + file_octal(c); + } +#endif +} + /* * Called for each input file on the command line (or in a list of files) */ @@ -559,15 +614,13 @@ process(struct magic_set *ms, const char *inname, int wid) const char *type, c = nulsep > 1 ? '\0' : '\n'; int std_in = strcmp(inname, "-") == 0; int haderror = 0; - size_t plen = 4 * wid + 1; - char *pbuf, *pname; - - if ((pbuf = CAST(char *, malloc(plen))) == NULL) - file_err(EXIT_FAILURE, "Can't allocate %zu bytes", plen); if (wid > 0 && !bflag) { - pname = file_printable(ms, pbuf, plen, inname, wid); - (void)printf("%s", std_in ? "/dev/stdin" : pname); + const char *pname = std_in ? "/dev/stdin" : inname; + if ((ms->flags & MAGIC_RAW) == 0) + fname_print(pname); + else + (void)printf("%s", pname); if (nulsep) (void)putc('\0', stdout); if (nulsep < 2) { @@ -586,7 +639,6 @@ process(struct magic_set *ms, const char *inname, int wid) } if (nobuffer) haderror |= fflush(stdout) != 0; - free(pbuf); return haderror || type == NULL; } @@ -594,35 +646,33 @@ protected size_t file_mbswidth(struct magic_set *ms, const char *s) { size_t width = 0; -#if defined(HAVE_WCHAR_H) && defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH) && \ - defined(HAVE_WCTYPE_H) - size_t bytesconsumed, old_n, n; +#ifdef FILE_WIDE_SUPPORT + size_t bytesconsumed, n; mbstate_t state; wchar_t nextchar; - (void)memset(&state, 0, sizeof(mbstate_t)); - old_n = n = strlen(s); + + (void)mbrlen(NULL, 0, &state); + n = strlen(s); while (n > 0) { bytesconsumed = mbrtowc(&nextchar, s, n, &state); if (bytesconsumed == CAST(size_t, -1) || bytesconsumed == CAST(size_t, -2)) { - /* Something went wrong, return something reasonable */ - return old_n; + nextchar = *s; + bytesconsumed = 1; } width += ((ms->flags & MAGIC_RAW) != 0 || iswprint(nextchar)) ? 
wcwidth(nextchar) : 4; s += bytesconsumed, n -= bytesconsumed; } - return width; #else - while (*s) { + for (; *s; s++) { width += (ms->flags & MAGIC_RAW) != 0 || isprint(CAST(unsigned char, *s)) ? 1 : 4; } - - return strlen(s); #endif + return width; } private void diff --git src/file.h src/file.h index 343f62ea..69aad1dc 100644 --- src/file.h +++ src/file.h @@ -27,7 +27,7 @@ */ /* * file.h - definitions for file(1) program - * @(#)$File: file.h,v 1.234 2022/05/28 20:24:09 christos Exp $ + * @(#)$File: file.h,v 1.235 2022/07/04 19:44:35 christos Exp $ */ #ifndef __file_h__ @@ -575,7 +575,7 @@ protected size_t file_pstring_length_size(struct magic_set *, const struct magic *); protected size_t file_pstring_get_length(struct magic_set *, const struct magic *, const char *); -public char * file_printable(struct magic_set *, char *, size_t, +protected char * file_printable(struct magic_set *, char *, size_t, const char *, size_t); #ifdef __EMX__ protected int file_os2_apptype(struct magic_set *, const char *, const void *, diff --git src/funcs.c src/funcs.c index 71041441..7186435c 100644 --- src/funcs.c +++ src/funcs.c @@ -27,7 +27,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: funcs.c,v 1.129 2022/05/28 20:24:09 christos Exp $") +FILE_RCSID("@(#)$File: funcs.c,v 1.130 2022/07/04 19:44:35 christos Exp $") #endif /* lint */ #include "magic.h" @@ -763,7 +763,7 @@ file_pop_buffer(struct magic_set *ms, file_pushbuf_t *pb) /* * convert string to ascii printable format. 
*/ -public char * +protected char * file_printable(struct magic_set *ms, char *buf, size_t bufsiz, const char *str, size_t slen) { -- 2.35.3 From 7e59d34206d7c962e093d4239e5367a2cd8b7623 Mon Sep 17 00:00:00 2001 From: Christos Zoulas Date: Mon, 4 Jul 2022 20:16:29 +0000 Subject: [PATCH 2/5] Handle invalid characters as octal (idea from PR/363 by dimich) --- src/file.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git src/file.c src/file.c index 5e89137d..af9be0f0 100644 --- src/file.c +++ src/file.c @@ -32,7 +32,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: file.c,v 1.197 2022/07/04 19:44:35 christos Exp $") +FILE_RCSID("@(#)$File: file.c,v 1.198 2022/07/04 20:16:29 christos Exp $") #endif /* lint */ #include "magic.h" @@ -580,8 +580,11 @@ fname_print(const char *inname) bytesconsumed = mbrtowc(&nextchar, inname, n, &state); if (bytesconsumed == CAST(size_t, -1) || bytesconsumed == CAST(size_t, -2)) { - nextchar = *inname; - bytesconsumed = 1; + nextchar = *inname++; + n--; + (void)mbrlen(NULL, 0, &state); + file_octal(CAST(unsigned char, nextchar)); + continue; } inname += bytesconsumed; n -= bytesconsumed; @@ -660,9 +663,12 @@ file_mbswidth(struct magic_set *ms, const char *s) bytesconsumed == CAST(size_t, -2)) { nextchar = *s; bytesconsumed = 1; + (void)mbrlen(NULL, 0, &state); + width += 4; + } else { + width += ((ms->flags & MAGIC_RAW) != 0 + || iswprint(nextchar)) ? wcwidth(nextchar) : 4; } - width += ((ms->flags & MAGIC_RAW) != 0 - || iswprint(nextchar)) ? wcwidth(nextchar) : 4; s += bytesconsumed, n -= bytesconsumed; } -- 2.35.3 From f042050f59bfc037677871c4d1037c33273f5213 Mon Sep 17 00:00:00 2001 From: Christos Zoulas Date: Mon, 4 Jul 2022 22:30:51 +0000 Subject: [PATCH 3/5] mbrlen(NULL, is not portable; revert to using memset to initialize the state. 
--- src/file.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git src/file.c src/file.c index af9be0f0..8b4f14c2 100644 --- src/file.c +++ src/file.c @@ -32,7 +32,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: file.c,v 1.198 2022/07/04 20:16:29 christos Exp $") +FILE_RCSID("@(#)$File: file.c,v 1.199 2022/07/04 22:30:51 christos Exp $") #endif /* lint */ #include "magic.h" @@ -575,14 +575,14 @@ fname_print(const char *inname) size_t bytesconsumed; - (void)mbrlen(NULL, 0, &state); + (void)memset(&state, 0, sizeof(state)); while (n > 0) { bytesconsumed = mbrtowc(&nextchar, inname, n, &state); if (bytesconsumed == CAST(size_t, -1) || bytesconsumed == CAST(size_t, -2)) { nextchar = *inname++; n--; - (void)mbrlen(NULL, 0, &state); + (void)memset(&state, 0, sizeof(state)); file_octal(CAST(unsigned char, nextchar)); continue; } @@ -654,7 +654,7 @@ file_mbswidth(struct magic_set *ms, const char *s) mbstate_t state; wchar_t nextchar; - (void)mbrlen(NULL, 0, &state); + (void)memset(&state, 0, sizeof(state)); n = strlen(s); while (n > 0) { @@ -663,7 +663,7 @@ file_mbswidth(struct magic_set *ms, const char *s) bytesconsumed == CAST(size_t, -2)) { nextchar = *s; bytesconsumed = 1; - (void)mbrlen(NULL, 0, &state); + (void)memset(&state, 0, sizeof(state)); width += 4; } else { width += ((ms->flags & MAGIC_RAW) != 0 -- 2.35.3 From d471022b2772071877895759f209f2c346757a4c Mon Sep 17 00:00:00 2001 From: Christos Zoulas Date: Tue, 5 Jul 2022 19:53:42 +0000 Subject: [PATCH 4/5] Use printf("%lc") instead of putwc(). Somehow mixing wide and narrow stdio does not work on Linux? 
--- src/file.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git src/file.c src/file.c index 8b4f14c2..6ad3a0f8 100644 --- src/file.c +++ src/file.c @@ -32,7 +32,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: file.c,v 1.199 2022/07/04 22:30:51 christos Exp $") +FILE_RCSID("@(#)$File: file.c,v 1.200 2022/07/05 19:53:42 christos Exp $") #endif /* lint */ #include "magic.h" @@ -589,7 +589,7 @@ fname_print(const char *inname) inname += bytesconsumed; n -= bytesconsumed; if (iswprint(nextchar)) { - putwc(nextchar, stdout); + printf("%lc", nextchar); continue; } /* XXX: What if it is > 255? */ -- 2.35.3 From 441ac2b15508909e82ad467960df4ac0adf9644c Mon Sep 17 00:00:00 2001 From: Christos Zoulas Date: Tue, 5 Jul 2022 20:05:23 +0000 Subject: [PATCH 5/5] wcwidth is not supposed to return -1 if the character is printable, but it does for 0xff... Prevent it from decreasing the width. --- src/file.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git src/file.c src/file.c index 6ad3a0f8..e169c08f 100644 --- src/file.c +++ src/file.c @@ -32,7 +32,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: file.c,v 1.200 2022/07/05 19:53:42 christos Exp $") +FILE_RCSID("@(#)$File: file.c,v 1.201 2022/07/05 20:05:23 christos Exp $") #endif /* lint */ #include "magic.h" @@ -666,8 +666,9 @@ file_mbswidth(struct magic_set *ms, const char *s) (void)memset(&state, 0, sizeof(state)); width += 4; } else { + int w = wcwidth(nextchar); width += ((ms->flags & MAGIC_RAW) != 0 - || iswprint(nextchar)) ? wcwidth(nextchar) : 4; + || iswprint(nextchar)) ? (w > 0 ? w : 1) : 4; } s += bytesconsumed, n -= bytesconsumed; -- 2.35.3