406 lines
11 KiB
Plaintext
406 lines
11 KiB
Plaintext
From c80065fe6900be5e794941e29b32440e9969b1c3 Mon Sep 17 00:00:00 2001
|
|
From: Christos Zoulas <christos@zoulas.com>
|
|
Date: Mon, 4 Jul 2022 19:44:35 +0000
|
|
Subject: [PATCH 1/5] PR/362: ro-ee: fix wide char printing
|
|
|
|
---
|
|
src/file.c | 88 +++++++++++++++++++++++++++++++++++++++++------------
|
|
src/file.h | 4 +--
|
|
src/funcs.c | 4 +--
|
|
3 files changed, 73 insertions(+), 23 deletions(-)
|
|
|
|
diff --git src/file.c src/file.c
|
|
index bb058ce1..5e89137d 100644
|
|
--- src/file.c
|
|
+++ src/file.c
|
|
@@ -32,7 +32,7 @@
|
|
#include "file.h"
|
|
|
|
#ifndef lint
|
|
-FILE_RCSID("@(#)$File: file.c,v 1.196 2022/07/04 17:00:51 christos Exp $")
|
|
+FILE_RCSID("@(#)$File: file.c,v 1.197 2022/07/04 19:44:35 christos Exp $")
|
|
#endif /* lint */
|
|
|
|
#include "magic.h"
|
|
@@ -60,6 +60,12 @@ FILE_RCSID("@(#)$File: file.c,v 1.196 2022/07/04 17:00:51 christos Exp $")
|
|
#ifdef HAVE_WCTYPE_H
|
|
#include <wctype.h>
|
|
#endif
|
|
+#if defined(HAVE_WCHAR_H) && defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH) && \
|
|
+ defined(HAVE_WCTYPE_H)
|
|
+#define FILE_WIDE_SUPPORT
|
|
+#else
|
|
+#include <ctype.h>
|
|
+#endif
|
|
|
|
#if defined(HAVE_GETOPT_H) && defined(HAVE_STRUCT_OPTION)
|
|
# include <getopt.h>
|
|
@@ -550,6 +556,55 @@ out: file_err(EXIT_FAILURE, "Cannot allocate memory for file list");
|
|
return e;
|
|
}
|
|
|
|
+private void
|
|
+file_octal(unsigned char c)
|
|
+{ /* Write c to stdout as a backslash followed by three octal digits. */
|
|
+ putc('\\', stdout); /* literal backslash introduces the escape */
|
|
+ putc(((c >> 6) & 7) + '0', stdout); /* most significant octal digit */
|
|
+ putc(((c >> 3) & 7) + '0', stdout); /* middle octal digit (bits 5-3) */
|
|
+ putc(((c >> 0) & 7) + '0', stdout); /* least significant octal digit (bits 2-0) */
|
|
+}
|
|
+
|
|
+private void
|
|
+fname_print(const char *inname)
|
|
+{ /* Print inname on stdout, octal-escaping anything that is not printable. */
|
|
+ size_t n = strlen(inname); /* byte length of inname */
|
|
+#ifdef FILE_WIDE_SUPPORT
|
|
+ mbstate_t state;
|
|
+ wchar_t nextchar;
|
|
+ size_t bytesconsumed;
|
|
+
|
|
+
|
|
+ (void)mbrlen(NULL, 0, &state);
|
|
+ while (n > 0) {
|
|
+ bytesconsumed = mbrtowc(&nextchar, inname, n, &state);
|
|
+ if (bytesconsumed == CAST(size_t, -1) ||
|
|
+ bytesconsumed == CAST(size_t, -2)) {
|
|
+ nextchar = *inname;
|
|
+ bytesconsumed = 1;
|
|
+ }
|
|
+ inname += bytesconsumed;
|
|
+ n -= bytesconsumed;
|
|
+ if (iswprint(nextchar)) {
|
|
+ putwc(nextchar, stdout);
|
|
+ continue;
|
|
+ }
|
|
+ /* XXX: What if it is > 255? */
|
|
+ file_octal(CAST(unsigned char, nextchar)); /* only the low byte is escaped */
|
|
+ }
|
|
+#else /* !FILE_WIDE_SUPPORT: treat the name as a sequence of single bytes */
|
|
+ size_t i;
|
|
+ for (i = 0; i < n; i++) {
|
|
+ unsigned char c = CAST(unsigned char, inname[i]);
|
|
+ if (isprint(c)) {
|
|
+ (void)putc(c, stdout);
|
|
+ continue;
|
|
+ }
|
|
+ file_octal(c); /* non-printable byte: emit as an octal escape */
|
|
+ }
|
|
+#endif /* FILE_WIDE_SUPPORT */
|
|
+}
|
|
+
|
|
/*
|
|
* Called for each input file on the command line (or in a list of files)
|
|
*/
|
|
@@ -559,15 +614,13 @@ process(struct magic_set *ms, const char *inname, int wid)
|
|
const char *type, c = nulsep > 1 ? '\0' : '\n';
|
|
int std_in = strcmp(inname, "-") == 0;
|
|
int haderror = 0;
|
|
- size_t plen = 4 * wid + 1;
|
|
- char *pbuf, *pname;
|
|
-
|
|
- if ((pbuf = CAST(char *, malloc(plen))) == NULL)
|
|
- file_err(EXIT_FAILURE, "Can't allocate %zu bytes", plen);
|
|
|
|
if (wid > 0 && !bflag) {
|
|
- pname = file_printable(ms, pbuf, plen, inname, wid);
|
|
- (void)printf("%s", std_in ? "/dev/stdin" : pname);
|
|
+ const char *pname = std_in ? "/dev/stdin" : inname;
|
|
+ if ((ms->flags & MAGIC_RAW) == 0)
|
|
+ fname_print(pname);
|
|
+ else
|
|
+ (void)printf("%s", pname);
|
|
if (nulsep)
|
|
(void)putc('\0', stdout);
|
|
if (nulsep < 2) {
|
|
@@ -586,7 +639,6 @@ process(struct magic_set *ms, const char *inname, int wid)
|
|
}
|
|
if (nobuffer)
|
|
haderror |= fflush(stdout) != 0;
|
|
- free(pbuf);
|
|
return haderror || type == NULL;
|
|
}
|
|
|
|
@@ -594,35 +646,33 @@ protected size_t
|
|
file_mbswidth(struct magic_set *ms, const char *s)
|
|
{
|
|
size_t width = 0;
|
|
-#if defined(HAVE_WCHAR_H) && defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH) && \
|
|
- defined(HAVE_WCTYPE_H)
|
|
- size_t bytesconsumed, old_n, n;
|
|
+#ifdef FILE_WIDE_SUPPORT
|
|
+ size_t bytesconsumed, n;
|
|
mbstate_t state;
|
|
wchar_t nextchar;
|
|
- (void)memset(&state, 0, sizeof(mbstate_t));
|
|
- old_n = n = strlen(s);
|
|
+
|
|
+ (void)mbrlen(NULL, 0, &state);
|
|
+ n = strlen(s);
|
|
|
|
while (n > 0) {
|
|
bytesconsumed = mbrtowc(&nextchar, s, n, &state);
|
|
if (bytesconsumed == CAST(size_t, -1) ||
|
|
bytesconsumed == CAST(size_t, -2)) {
|
|
- /* Something went wrong, return something reasonable */
|
|
- return old_n;
|
|
+ nextchar = *s;
|
|
+ bytesconsumed = 1;
|
|
}
|
|
width += ((ms->flags & MAGIC_RAW) != 0
|
|
|| iswprint(nextchar)) ? wcwidth(nextchar) : 4;
|
|
|
|
s += bytesconsumed, n -= bytesconsumed;
|
|
}
|
|
- return width;
|
|
#else
|
|
while (*s) {
|
|
width += (ms->flags & MAGIC_RAW) != 0
|
|
|| isprint(CAST(unsigned char, *s)) ? 1 : 4;
|
|
}
|
|
-
|
|
- return strlen(s);
|
|
#endif
|
|
+ return width;
|
|
}
|
|
|
|
private void
|
|
diff --git src/file.h src/file.h
|
|
index 343f62ea..69aad1dc 100644
|
|
--- src/file.h
|
|
+++ src/file.h
|
|
@@ -27,7 +27,7 @@
|
|
*/
|
|
/*
|
|
* file.h - definitions for file(1) program
|
|
- * @(#)$File: file.h,v 1.234 2022/05/28 20:24:09 christos Exp $
|
|
+ * @(#)$File: file.h,v 1.235 2022/07/04 19:44:35 christos Exp $
|
|
*/
|
|
|
|
#ifndef __file_h__
|
|
@@ -575,7 +575,7 @@ protected size_t file_pstring_length_size(struct magic_set *,
|
|
const struct magic *);
|
|
protected size_t file_pstring_get_length(struct magic_set *,
|
|
const struct magic *, const char *);
|
|
-public char * file_printable(struct magic_set *, char *, size_t,
|
|
+protected char * file_printable(struct magic_set *, char *, size_t,
|
|
const char *, size_t);
|
|
#ifdef __EMX__
|
|
protected int file_os2_apptype(struct magic_set *, const char *, const void *,
|
|
diff --git src/funcs.c src/funcs.c
|
|
index 71041441..7186435c 100644
|
|
--- src/funcs.c
|
|
+++ src/funcs.c
|
|
@@ -27,7 +27,7 @@
|
|
#include "file.h"
|
|
|
|
#ifndef lint
|
|
-FILE_RCSID("@(#)$File: funcs.c,v 1.129 2022/05/28 20:24:09 christos Exp $")
|
|
+FILE_RCSID("@(#)$File: funcs.c,v 1.130 2022/07/04 19:44:35 christos Exp $")
|
|
#endif /* lint */
|
|
|
|
#include "magic.h"
|
|
@@ -763,7 +763,7 @@ file_pop_buffer(struct magic_set *ms, file_pushbuf_t *pb)
|
|
/*
|
|
* convert string to ascii printable format.
|
|
*/
|
|
-public char *
|
|
+protected char *
|
|
file_printable(struct magic_set *ms, char *buf, size_t bufsiz,
|
|
const char *str, size_t slen)
|
|
{
|
|
--
|
|
2.35.3
|
|
|
|
|
|
From 7e59d34206d7c962e093d4239e5367a2cd8b7623 Mon Sep 17 00:00:00 2001
|
|
From: Christos Zoulas <christos@zoulas.com>
|
|
Date: Mon, 4 Jul 2022 20:16:29 +0000
|
|
Subject: [PATCH 2/5] Handle invalid characters as octal (idea from PR/363 by
|
|
dimich)
|
|
|
|
---
|
|
src/file.c | 16 +++++++++++-----
|
|
1 file changed, 11 insertions(+), 5 deletions(-)
|
|
|
|
diff --git src/file.c src/file.c
|
|
index 5e89137d..af9be0f0 100644
|
|
--- src/file.c
|
|
+++ src/file.c
|
|
@@ -32,7 +32,7 @@
|
|
#include "file.h"
|
|
|
|
#ifndef lint
|
|
-FILE_RCSID("@(#)$File: file.c,v 1.197 2022/07/04 19:44:35 christos Exp $")
|
|
+FILE_RCSID("@(#)$File: file.c,v 1.198 2022/07/04 20:16:29 christos Exp $")
|
|
#endif /* lint */
|
|
|
|
#include "magic.h"
|
|
@@ -580,8 +580,11 @@ fname_print(const char *inname)
|
|
bytesconsumed = mbrtowc(&nextchar, inname, n, &state);
|
|
if (bytesconsumed == CAST(size_t, -1) ||
|
|
bytesconsumed == CAST(size_t, -2)) {
|
|
- nextchar = *inname;
|
|
- bytesconsumed = 1;
|
|
+ nextchar = *inname++;
|
|
+ n--;
|
|
+ (void)mbrlen(NULL, 0, &state);
|
|
+ file_octal(CAST(unsigned char, nextchar));
|
|
+ continue;
|
|
}
|
|
inname += bytesconsumed;
|
|
n -= bytesconsumed;
|
|
@@ -660,9 +663,12 @@ file_mbswidth(struct magic_set *ms, const char *s)
|
|
bytesconsumed == CAST(size_t, -2)) {
|
|
nextchar = *s;
|
|
bytesconsumed = 1;
|
|
+ (void)mbrlen(NULL, 0, &state);
|
|
+ width += 4;
|
|
+ } else {
|
|
+ width += ((ms->flags & MAGIC_RAW) != 0
|
|
+ || iswprint(nextchar)) ? wcwidth(nextchar) : 4;
|
|
}
|
|
- width += ((ms->flags & MAGIC_RAW) != 0
|
|
- || iswprint(nextchar)) ? wcwidth(nextchar) : 4;
|
|
|
|
s += bytesconsumed, n -= bytesconsumed;
|
|
}
|
|
--
|
|
2.35.3
|
|
|
|
|
|
From f042050f59bfc037677871c4d1037c33273f5213 Mon Sep 17 00:00:00 2001
|
|
From: Christos Zoulas <christos@zoulas.com>
|
|
Date: Mon, 4 Jul 2022 22:30:51 +0000
|
|
Subject: [PATCH 3/5] mbrlen(NULL, is not portable; revert to using memset to
|
|
initialize the state.
|
|
|
|
---
|
|
src/file.c | 10 +++++-----
|
|
1 file changed, 5 insertions(+), 5 deletions(-)
|
|
|
|
diff --git src/file.c src/file.c
|
|
index af9be0f0..8b4f14c2 100644
|
|
--- src/file.c
|
|
+++ src/file.c
|
|
@@ -32,7 +32,7 @@
|
|
#include "file.h"
|
|
|
|
#ifndef lint
|
|
-FILE_RCSID("@(#)$File: file.c,v 1.198 2022/07/04 20:16:29 christos Exp $")
|
|
+FILE_RCSID("@(#)$File: file.c,v 1.199 2022/07/04 22:30:51 christos Exp $")
|
|
#endif /* lint */
|
|
|
|
#include "magic.h"
|
|
@@ -575,14 +575,14 @@ fname_print(const char *inname)
|
|
size_t bytesconsumed;
|
|
|
|
|
|
- (void)mbrlen(NULL, 0, &state);
|
|
+ (void)memset(&state, 0, sizeof(state));
|
|
while (n > 0) {
|
|
bytesconsumed = mbrtowc(&nextchar, inname, n, &state);
|
|
if (bytesconsumed == CAST(size_t, -1) ||
|
|
bytesconsumed == CAST(size_t, -2)) {
|
|
nextchar = *inname++;
|
|
n--;
|
|
- (void)mbrlen(NULL, 0, &state);
|
|
+ (void)memset(&state, 0, sizeof(state));
|
|
file_octal(CAST(unsigned char, nextchar));
|
|
continue;
|
|
}
|
|
@@ -654,7 +654,7 @@ file_mbswidth(struct magic_set *ms, const char *s)
|
|
mbstate_t state;
|
|
wchar_t nextchar;
|
|
|
|
- (void)mbrlen(NULL, 0, &state);
|
|
+ (void)memset(&state, 0, sizeof(state));
|
|
n = strlen(s);
|
|
|
|
while (n > 0) {
|
|
@@ -663,7 +663,7 @@ file_mbswidth(struct magic_set *ms, const char *s)
|
|
bytesconsumed == CAST(size_t, -2)) {
|
|
nextchar = *s;
|
|
bytesconsumed = 1;
|
|
- (void)mbrlen(NULL, 0, &state);
|
|
+ (void)memset(&state, 0, sizeof(state));
|
|
width += 4;
|
|
} else {
|
|
width += ((ms->flags & MAGIC_RAW) != 0
|
|
--
|
|
2.35.3
|
|
|
|
|
|
From d471022b2772071877895759f209f2c346757a4c Mon Sep 17 00:00:00 2001
|
|
From: Christos Zoulas <christos@zoulas.com>
|
|
Date: Tue, 5 Jul 2022 19:53:42 +0000
|
|
Subject: [PATCH 4/5] Use printf("%lc") instead of putwc(). Somehow mixing wide
|
|
and narrow stdio does not work on Linux?
|
|
|
|
---
|
|
src/file.c | 4 ++--
|
|
1 file changed, 2 insertions(+), 2 deletions(-)
|
|
|
|
diff --git src/file.c src/file.c
|
|
index 8b4f14c2..6ad3a0f8 100644
|
|
--- src/file.c
|
|
+++ src/file.c
|
|
@@ -32,7 +32,7 @@
|
|
#include "file.h"
|
|
|
|
#ifndef lint
|
|
-FILE_RCSID("@(#)$File: file.c,v 1.199 2022/07/04 22:30:51 christos Exp $")
|
|
+FILE_RCSID("@(#)$File: file.c,v 1.200 2022/07/05 19:53:42 christos Exp $")
|
|
#endif /* lint */
|
|
|
|
#include "magic.h"
|
|
@@ -589,7 +589,7 @@ fname_print(const char *inname)
|
|
inname += bytesconsumed;
|
|
n -= bytesconsumed;
|
|
if (iswprint(nextchar)) {
|
|
- putwc(nextchar, stdout);
|
|
+ printf("%lc", nextchar);
|
|
continue;
|
|
}
|
|
/* XXX: What if it is > 255? */
|
|
--
|
|
2.35.3
|
|
|
|
|
|
From 441ac2b15508909e82ad467960df4ac0adf9644c Mon Sep 17 00:00:00 2001
|
|
From: Christos Zoulas <christos@zoulas.com>
|
|
Date: Tue, 5 Jul 2022 20:05:23 +0000
|
|
Subject: [PATCH 5/5] wcwidth is not supposed to return -1 if the character is
|
|
printable, but it does for 0xff... Prevent it from decreasing the width.
|
|
|
|
---
|
|
src/file.c | 5 +++--
|
|
1 file changed, 3 insertions(+), 2 deletions(-)
|
|
|
|
diff --git src/file.c src/file.c
|
|
index 6ad3a0f8..e169c08f 100644
|
|
--- src/file.c
|
|
+++ src/file.c
|
|
@@ -32,7 +32,7 @@
|
|
#include "file.h"
|
|
|
|
#ifndef lint
|
|
-FILE_RCSID("@(#)$File: file.c,v 1.200 2022/07/05 19:53:42 christos Exp $")
|
|
+FILE_RCSID("@(#)$File: file.c,v 1.201 2022/07/05 20:05:23 christos Exp $")
|
|
#endif /* lint */
|
|
|
|
#include "magic.h"
|
|
@@ -666,8 +666,9 @@ file_mbswidth(struct magic_set *ms, const char *s)
|
|
(void)memset(&state, 0, sizeof(state));
|
|
width += 4;
|
|
} else {
|
|
+ int w = wcwidth(nextchar);
|
|
width += ((ms->flags & MAGIC_RAW) != 0
|
|
- || iswprint(nextchar)) ? wcwidth(nextchar) : 4;
|
|
+ || iswprint(nextchar)) ? (w > 0 ? w : 1) : 4;
|
|
}
|
|
|
|
s += bytesconsumed, n -= bytesconsumed;
|
|
--
|
|
2.35.3
|
|
|