--- buffer.c | 8 - buffer.h | 1 charset.c | 65 ++++---- charset.h | 9 - errmsg.c | 12 + errmsg.h | 12 + par.c | 444 ++++++++++++++++++++++++++++++---------------------------- protoMakefile | 8 - reformat.c | 168 +++++++++++++-------- reformat.h | 6 releasenotes | 16 ++ 12 files changed, 423 insertions(+), 328 deletions(-) --- a/buffer.c +++ b/buffer.c @@ -3,6 +3,7 @@ buffer.c last touched in Par 1.53.0 last meaningful change in Par 1.50 Copyright 1993, 1996 Adam M. Costello +Modified by Jérôme Pouiller This is ANSI C code (C89). @@ -20,6 +21,7 @@ Language, Second Edition, by Kerninghan #include #include #include +#include #undef NULL #define NULL ((void *) 0) @@ -62,7 +64,7 @@ buffer *newbuffer(size_t itemsize, errms blk = malloc(sizeof (block)); items = malloc(maxhere * itemsize); if (!buf || !blk || !items) { - strcpy(errmsg,outofmem); + wcscpy(errmsg,outofmem); goto nberror; } @@ -129,7 +131,7 @@ void additem(buffer *buf, const void *it new = malloc(sizeof (block)); items = malloc(maxhere * itemsize); if (!new || !items) { - strcpy(errmsg,outofmem); + wcscpy(errmsg,outofmem); goto aierror; } blk->next = new; @@ -176,7 +178,7 @@ void *copyitems(buffer *buf, errmsg_t er r = malloc(n * itemsize); if (!r) { - strcpy(errmsg,outofmem); + wcscpy(errmsg,outofmem); return NULL; } --- a/buffer.h +++ b/buffer.h @@ -3,6 +3,7 @@ buffer.h last touched in Par 1.53.0 last meaningful change in Par 1.31 Copyright 1993 Adam M. Costello +Modified by Jérôme Pouiller This is ANSI C code (C89). --- a/charset.c +++ b/charset.c @@ -3,6 +3,7 @@ charset.c last touched in Par 1.53.0 last meaningful change in Par 1.53.0 Copyright 1993, 2001, 2020 Adam M. Costello +Modified by Jérôme Pouiller This is ANSI C code (C89). @@ -26,6 +27,8 @@ sequence. See the comments near the beg #include #include #include +#include +#include #include #include @@ -40,8 +43,8 @@ sequence. 
See the comments near the beg typedef unsigned char csflag_t; struct charset { - char *inlist; /* Characters in inlist are in the set. */ - char *outlist; /* Characters in outlist are not in the set. */ + wchar_t *inlist; /* Characters in inlist are in the set. */ + wchar_t *outlist; /* Characters in outlist are not in the set. */ /* inlist and outlist must have no common characters. */ /* inlist and outlist may be NULL, which acts like "". */ csflag_t flags; /* Characters in neither list are in the set if they */ @@ -60,25 +63,25 @@ static const csflag_t CS_NUL = 32; /* Includes the NUL character. */ -static int appearsin(char c, const char *str) +static int appearsin(wchar_t c, const wchar_t *str) /* Returns 0 if c is '\0' or str is NULL or c */ /* does not appear in *str. Otherwise returns 1. */ { - return c && str && strchr(str,c); + return c && str && wcschr(str,c); } -static int hexdigtoint(char c) +static int hexdigtoint(wchar_t c) /* Returns the value represented by the hexadecimal */ /* digit c, or -1 if c is not a hexadecimal digit. 
*/ { - const char *p, * const hexdigits = "0123456789ABCDEFabcdef"; + const wchar_t *p, * const hexdigits = L"0123456789ABCDEFabcdef"; int n; if (!c) return -1; - p = strchr(hexdigits, *(unsigned char *)&c); + p = wcschr(hexdigits, c); if (!p) return -1; n = p - hexdigits; if (n >= 16) n -= 6; @@ -91,39 +94,40 @@ static int hexdigtoint(char c) } -charset *parsecharset(const char *str, errmsg_t errmsg) +charset *parsecharset(const wchar_t *str, errmsg_t errmsg) { charset *cset = NULL; buffer *cbuf = NULL; - const char *p, * const singleescapes = "_sbqQx"; + const wchar_t *p, * const singleescapes = L"_sbqQx"; int hex1, hex2; - char ch; + wchar_t ch; cset = malloc(sizeof (charset)); if (!cset) { - strcpy(errmsg,outofmem); + wcscpy(errmsg,outofmem); goto pcserror; } cset->inlist = cset->outlist = NULL; cset->flags = 0; - cbuf = newbuffer(sizeof (char), errmsg); + cbuf = newbuffer(sizeof (wchar_t), errmsg); if (*errmsg) goto pcserror; for (p = str; *p; ++p) - if (*p == '_') { + if (*p == L'_') { ++p; if (appearsin(*p, singleescapes)) { - if (*p == '_') ch = '_' ; - else if (*p == 's') ch = ' ' ; - else if (*p == 'b') ch = '\\'; - else if (*p == 'q') ch = '\''; - else if (*p == 'Q') ch = '\"'; + if (*p == L'_') ch = L'_' ; + else if (*p == L's') ch = L' ' ; + else if (*p == L'b') ch = L'\\'; + else if (*p == L'q') ch = L'\''; + else if (*p == L'Q') ch = L'\"'; else /* *p == 'x' */ { + /* FIXME _x metacharacter should allow wide characters input.*/ hex1 = hexdigtoint(p[1]); hex2 = hexdigtoint(p[2]); if (hex1 < 0 || hex2 < 0) goto pcsbadstr; - *(unsigned char *)&ch = 16 * hex1 + hex2; + ch = 16 * hex1 + hex2; p += 2; } if (!ch) @@ -134,11 +138,11 @@ charset *parsecharset(const char *str, e } } else { - if (*p == 'A') cset->flags |= CS_UCASE; - else if (*p == 'a') cset->flags |= CS_LCASE; - else if (*p == '@') cset->flags |= CS_NCASE; - else if (*p == '0') cset->flags |= CS_DIGIT; - else if (*p == 'S') cset->flags |= CS_SPACE; + if (*p == L'A') cset->flags |= CS_UCASE; + 
else if (*p == L'a') cset->flags |= CS_LCASE; + else if (*p == L'@') cset->flags |= CS_NCASE; + else if (*p == L'0') cset->flags |= CS_DIGIT; + else if (*p == L'S') cset->flags |= CS_SPACE; else goto pcsbadstr; } } @@ -155,11 +159,12 @@ charset *parsecharset(const char *str, e pcscleanup: if (cbuf) freebuffer(cbuf); + //if (wstr) free(wstr); return cset; pcsbadstr: - sprintf(errmsg, "Bad charset syntax: %.*s\n", errmsg_size - 22, str); + swprintf(errmsg, errmsg_size, L"Bad charset syntax: %.*s\n", errmsg_size - 22, str); pcserror: @@ -177,7 +182,7 @@ void freecharset(charset *cset) } -int csmember(char c, const charset *cset) +int csmember(wchar_t c, const charset *cset) { unsigned char uc; @@ -212,16 +217,16 @@ static charset *csud( { charset *csu; buffer *inbuf = NULL, *outbuf = NULL; - char *lists[4], **list, *p, nullchar = '\0'; + wchar_t *lists[4], **list, *p, nullchar = L'\0'; csu = malloc(sizeof (charset)); if (!csu) { - strcpy(errmsg,outofmem); + wcscpy(errmsg,outofmem); goto csuderror; } - inbuf = newbuffer(sizeof (char), errmsg); + inbuf = newbuffer(sizeof (wchar_t), errmsg); if (*errmsg) goto csuderror; - outbuf = newbuffer(sizeof (char), errmsg); + outbuf = newbuffer(sizeof (wchar_t), errmsg); if (*errmsg) goto csuderror; csu->inlist = csu->outlist = NULL; csu->flags = u ? cset1->flags | cset2->flags --- a/charset.h +++ b/charset.h @@ -3,6 +3,7 @@ charset.h last touched in Par 1.53.0 last meaningful change in Par 1.31 Copyright 1993 Adam M. Costello +Modified by Jérôme Pouiller This is ANSI C code (C89). 
@@ -15,13 +16,17 @@ always succeed, provided that they are p #ifndef CHARSET_H #define CHARSET_H +/* In order to make wchar_t work, we need this definition */ +#define _XOPEN_SOURCE + +#include #include "errmsg.h" typedef struct charset charset; -charset *parsecharset(const char *str, errmsg_t errmsg); +charset *parsecharset(const wchar_t *str, errmsg_t errmsg); /* parsecharset(str,errmsg) returns the set of characters defined by */ /* str using charset syntax (see par.doc). Returns NULL on failure. */ @@ -33,7 +38,7 @@ void freecharset(charset *cset); /* *cset. cset may not be used after this call. */ -int csmember(char c, const charset *cset); +int csmember(wchar_t c, const charset *cset); /* csmember(c,cset) returns 1 if c is a member of *cset, 0 otherwise. */ --- a/errmsg.c +++ b/errmsg.c @@ -3,6 +3,7 @@ errmsg.c last touched in Par 1.53.0 last meaningful change in Par 1.40 Copyright 1993 Adam M. Costello +Modified by Jérôme Pouiller This is ANSI C code (C89). @@ -12,8 +13,11 @@ This is ANSI C code (C89). #include "errmsg.h" /* Makes sure we're consistent with the declarations. */ -const char * const outofmem = - "Out of memory.\n"; +const wchar_t * const outofmem = + L"Out of memory.\n"; -const char * const impossibility = - "Impossibility #%d has occurred. Please report it.\n"; +const wchar_t * const mbserror = + L"Error in input multibyte string.\n"; + +const wchar_t * const impossibility = + L"Impossibility #%d has occurred. Please report it.\n"; --- a/errmsg.h +++ b/errmsg.h @@ -3,6 +3,7 @@ errmsg.h last touched in Par 1.53.0 last meaningful change in Par 1.40 Copyright 1993 Adam M. Costello +Modified by Jérôme Pouiller This is ANSI C code (C89). @@ -12,7 +13,7 @@ This is ANSI C code (C89). #ifndef ERRMSG_H #define ERRMSG_H - +#include #define errmsg_size 163 /* This is the maximum number of characters that will */ @@ -21,7 +22,7 @@ This is ANSI C code (C89). /* versions of this header file. 
*/ -typedef char errmsg_t[errmsg_size]; +typedef wchar_t errmsg_t[errmsg_size]; /* Any function which takes the argument errmsg_t errmsg must, before */ /* returning, either set errmsg[0] to '\0' (indicating success), or */ @@ -29,10 +30,13 @@ typedef char errmsg_t[errmsg_size]; /* being careful not to overrun the space. */ -extern const char * const outofmem; +extern const wchar_t * const outofmem; /* "Out of memory.\n" */ -extern const char * const impossibility; +extern const wchar_t * const mbserror; + /* "Error in input multibyte string.\n" */ + +extern const wchar_t * const impossibility; /* "Impossibility #%d has occurred. Please report it.\n" */ --- a/par.c +++ b/par.c @@ -3,23 +3,26 @@ par.c last touched in Par 1.53.0 last meaningful change in Par 1.53.0 Copyright 1993, 1996, 2001, 2020 Adam M. Costello +Modified by Jérôme Pouiller This is ANSI C code (C89). */ - #include "buffer.h" #include "charset.h" #include "errmsg.h" #include "reformat.h" -#include +#include +#include +#include #include #include #include #include #include +#include #undef NULL #define NULL ((void *) 0) @@ -28,56 +31,6 @@ This is ANSI C code (C89). #define free(ptr) #endif - -/*=== - -Regarding char and unsigned char: ANSI C is a nightmare in this -respect. Some functions, like puts(), strchr(), and getenv(), use char -or char*, so they work well with character constants like 'a', which -are char, and with argv, which is char**. But several other functions, -like getchar(), putchar(), and isdigit(), use unsigned char (converted -to/from int). Therefore innocent-looking code can be wrong, for -example: - - int c = getchar(); - if (c == 'a') ... - -This is wrong because 'a' is char (converted to int) and could be -negative, but getchar() returns unsigned char (converted to int), so c -is always nonnegative or EOF. For similar reasons, it is wrong to pass -a char to a function that expects an unsigned char: - - putchar('\n'); - if (isdigit(argv[1][0])) ... 
- -Inevitably, we need to convert between char and unsigned char. This can -be done by integral conversion (casting or assigning a char to unsigned -char or vice versa), or by aliasing (converting a pointer to char to -a pointer to unsigned char (or vice versa) and then dereferencing -it). ANSI C requires that integral conversion alters the bits when the -unsigned value is not representable in the signed type and the signed -type does not use two's complement representation. Aliasing, on the -other hand, preserves the bits. Although the C standard is not at all -clear about which sort of conversion is appropriate for making the -standard library functions interoperate, I think preserving the bits -is what is needed. Under that assumption, here are some examples of -correct code: - - int c = getchar(); - char ch; - - if (c != EOF) { - *(unsigned char *)&ch = c; - if (ch == 'a') ... - if (isdigit(c)) ... - } - - char *s = ... - if (isdigit(*(unsigned char *)s)) ... - -===*/ - - static const char * const usagemsg = "\n" "Options for par:\n" @@ -119,7 +72,6 @@ static const char * const usagemsg = "\n" ; - /* Structure for recording properties of lines within segments: */ typedef unsigned char lflag_t; @@ -129,7 +81,7 @@ typedef struct lineprop { /* line, or the fallback prelen and suflen */ /* of the IP containing a non-bodiless line. */ lflag_t flags; /* Boolean properties (see below). */ - char rc; /* The repeated character of a bodiless line. */ + wchar_t rc; /* The repeated character of a bodiless line. */ } lineprop; /* Flags for marking boolean properties: */ @@ -145,15 +97,14 @@ static const lflag_t L_BODILESS = 1, /* #define issuperf(prop) (((prop)->flags & 8) != 0) #define isvacant(prop) (isbodiless(prop) && (prop)->rc == ' ') - -static int digtoint(char c) +static int digtoint(wchar_t c) /* Returns the value represented by the digit c, or -1 if c is not a digit. 
*/ { - const char *p, * const digits = "0123456789"; + const wchar_t *p, * const digits = L"0123456789"; if (!c) return -1; - p = strchr(digits,c); + p = wcschr(digits,c); return p ? p - digits : -1; /* We can't simply return c - '0' because this is ANSI C code, */ @@ -163,8 +114,7 @@ static int digtoint(char c) /* upper limit on CHAR_MAX. */ } - -static int strtoudec(const char *s, int *pn) +static int strtoudec(const wchar_t *s, int *pn) /* Converts the longest prefix of string s consisting of decimal */ /* digits to an integer, which is stored in *pn. Normally returns */ @@ -188,9 +138,8 @@ static int strtoudec(const char *s, int return 1; } - static void parsearg( - const char *arg, + const wchar_t *arg, int *phelp, int *pversion, charset *bodychars, @@ -223,81 +172,81 @@ static void parsearg( /* by the other pointers as appropriate. *phelp and *pversion are boolean */ /* flags indicating whether the help and version options were supplied. */ { - const char *savearg = arg; + const wchar_t *savearg = arg; charset *chars, *change; - char oc; + wchar_t oc; int n; *errmsg = '\0'; - if (*arg == '-') ++arg; + if (*arg == L'-') ++arg; - if (!strcmp(arg, "help")) { + if (!wcscmp(arg, L"help")) { *phelp = 1; return; } - if (!strcmp(arg, "version")) { + if (!wcscmp(arg, L"version")) { *pversion = 1; return; } - chars = *arg == 'B' ? bodychars : - *arg == 'P' ? protectchars : - *arg == 'Q' ? quotechars : - *arg == 'W' ? whitechars : - *arg == 'Z' ? terminalchars : + chars = *arg == L'B' ? bodychars : + *arg == L'P' ? protectchars : + *arg == L'Q' ? quotechars : + *arg == L'W' ? whitechars : + *arg == L'Z' ? 
terminalchars : NULL; if (chars) { ++arg; - if (*arg != '=' && *arg != '+' && *arg != '-') goto badarg; + if (*arg != L'=' && *arg != L'+' && *arg != L'-') goto badarg; change = parsecharset(arg + 1, errmsg); if (change) { - if (*arg == '=') csswap(chars,change); - else if (*arg == '+') csadd(chars,change,errmsg); - else /* *arg == '-' */ csremove(chars,change,errmsg); + if (*arg == L'=') csswap(chars,change); + else if (*arg == L'+') csadd(chars,change,errmsg); + else /* *arg == L'-' */ csremove(chars,change,errmsg); freecharset(change); } return; } - if (isdigit(*(unsigned char *)arg)) { + if (iswdigit(*arg)) { if (!strtoudec(arg, &n)) goto badarg; if (n <= 8) *pprefix = n; else *pwidth = n; } for (;;) { - while (isdigit(*(unsigned char *)arg)) ++arg; + while (iswdigit(*(unsigned char *)arg)) ++arg; oc = *arg; if (!oc) break; n = -1; if (!strtoudec(++arg, &n)) goto badarg; - if ( oc == 'h' || oc == 'p' || oc == 'r' - || oc == 's' || oc == 'T' || oc == 'w') { - if (oc == 'h') *phang = n >= 0 ? n : 1; - else if (oc == 'p') *pprefix = n; - else if (oc == 'r') *prepeat = n >= 0 ? n : 3; - else if (oc == 's') *psuffix = n; - else if (oc == 'T') *pTab = n >= 0 ? n : 8; - else /* oc == 'w' */ *pwidth = n >= 0 ? n : 79; + if ( oc == L'h' || oc == L'p' || oc == L'r' + || oc == L's' || oc == L'T' || oc == L'w') { + if (oc == L'h') *phang = n >= 0 ? n : 1; + else if (oc == L'p') *pprefix = n; + else if (oc == L'r') *prepeat = n >= 0 ? n : 3; + else if (oc == L's') *psuffix = n; + else if (oc == L'T') *pTab = n >= 0 ? n : 8; + else /* oc == L'w' */ *pwidth = n >= 0 ? 
n : 79; } else { if (n < 0) n = 1; if (n > 1) goto badarg; - if (oc == 'b') *pbody = n; - else if (oc == 'c') *pcap = n; - else if (oc == 'd') *pdiv = n; - else if (oc == 'E') *pErr = n; - else if (oc == 'e') *pexpel = n; - else if (oc == 'f') *pfit = n; - else if (oc == 'g') *pguess = n; - else if (oc == 'i') *pinvis = n; - else if (oc == 'j') *pjust = n; - else if (oc == 'l') *plast = n; - else if (oc == 'q') *pquote = n; - else if (oc == 'R') *pReport = n; - else if (oc == 't') *ptouch = n; + if (oc == L'b') *pbody = n; + else if (oc == L'c') *pcap = n; + else if (oc == L'd') *pdiv = n; + else if (oc == L'E') *pErr = n; + else if (oc == L'e') *pexpel = n; + else if (oc == L'f') *pfit = n; + else if (oc == L'g') *pguess = n; + else if (oc == L'i') *pinvis = n; + else if (oc == L'j') *pjust = n; + else if (oc == L'l') *plast = n; + else if (oc == L'q') *pquote = n; + else if (oc == L'R') *pReport = n; + else if (oc == L't') *ptouch = n; else goto badarg; } } @@ -306,12 +255,11 @@ static void parsearg( badarg: - sprintf(errmsg, "Bad argument: %.*s\n", errmsg_size - 16, savearg); + swprintf(errmsg, errmsg_size, L"Bad argument: %.*s\n", errmsg_size - 16, savearg); *phelp = 1; } - -static char **readlines( +static wchar_t **readlines( lineprop **pprops, const charset *protectchars, const charset *quotechars, const charset *whitechars, int Tab, int invis, int quote, errmsg_t errmsg @@ -332,9 +280,11 @@ static char **readlines( /* it's not NULL. On failure, returns NULL and sets *pprops to NULL. 
*/ { buffer *cbuf = NULL, *lbuf = NULL, *lpbuf = NULL; - int c, empty, blank, firstline, qsonly, oldqsonly = 0, vlnlen, i; - char ch, *ln = NULL, nullchar = '\0', *nullline = NULL, *qpend, - *oldln = NULL, *oldqpend = NULL, *p, *op, *vln = NULL, **lines = NULL; + wint_t c; + int empty, blank, firstline, qsonly, oldqsonly = 0, vlnlen, i; + wchar_t *ln = NULL, nullchar = L'\0', *nullline = NULL, *qpend, + + *oldln = NULL, *oldqpend = NULL, *p, *op, *vln = NULL, **lines = NULL; lineprop vprop = { 0, 0, 0, '\0' }, iprop = { 0, 0, 0, '\0' }; /* oldqsonly, oldln, and oldquend don't really need to be initialized. */ @@ -346,20 +296,25 @@ static char **readlines( *pprops = NULL; - cbuf = newbuffer(sizeof (char), errmsg); + cbuf = newbuffer(sizeof (wchar_t), errmsg); if (*errmsg) goto rlcleanup; - lbuf = newbuffer(sizeof (char *), errmsg); + lbuf = newbuffer(sizeof (wchar_t *), errmsg); if (*errmsg) goto rlcleanup; lpbuf = newbuffer(sizeof (lineprop), errmsg); if (*errmsg) goto rlcleanup; for (empty = blank = firstline = 1; ; ) { - c = getchar(); - if (c == EOF) break; - *(unsigned char *)&ch = c; - if (ch == '\n') { + c = getwchar(); + if (c == WEOF) { + if (errno == EILSEQ) { + wcscpy(errmsg, L"Invalid multibyte sequence in input\n"); + goto rlcleanup; + } + break; + } + if (c == L'\n') { if (blank) { - ungetc(c,stdin); + ungetwc(c,stdin); break; } additem(cbuf, &nullchar, errmsg); @@ -368,9 +323,9 @@ static char **readlines( if (*errmsg) goto rlcleanup; if (quote) { for (qpend = ln; *qpend && csmember(*qpend, quotechars); ++qpend); - for (p = qpend; *p == ' ' || csmember(*p, quotechars); ++p); - qsonly = *p == '\0'; - while (qpend > ln && qpend[-1] == ' ') --qpend; + for (p = qpend; *p == L' ' || csmember(*p, quotechars); ++p); + qsonly = (*p == L'\0'); + while (qpend > ln && qpend[-1] == L' ') --qpend; if (!firstline) { for (p = ln, op = oldln; p < qpend && op < oldqpend && *p == *op; @@ -378,23 +333,23 @@ static char **readlines( if (!(p == qpend && op == oldqpend)) { 
if (!invis && (oldqsonly || qsonly)) { if (oldqsonly) { - *op = '\0'; + *op = L'\0'; oldqpend = op; } if (qsonly) { - *p = '\0'; + *p = L'\0'; qpend = p; } } else { vlnlen = p - ln; - vln = malloc((vlnlen + 1) * sizeof (char)); + vln = malloc((vlnlen + 1) * sizeof (wchar_t)); if (!vln) { - strcpy(errmsg,outofmem); + wcscpy(errmsg,outofmem); goto rlcleanup; } - strncpy(vln,ln,vlnlen); - vln[vlnlen] = '\0'; + wcsncpy(vln, ln, vlnlen); + vln[vlnlen] = L'\0'; additem(lbuf, &vln, errmsg); if (*errmsg) goto rlcleanup; additem(lpbuf, &iprop, errmsg); @@ -418,24 +373,27 @@ static char **readlines( } else { if (empty) { - if (csmember(ch, protectchars)) { - ungetc(c,stdin); + if (csmember(c, protectchars)) { + ungetwc(c,stdin); break; } empty = 0; } - if (!ch) continue; - if (ch == '\t') { - ch = ' '; + if (!c) continue; + if (c == L'\t') { + c = L' '; for (i = Tab - numitems(cbuf) % Tab; i > 0; --i) { - additem(cbuf, &ch, errmsg); + additem(cbuf, &c, errmsg); if (*errmsg) goto rlcleanup; } continue; } - if (csmember(ch, whitechars)) ch = ' '; + // if (csmember(ch, whitechars)) ch = ' '; + if (iswspace(c)) + + c = L' '; else blank = 0; - additem(cbuf, &ch, errmsg); + additem(cbuf, &c, errmsg); if (*errmsg) goto rlcleanup; } } @@ -477,9 +435,8 @@ rlcleanup: return lines; } - static void compresuflen( - const char * const *lines, const char * const *endline, + const wchar_t * const *lines, const wchar_t * const *endline, const charset *bodychars, int body, int pre, int suf, int *ppre, int *psuf ) /* lines is an array of strings, up to but not including endline. */ @@ -487,7 +444,7 @@ static void compresuflen( /* lines in lines. Assumes that they have already been determined */ /* to be at least pre and suf. endline must not equal lines. 
*/ { - const char *start, *end, *knownstart, * const *line, *p1, *p2, *knownend, + const wchar_t *start, *end, *knownstart, * const *line, *p1, *p2, *knownend, *knownstart2; start = *lines; @@ -504,7 +461,7 @@ static void compresuflen( } if (body) for (p1 = end; p1 > knownstart; ) - if (*--p1 != ' ') { + if (*--p1 != L' ') { if (csmember(*p1, bodychars)) end = p1; else @@ -531,18 +488,17 @@ static void compresuflen( } if (body) { for (p1 = start; - start < knownend && (*start == ' ' || csmember(*start, bodychars)); + start < knownend && (*start == L' ' || csmember(*start, bodychars)); ++start); - if (start > p1 && start[-1] == ' ') --start; + if (start > p1 && start[-1] == L' ') --start; } else - while (end - start >= 2 && *start == ' ' && start[1] == ' ') ++start; + while (end - start >= 2 && *start == L' ' && start[1] == L' ') ++start; *psuf = end - start; } - static void delimit( - const char * const *lines, const char * const *endline, + const wchar_t * const *lines, const wchar_t * const *endline, const charset *bodychars, int repeat, int body, int div, int pre, int suf, lineprop *props ) @@ -553,8 +509,8 @@ static void delimit( /* and comsuflen of the lines in lines have already been */ /* determined to be at least pre and suf, respectively. */ { - const char * const *line, *end, *p, * const *nextline; - char rc; + const wchar_t * const *line, *end, *p, * const *nextline; + wchar_t rc; lineprop *prop, *nextprop; int anybodiless = 0, status; @@ -575,8 +531,8 @@ static void delimit( for (end = *line; *end; ++end); end -= suf; p = *line + pre; - rc = p < end ? *p : ' '; - if (rc != ' ' && (isinserted(prop) || !repeat || end - p < repeat)) + rc = p < end ? 
*p : L' '; + if (rc != L' ' && (isinserted(prop) || !repeat || end - p < repeat)) prop->flags &= ~L_BODILESS; else while (p < end) { @@ -619,24 +575,23 @@ static void delimit( } line = lines, prop = props; - status = ((*lines)[pre] == ' '); + status = ((*lines)[pre] == L' '); do { - if (((*line)[pre] == ' ') == status) + if (((*line)[pre] == L' ') == status) prop->flags |= L_FIRST; ++line, ++prop; } while (line < endline); } - static void marksuperf( - const char * const * lines, const char * const * endline, lineprop *props + const wchar_t * const * lines, const wchar_t * const * endline, lineprop *props ) /* lines points to the first line of a segment, and endline to one */ /* line beyond the last line in the segment. Sets L_SUPERF bits in */ /* the flags fields of the props array whenever the corresponding */ /* line is superfluous. L_BODILESS bits must already be set. */ { - const char * const *line, *p; + const wchar_t * const *line, *p; lineprop *prop, *mprop, dummy; int inbody, num, mnum; @@ -649,7 +604,7 @@ static void marksuperf( for (line = lines, prop = props; line < endline; ++line, ++prop) if (isvacant(prop)) { for (num = 0, p = *line; *p; ++p) - if (*p != ' ') ++num; + if (*p != L' ') ++num; if (inbody || num < mnum) mnum = num, mprop = prop; inbody = 0; @@ -657,11 +612,10 @@ static void marksuperf( if (!inbody) mprop->flags &= ~L_SUPERF; inbody = 1; } -} - +} static void setaffixes( - const char * const *inlines, const char * const *endline, + const wchar_t * const *inlines, const wchar_t * const *endline, const lineprop *props, const charset *bodychars, const charset *quotechars, int hang, int body, int quote, int *pafp, int *pfs, int *pprefix, int *psuffix @@ -674,7 +628,7 @@ static void setaffixes( /* default value as specified in "par.doc". */ { int numin, pre, suf; - const char *p; + const wchar_t *p; numin = endline - inlines; @@ -695,12 +649,11 @@ static void setaffixes( *psuffix = numin > hang + 1 ? 
suf : *pfs; } - -static void freelines(char **lines) +static void freelines(wchar_t **lines) /* Frees the elements of lines, and lines itself. */ /* lines is a NULL-terminated array of strings. */ { - char **line; + wchar_t **line; for (line = lines; *line; ++line) free(*line); @@ -708,68 +661,118 @@ static void freelines(char **lines) free(lines); } - int main(int argc, const char * const *argv) { int help = 0, version = 0, hang = 0, prefix = -1, repeat = 0, suffix = -1, Tab = 1, width = 72, body = 0, cap = 0, div = 0, Err = 0, expel = 0, fit = 0, guess = 0, invis = 0, just = 0, last = 0, quote = 0, Report = 0, touch = -1; - int prefixbak, suffixbak, c, sawnonblank, oweblank, n, i, afp, fs; + int prefixbak, suffixbak, sawnonblank, oweblank, n, i, afp, fs; charset *bodychars = NULL, *protectchars = NULL, *quotechars = NULL, *whitechars = NULL, *terminalchars = NULL; - char *parinit = NULL, *arg, **inlines = NULL, **endline, **firstline, *end, - **nextline, **outlines = NULL, **line, ch; - const char *env, * const init_whitechars = " \f\n\r\t\v"; + wint_t c; + wchar_t *state; + wchar_t *parinit = NULL, *arg, **inlines = NULL, **endline, **firstline, *end, + **nextline, **outlines = NULL, **line; + const char *env; + wchar_t *wenv = NULL; + const wchar_t * const init_whitechars = L" \f\n\r\t\v"; errmsg_t errmsg = { '\0' }; lineprop *props = NULL, *firstprop, *nextprop; FILE *errout; + char *langinfo; /* Set the current locale from the environment: */ setlocale(LC_ALL,""); + langinfo = nl_langinfo(CODESET); + if (!strcmp(langinfo, "ANSI_X3.4-1968")) { + // We would like to fallback in an 8 bits encoding, but it is not easily possible. + //setlocale(LC_CTYPE, "C"); + //langinfo = nl_langinfo(CODESET); + fwprintf( Err ? 
stderr : stdout, + + L"Warning: Locale seems not configured\n"); + } /* Process environment variables: */ env = getenv("PARBODY"); if (!env) env = ""; - bodychars = parsecharset(env,errmsg); + wenv = malloc((strlen(env) + 1) * sizeof (wchar_t)); + if (!wenv) { + wcscpy(errmsg,outofmem); + goto parcleanup; + } + if (0 > mbstowcs(wenv,env, strlen(env) + 1)) { + wcscpy(errmsg, L"Invalid multibyte sequence in PARBODY\n"); + goto parcleanup; + } + bodychars = parsecharset(wenv,errmsg); if (*errmsg) { help = 1; goto parcleanup; } + free(wenv); + wenv = NULL; env = getenv("PARPROTECT"); if (!env) env = ""; - protectchars = parsecharset(env,errmsg); + wenv = malloc((strlen(env) + 1) * sizeof (wchar_t)); + if (!wenv) { + wcscpy(errmsg,outofmem); + goto parcleanup; + } + if (0 > mbstowcs(wenv,env, strlen(env) + 1)) { + wcscpy(errmsg, L"Invalid multibyte sequence in PARPROTECT\n"); + goto parcleanup; + } + protectchars = parsecharset(wenv,errmsg); if (*errmsg) { help = 1; goto parcleanup; } + free(wenv); + wenv = NULL; env = getenv("PARQUOTE"); if (!env) env = "> "; - quotechars = parsecharset(env,errmsg); + wenv = malloc((strlen(env) + 1) * sizeof (wchar_t)); + if (!wenv) { + wcscpy(errmsg,outofmem); + goto parcleanup; + } + if (0 > mbstowcs(wenv,env, strlen(env) + 1)) { + wcscpy(errmsg, L"Invalid multibyte sequence in PARQUOTE\n"); + goto parcleanup; + } + quotechars = parsecharset(wenv,errmsg); if (*errmsg) { help = 1; goto parcleanup; } + free(wenv); + wenv = NULL; whitechars = parsecharset(init_whitechars, errmsg); if (*errmsg) goto parcleanup; - terminalchars = parsecharset(".?!:", errmsg); + terminalchars = parsecharset(L".?!:", errmsg); if (*errmsg) goto parcleanup; env = getenv("PARINIT"); if (env) { - parinit = malloc((strlen(env) + 1) * sizeof (char)); + parinit = malloc((strlen(env) + 1) * sizeof (wchar_t)); if (!parinit) { - strcpy(errmsg,outofmem); + wcscpy(errmsg,outofmem); + goto parcleanup; + } + if (0 > mbstowcs(parinit,env, strlen(env) + 1)) { + 
wcscpy(errmsg, L"Invalid multibyte sequence in PARINIT\n"); goto parcleanup; } - strcpy(parinit,env); - arg = strtok(parinit, init_whitechars); + + arg = wcstok(parinit, (const wchar_t *restrict)whitechars, &state); while (arg) { parsearg(arg, &help, &version, bodychars, protectchars, quotechars, whitechars, terminalchars, @@ -777,7 +780,7 @@ int main(int argc, const char * const *a &body, &cap, &div, &Err, &expel, &fit, &guess, &invis, &just, &last, "e, &Report, &touch, errmsg ); if (*errmsg || help || version) goto parcleanup; - arg = strtok(NULL, init_whitechars); + arg = wcstok(NULL, (const wchar_t *restrict)whitechars, &state); } free(parinit); parinit = NULL; @@ -786,16 +789,22 @@ int main(int argc, const char * const *a /* Process command line arguments: */ while (*++argv) { - parsearg(*argv, &help, &version, - bodychars, protectchars, quotechars, whitechars, terminalchars, + arg = malloc((strlen(*argv) + 1) * sizeof (wchar_t)); + if (0 > mbstowcs(arg, *argv, strlen(*argv) + 1)) { + wcscpy(errmsg, L"Invalid multibyte sequence in argument\n"); + goto parcleanup; + } + parsearg(arg, &help, &version, bodychars, protectchars, + quotechars, whitechars, terminalchars, &hang, &prefix, &repeat, &suffix, &Tab, &width, &body, &cap, &div, &Err, &expel, &fit, &guess, &invis, &just, &last, "e, &Report, &touch, errmsg ); + free(arg); if (*errmsg || help || version) goto parcleanup; } if (Tab == 0) { - strcpy(errmsg, " must not be 0.\n"); + wcscpy(errmsg, L" must not be 0.\n"); goto parcleanup; } @@ -807,31 +816,41 @@ int main(int argc, const char * const *a for (sawnonblank = oweblank = 0; ; ) { for (;;) { - c = getchar(); - if (c == EOF) break; - *(unsigned char *)&ch = c; - if (expel && ch == '\n') { + c = getwchar(); + if (c == WEOF) { + if (errno == EILSEQ) { + wcscpy(errmsg, L"Invalid multibyte sequence in input\n"); + goto parcleanup; + } + break; + } + if (expel && c == L'\n') { oweblank = sawnonblank; continue; } - if (csmember(ch, protectchars)) { + if 
(csmember(c, protectchars)) { sawnonblank = 1; if (oweblank) { - puts(""); + fputwc(L'\n', stdout); oweblank = 0; } - while (ch != '\n') { - putchar(c); - c = getchar(); - if (c == EOF) break; - *(unsigned char *)&ch = c; + while (c != L'\n') { + putwchar(c); + c = getwchar(); + if (c == WEOF) { + if (errno == EILSEQ) { + wcscpy(errmsg, L"Invalid multibyte sequence in input\n"); + goto parcleanup; + } + break; + } } } - if (ch != '\n') break; /* subsumes the case that c == EOF */ - putchar(c); + if (c != L'\n') break; /* subsumes the case that c == EOF */ + putwchar(c); } - if (c == EOF) break; - ungetc(c,stdin); + if (c == WEOF) break; + ungetwc(c,stdin); inlines = readlines(&props, protectchars, quotechars, whitechars, @@ -847,39 +866,40 @@ int main(int argc, const char * const *a sawnonblank = 1; if (oweblank) { - puts(""); + fputwc(L'\n', stdout); oweblank = 0; } - delimit((const char * const *) inlines, - (const char * const *) endline, + delimit((const wchar_t * const *) inlines, + (const wchar_t * const *) endline, bodychars, repeat, body, div, 0, 0, props); if (expel) - marksuperf((const char * const *) inlines, - (const char * const *) endline, props); + marksuperf((const wchar_t * const *) inlines, + (const wchar_t * const *) endline, props); firstline = inlines, firstprop = props; + do { if (isbodiless(firstprop)) { if ( !(invis && isinserted(firstprop)) && !(expel && issuperf(firstprop))) { for (end = *firstline; *end; ++end); - if (!repeat || (firstprop->rc == ' ' && !firstprop->s)) { - while (end > *firstline && end[-1] == ' ') --end; - *end = '\0'; - puts(*firstline); + if (!repeat || (firstprop->rc == L' ' && !firstprop->s)) { + while (end > *firstline && end[-1] == L' ') --end; + *end = L'\0'; + fwprintf(stdout, L"%ls\n", *firstline); } else { n = width - firstprop->p - firstprop->s; if (n < 0) { - sprintf(errmsg,impossibility,5); + swprintf(errmsg,errmsg_size,impossibility,5); goto parcleanup; } - printf("%.*s", firstprop->p, *firstline); + 
fwprintf(stdout, L"%.*ls", firstprop->p, *firstline); for (i = n; i; --i) - putchar(*(unsigned char *)&firstprop->rc); - puts(end - firstprop->s); + fputwc(firstprop->rc, stdout); + fwprintf(stdout, L"%ls\n", end - firstprop->s); } } ++firstline, ++firstprop; @@ -891,26 +911,26 @@ int main(int argc, const char * const *a ++nextline, ++nextprop); prefix = prefixbak, suffix = suffixbak; - setaffixes((const char * const *) firstline, - (const char * const *) nextline, firstprop, bodychars, + setaffixes((const wchar_t * const *) firstline, + (const wchar_t * const *) nextline, firstprop, bodychars, quotechars, hang, body, quote, &afp, &fs, &prefix, &suffix); if (width <= prefix + suffix) { - sprintf(errmsg, - " (%d) <= (%d) + (%d)\n", + swprintf(errmsg,errmsg_size, + L" (%d) <= (%d) + (%d)\n", width, prefix, suffix); goto parcleanup; } outlines = - reformat((const char * const *) firstline, - (const char * const *) nextline, + reformat((const wchar_t * const *) firstline, + (const wchar_t * const *) nextline, afp, fs, hang, prefix, suffix, width, cap, fit, guess, just, last, Report, touch, (const charset *) terminalchars, errmsg); if (*errmsg) goto parcleanup; for (line = outlines; *line; ++line) - puts(*line); + fwprintf(stdout, L"%ls\n", *line); freelines(outlines); outlines = NULL; @@ -926,7 +946,7 @@ int main(int argc, const char * const *a } parcleanup: - + if (wenv) free(wenv); if (bodychars) freecharset(bodychars); if (protectchars) freecharset(protectchars); if (quotechars) freecharset(quotechars); @@ -936,8 +956,12 @@ parcleanup: if (outlines) freelines(outlines); errout = Err ? 
stderr : stdout; - if (*errmsg) fprintf(errout, "par error:\n%.*s", errmsg_size, errmsg); - if (version) fputs("par 1.53.0\n",errout); + if (*errmsg) fwprintf(errout, L"par error:\n%.*ls", errmsg_size, errmsg); +#ifdef NOWIDTH + if (version) fputws(L"par 1.52-i18n.4 (without wcwidth() support)\n",errout); +#else + if (version) fputws(L"par 1.52-i18n.4\n",errout); +#endif if (help) fputs(usagemsg,errout); return *errmsg ? EXIT_FAILURE : EXIT_SUCCESS; --- a/protoMakefile +++ b/protoMakefile @@ -45,9 +45,9 @@ # Example (for Solaris 2.x with SPARCompiler C): # CC = cc -c -O -s -Xc -DDONTFREE -CPPFLAGS = -CFLAGS = -CC = cc $(CPPFLAGS) $(CFLAGS) -c +CPPFLAGS ?= +CFLAGS ?= +CC = cc -std=c99 $(CPPFLAGS) $(CFLAGS) -c # Define LINK1 and LINK2 so that the command # @@ -62,7 +62,7 @@ CC = cc $(CPPFLAGS) $(CFLAGS) -c # LINK2 = -o LINK1 = cc -LINK2 = -o +LINK2 = $(CFLAGS) $(CPPFLAGS) -o # Define RM so that the command # --- a/reformat.c +++ b/reformat.c @@ -3,6 +3,7 @@ reformat.c last touched in Par 1.53.0 last meaningful change in Par 1.53.0 Copyright 1993, 2001, 2020 Adam M. Costello +Modified by Jérôme Pouiller This is ANSI C code (C89). @@ -23,6 +24,8 @@ the ctype.h functions. See the comments #include #include #include +#include +#include #undef NULL #define NULL ((void *) 0) @@ -35,14 +38,15 @@ the ctype.h functions. See the comments typedef unsigned char wflag_t; typedef struct word { - const char *chrs; /* Pointer to the characters in the word */ + const wchar_t *chrs; /* Pointer to the characters in the word */ /* (NOT terminated by '\0'). */ struct word *prev, /* Pointer to previous word. */ *next, /* Pointer to next word. */ /* Supposing this word were the first... */ *nextline; /* Pointer to first word in next line. */ int score, /* Value of the objective function. */ - length; /* Length of this word. */ + length, /* Length (in widechar) of this word. */ + width; /* Visual width of this word. */ wflag_t flags; /* Notable properties of this word. 
*/ } word; @@ -59,17 +63,39 @@ static const wflag_t #define iscurious(w) (((w)->flags & 2) != 0) #define iscapital(w) (((w)->flags & 4) != 0) +static int getWidth(const wchar_t *beg, const wchar_t *end) +/* Compute (visual) width of a word. This function is aware */ +/* of double-width characters used in East Asian languages. */ +{ + int ret, tmp; + + for (ret = 0; beg != end; beg++) { +#ifdef NOWIDTH + tmp = 1; +#else + tmp = wcwidth(*beg); +#endif + // NOTE: zero-width and non-printable characters (wcwidth() <= 0) are counted as width 1. + // Otherwise the buffer allocated for q1, whose size is derived from widths, could be smaller + // than the number of wide characters copied into it. A layout bug is preferred to a segfault. + if (tmp <= 0) + tmp = 1; + ret += tmp; + } + + return ret; +} static int checkcapital(word *w) /* Returns 1 if *w is capitalized according to the definition */ /* in par.doc (assuming <cap> is 0), or 0 if not. */ { - const char *p, *end; + const wchar_t *p, *end; for (p = w->chrs, end = p + w->length; - p < end && !isalnum(*(unsigned char *)p); + p < end && !iswalnum(*p); ++p); - return p < end && !islower(*(unsigned char *)p); + return p < end && !iswlower(*p); } @@ -77,19 +103,19 @@ static int checkcurious(word *w, const c /* Returns 1 if *w is curious according to */ /* the definition in par.doc, or 0 if not. */ { - const char *start, *p; - char ch; + const wchar_t *start, *p; + wchar_t ch; for (start = w->chrs, p = start + w->length; p > start; --p) { ch = p[-1]; - if (isalnum(*(unsigned char *)&ch)) return 0; + if (iswalnum(*(wchar_t *)&ch)) return 0; if (csmember(ch,terminalchars)) break; } if (p <= start + 1) return 0; --p; - do if (isalnum(*(unsigned char *)--p)) return 1; + do if (iswalnum(*(wchar_t *)--p)) return 1; while (p > start); return 0; @@ -97,31 +123,32 @@ static int checkcurious(word *w, const c static int simplebreaks(word *head, word *tail, int L, int last) - -/* Chooses line breaks in a list of words which maximize the length of the */ -/* shortest line. L is the maximum line length.
The last line counts as a */ -/* line only if last is non-zero. _head must point to a dummy word, and tail */ -/* must point to the last word, whose next field must be NULL. Returns the */ -/* length of the shortest line on success, -1 if there is a word of length */ -/* greater than L, or L if there are no lines. */ +/* Chooses line breaks in a list of words which maximize */ +/* the length of the shortest line. L is the maximum line */ +/* length. The last line counts as a line only if last is */ +/* non-zero. _head must point to a dummy word, and tail */ +/* must point to the last word, whose next field must be */ +/* NULL. Returns the length of the shortest line on */ +/* success, -1 if there is a word of length greater than L, */ +/* or L if there are no lines. */ { word *w1, *w2; int linelen, score; if (!head->next) return L; - for (w1 = tail, linelen = w1->length; + for (w1 = tail, linelen = w1->width; w1 != head && linelen <= L; - linelen += isshifted(w1), w1 = w1->prev, linelen += 1 + w1->length) { + linelen += isshifted(w1), w1 = w1->prev, linelen += 1 + w1->width) { w1->score = last ? 
linelen : L; w1->nextline = NULL; } for ( ; w1 != head; w1 = w1->prev) { w1->score = -1; - for (linelen = w1->length, w2 = w1->next; + for (linelen = w1->width, w2 = w1->next; linelen <= L; - linelen += 1 + isshifted(w2) + w2->length, w2 = w2->next) { + linelen += 1 + isshifted(w2) + w2->width, w2 = w2->next) { score = w2->score; if (linelen < score) score = linelen; if (score >= w1->score) { @@ -170,7 +197,7 @@ static void normalbreaks( shortest = simplebreaks(head,tail,target,last); if (shortest < 0) { - sprintf(errmsg,impossibility,1); + swprintf(errmsg,errmsg_size,impossibility,1); return; } @@ -180,9 +207,9 @@ static void normalbreaks( w1 = tail; do { w1->score = -1; - for (linelen = w1->length, w2 = w1->next; + for (linelen = w1->width, w2 = w1->next; linelen <= target; - linelen += 1 + isshifted(w2) + w2->length, w2 = w2->next) { + linelen += 1 + isshifted(w2) + w2->width, w2 = w2->next) { extra = target - linelen; minlen = shortest; if (w2) @@ -204,7 +231,7 @@ static void normalbreaks( } while (w1 != head); if (head->next->score < 0) - sprintf(errmsg,impossibility,2); + swprintf(errmsg,errmsg_size,impossibility,2); } @@ -227,9 +254,9 @@ static void justbreaks( w1 = tail; do { w1->score = L; - for (numgaps = 0, extra = L - w1->length, w2 = w1->next; + for (numgaps = 0, extra = L - w1->width, w2 = w1->next; extra >= 0; - ++numgaps, extra -= 1 + isshifted(w2) + w2->length, w2 = w2->next) { + ++numgaps, extra -= 1 + isshifted(w2) + w2->width, w2 = w2->next) { gap = numgaps ? 
(extra + numgaps - 1) / numgaps : L; if (w2) score = w2->score; @@ -249,7 +276,7 @@ static void justbreaks( maxgap = head->next->score; if (maxgap >= L) { - strcpy(errmsg, "Cannot justify.\n"); + wcscpy(errmsg, L"Cannot justify.\n"); return; } @@ -259,9 +286,9 @@ static void justbreaks( w1 = tail; do { w1->score = -1; - for (numgaps = 0, extra = L - w1->length, w2 = w1->next; + for (numgaps = 0, extra = L - w1->width, w2 = w1->next; extra >= 0; - ++numgaps, extra -= 1 + isshifted(w2) + w2->length, w2 = w2->next) { + ++numgaps, extra -= 1 + isshifted(w2) + w2->width, w2 = w2->next) { gap = numgaps ? (extra + numgaps - 1) / numgaps : L; if (w2) score = w2->score; @@ -290,20 +317,20 @@ static void justbreaks( } while (w1 != head); if (head->next->score < 0) - sprintf(errmsg,impossibility,3); + swprintf(errmsg,errmsg_size,impossibility,3); } -char **reformat( - const char * const *inlines, const char * const *endline, int afp, int fs, +wchar_t **reformat( + const wchar_t * const *inlines, const wchar_t * const *endline, int afp, int fs, int hang, int prefix, int suffix, int width, int cap, int fit, int guess, int just, int last, int Report, int touch, const charset *terminalchars, errmsg_t errmsg ) { int numin, affix, L, onfirstword = 1, linelen, numout, numgaps, extra, phase; - const char * const *line, **suffixes = NULL, **suf, *end, *p1, *p2; - char *q1, *q2, **outlines = NULL; + const wchar_t * const *line, **suffixes = NULL, **suf, *end, *p1, *p2; + wchar_t *q1, *q2, **outlines = NULL; word dummy, *head, *tail, *w1, *w2; buffer *pbuf = NULL; @@ -315,16 +342,16 @@ char **reformat( head = tail = &dummy; numin = endline - inlines; if (numin <= 0) { - sprintf(errmsg,impossibility,4); + swprintf(errmsg,errmsg_size,impossibility,4); goto rfcleanup; } numgaps = extra = 0; /* unnecessary, but quiets compiler warnings */ /* Allocate space for pointers to the suffixes: */ - suffixes = malloc(numin * sizeof (const char *)); + suffixes = malloc(numin * sizeof (const wchar_t 
*)); if (!suffixes) { - strcpy(errmsg,outofmem); + wcscpy(errmsg,outofmem); goto rfcleanup; } @@ -337,8 +364,8 @@ char **reformat( do { for (end = *line; *end; ++end); if (end - *line < affix) { - sprintf(errmsg, - "Line %ld shorter than <prefix> + <suffix> = %d + %d = %d\n", + swprintf(errmsg,errmsg_size, + L"Line %ld shorter than <prefix> + <suffix> = %d + %d = %d\n", (long)(line - inlines + 1), prefix, suffix, affix); goto rfcleanup; } @@ -346,17 +373,17 @@ char **reformat( *suf = end; p1 = *line + prefix; for (;;) { - while (p1 < end && *p1 == ' ') ++p1; + while (p1 < end && *p1 == L' ') ++p1; if (p1 == end) break; p2 = p1; if (onfirstword) { p1 = *line + prefix; onfirstword = 0; } - while (p2 < end && *p2 != ' ') ++p2; + while (p2 < end && *p2 != L' ') ++p2; w1 = malloc(sizeof (word)); if (!w1) { - strcpy(errmsg,outofmem); + wcscpy(errmsg,outofmem); goto rfcleanup; } w1->next = NULL; @@ -364,6 +391,7 @@ char **reformat( tail = tail->next = w1; w1->chrs = p1; w1->length = p2 - p1; + w1->width = getWidth(p1, p2); w1->flags = 0; p1 = p2; } @@ -380,6 +408,7 @@ char **reformat( if (iscurious(w1)) { if (w1->chrs[w1->length] && w1->chrs + w1->length + 1 == w2->chrs) { w2->length += w1->length + 1; + w2->width += w1->width + 1; w2->chrs = w1->chrs; w2->prev = w1->prev; w2->prev->next = w2; @@ -400,20 +429,20 @@ char **reformat( if (Report) for (w2 = head->next; w2; w2 = w2->next) { - if (w2->length > L) { - linelen = w2->length; + if (w2->width > L) { + linelen = w2->length; if (linelen > errmsg_size - 17) linelen = errmsg_size - 17; - sprintf(errmsg, "Word too long: %.*s\n", linelen, w2->chrs); + swprintf(errmsg,errmsg_size, L"Word too long: %.*ls\n", linelen, w2->chrs); goto rfcleanup; } } else for (w2 = head->next; w2; w2 = w2->next) - while (w2->length > L) { + while (w2->width > L) { w1 = malloc(sizeof (word)); if (!w1) { - strcpy(errmsg,outofmem); + wcscpy(errmsg,outofmem); goto rfcleanup; } w1->next = w2; @@ -423,7 +452,9 @@ char **reformat( w1->chrs = w2->chrs; w2->chrs += L; w1->length = L; +
w1->width = getWidth(w1->chrs, w1->chrs + L); w2->length -= L; + w2->width -= w1->width; w1->flags = 0; if (iscapital(w2)) { w1->flags |= W_CAPITAL; @@ -447,9 +478,9 @@ char **reformat( L = 0; w1 = head->next; while (w1) { - for (linelen = w1->length, w2 = w1->next; + for (linelen = w1->width, w2 = w1->next; w2 != w1->nextline; - linelen += 1 + isshifted(w2) + w2->length, w2 = w2->next); + linelen += 1 + isshifted(w2) + w2->width, w2 = w2->next); if (linelen > L) L = linelen; w1 = w2; } @@ -457,67 +488,67 @@ char **reformat( /* Construct the lines: */ - pbuf = newbuffer(sizeof (char *), errmsg); + pbuf = newbuffer(sizeof (wchar_t *), errmsg); if (*errmsg) goto rfcleanup; numout = 0; w1 = head->next; while (numout < hang || w1) { if (w1) - for (w2 = w1->next, numgaps = 0, extra = L - w1->length; + for (w2 = w1->next, numgaps = 0, extra = L - w1->width; w2 != w1->nextline; - ++numgaps, extra -= 1 + isshifted(w2) + w2->length, w2 = w2->next); + ++numgaps, extra -= 1 + isshifted(w2) + w2->width, w2 = w2->next); linelen = suffix || (just && (w2 || last)) ? L + affix : w1 ? 
prefix + L - extra : prefix; - q1 = malloc((linelen + 1) * sizeof (char)); + q1 = malloc((linelen + 1) * sizeof (wchar_t)); if (!q1) { - strcpy(errmsg,outofmem); + wcscpy(errmsg,outofmem); goto rfcleanup; } additem(pbuf, &q1, errmsg); if (*errmsg) goto rfcleanup; ++numout; q2 = q1 + prefix; - if (numout <= numin) memcpy(q1, inlines[numout - 1], prefix); - else if (numin > hang ) memcpy(q1, endline[-1], prefix); + if (numout <= numin) memcpy(q1, inlines[numout - 1], prefix * sizeof(wchar_t)); + else if (numin > hang ) memcpy(q1, endline[-1], prefix * sizeof(wchar_t)); else { if (afp > prefix) afp = prefix; - memcpy(q1, endline[-1], afp); + memcpy(q1, endline[-1], afp * sizeof(wchar_t)); q1 += afp; - while (q1 < q2) *q1++ = ' '; + while (q1 < q2) *q1++ = L' '; } q1 = q2; if (w1) { phase = numgaps / 2; for (w2 = w1; ; ) { - memcpy(q1, w2->chrs, w2->length); + memcpy(q1, w2->chrs, w2->length * sizeof(wchar_t)); q1 += w2->length; w2 = w2->next; if (w2 == w1->nextline) break; - *q1++ = ' '; + *q1++ = L' '; if (just && (w1->nextline || last)) { phase += extra; while (phase >= numgaps) { - *q1++ = ' '; + *q1++ = L' '; phase -= numgaps; } } - if (isshifted(w2)) *q1++ = ' '; + if (isshifted(w2)) *q1++ = L' '; } } q2 += linelen - affix; - while (q1 < q2) *q1++ = ' '; + while (q1 < q2) *q1++ = L' '; q2 = q1 + suffix; - if (numout <= numin) memcpy(q1, suffixes[numout - 1], suffix); - else if (numin > hang ) memcpy(q1, suffixes[numin - 1], suffix); + if (numout <= numin) memcpy(q1, suffixes[numout - 1], suffix * sizeof(wchar_t)); + else if (numin > hang ) memcpy(q1, suffixes[numin - 1], suffix * sizeof(wchar_t)); else { if (fs > suffix) fs = suffix; - memcpy(q1, suffixes[numin - 1], fs); + memcpy(q1, suffixes[numin - 1], fs * sizeof(wchar_t)); q1 += fs; - while(q1 < q2) *q1++ = ' '; + while(q1 < q2) *q1++ = L' '; } - *q2 = '\0'; + *q2 = L'\0'; if (w1) w1 = w1->nextline; } @@ -546,5 +577,6 @@ rfcleanup: freebuffer(pbuf); } + return outlines; } --- a/reformat.h +++ b/reformat.h @@ 
-3,6 +3,7 @@ reformat.h last touched in Par 1.53.0 last meaningful change in Par 1.53.0 Copyright 1993, 2020 Adam M. Costello +Modified by Jérôme Pouiller This is ANSI C code (C89). @@ -11,10 +12,11 @@ This is ANSI C code (C89). #include "charset.h" #include "errmsg.h" +#include -char **reformat( - const char * const *inlines, const char * const *endline, int afp, int fs, +wchar_t **reformat( + const wchar_t * const *inlines, const wchar_t * const *endline, int afp, int fs, int hang, int prefix, int suffix, int width, int cap, int fit, int guess, int just, int last, int Report, int touch, const charset *terminalchars, errmsg_t errmsg --- a/releasenotes +++ b/releasenotes @@ -2,10 +2,26 @@ releasenotes last touched in Par 1.53.0 last meaningful change in Par 1.53.0 Copyright 1993, 1996, 2000, 2001, 2020 Adam M. Costello +Modified by Jérôme Pouiller Each entry below describes changes since the previous version. +Par 1.52-i18n.4 released 2009-May-05 + Changed nearly all char to wchar_t, removing the nightmare of unsigned char vs. signed char + Fix bugs with option 'q' + Fix bugs with '\n' + +Par 1.52-i18n.3 released 2006-Oct-03 + Fix bug with option 'g' + +Par 1.52-i18n.2 released 2006-Aug-03 + Fix Debian bug #310495. + +Par 1.52-i18n.1 released 2006-Jun-22 + Changed char to wchar_t. Added support for multibyte characters. + Added support for double-width characters. + Par 1.53.0 released 2020-Mar-14 Fixed the following bugs: An unintended bad interaction between and .