mailx/mailx-12.5-mime.dif
2012-03-30 14:52:32 +00:00

323 lines
9.0 KiB
Plaintext

--- def.h
+++ def.h 2012-03-23 13:57:44.000000000 +0000
@@ -142,7 +142,9 @@ enum mimeclean {
MIME_LONGLINES = 002, /* has lines too long for RFC 2822 */
MIME_CTRLCHAR = 004, /* contains control characters */
MIME_HASNUL = 010, /* contains \0 characters */
- MIME_NOTERMNL = 020 /* lacks a terminating newline */
+ MIME_NOTERMNL = 020, /* lacks a terminating newline */
+ MIME_UTF8 = 040, /* UTF-8 high bit multi byte characters */
+ MIME_LATIN = 0100 /* Latin high bit single byte characters */
};
enum tdflags {
--- mime.c
+++ mime.c 2012-03-30 14:50:10.318065066 +0000
@@ -302,13 +302,78 @@ gettcharset(void)
return t;
}
+#define F 0 /* character never appears in mail text */
+#define T 1 /* character appears in plain ASCII text */
+#define I 2 /* character appears in ISO-8859 text */
+#define X 3 /* character appears in non-ISO extended ASCII (Mac, IBM PC) */
+
+static char text_chars[256] = {	/* class (F/T/I/X) of each byte, indexed by (c & 0377) */
+	/* text controls in 0x0X: 0x08 BS, 0x09 HT, 0x0a LF, 0x0c FF, 0x0d CR */
+	F, F, F, F, F, F, F, F, T, T, T, F, T, T, F, F, /* 0x0X */
+	/* text control in 0x1X: 0x1b ESC */
+	F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F, /* 0x1X */
+	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x2X */
+	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x3X */
+	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x4X */
+	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x5X */
+	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x6X */
+	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F, /* 0x7X (0x7f DEL is not text) */
+	/* 0x85 NEL gets no special treatment here: all of 0x80-0x9f is class X */
+	X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, /* 0x8X */
+	X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, /* 0x9X */
+	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xaX */
+	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xbX */
+	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xcX */
+	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xdX */
+	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xeX */
+	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I /* 0xfX */
+};
+
+static int encflags;	/* MIME_UTF8 or MIME_LATIN as last guessed by test_enc(); callers reset it to 0 */
+static void
+test_enc(const char *s)	/* has_highbit() calls this with *s being a byte that has the high bit set */
+{
+	int c = *s;
+	if (c & 0100) {		/* 11xxxxxx: candidate UTF-8 lead byte */
+		int n, follow;	/* follow = number of continuation bytes the lead byte announces */
+
+		if ((c & 040) == 0)		/* 110xxxxx */
+			follow = 1;
+		else if ((c & 020) == 0)	/* 1110xxxx */
+			follow = 2;
+		else if ((c & 010) == 0)	/* 11110xxx */
+			follow = 3;
+		else if ((c & 004) == 0)	/* 111110xx */
+			follow = 4;
+		else if ((c & 002) == 0)	/* 1111110x */
+			follow = 5;
+		else		/* 0xfe/0xff: never a UTF-8 lead byte, but class I in text_chars */
+			goto latin;
+
+		for (n = 1; n <= follow; n++) {
+			if ((c = *(s+n)) == '\0')	/* string ends inside the sequence */
+				goto latin;
+			if ((c & 0200) == 0 || (c & 0100))	/* not a 10xxxxxx continuation byte */
+				goto latin;
+		}
+		encflags = MIME_UTF8;
+		return;
+	}
+latin:
+	c = *s;		/* c may have been overwritten by the continuation scan */
+	if (text_chars[c & 0377] == I)
+		encflags = MIME_LATIN;
+}
+
static int
has_highbit(const char *s)
{
if (s) {
do
- if (*s & 0200)
+ if (*s & 0200) {
+ test_enc(s);
return 1;
+ }
while (*s++ != '\0');
}
return 0;
@@ -328,6 +393,7 @@ name_highbit(struct name *np)
char *
need_hdrconv(struct header *hp, enum gfield w)
{
+ encflags = 0;
if (w & GIDENT) {
if (hp->h_from && name_highbit(hp->h_from))
goto needs;
@@ -355,7 +421,7 @@ need_hdrconv(struct header *hp, enum gfi
if (w & GSUBJECT && has_highbit(hp->h_subject))
goto needs;
return NULL;
-needs: return getcharset(MIME_HIGHBIT);
+needs: return getcharset(MIME_HIGHBIT|encflags);
}
#ifdef HAVE_ICONV
@@ -441,7 +507,7 @@ iconv_open_ft(const char *tocode, const
* be used to check the validity of the input even with
* identical encoding names.
*/
- if (strcmp(t, f) == 0)
+ if (asccasecmp(t, f) == 0)
errno = 0;
return (iconv_t)-1;
}
@@ -665,7 +731,7 @@ mime_tline(char *x, char *l)
l++;
if (*l != '\0')
*l++ = '\0';
- if (strcmp(x, n) == 0) {
+ if (asccasecmp(x, n) == 0) {
match = 1;
break;
}
@@ -748,11 +814,54 @@ mime_isclean(FILE *f)
maxlen = curlen;
curlen = 1;
} else if (c & 0200) {
+ int i = c;
isclean |= MIME_HIGHBIT;
+ if (c & 0100) {
+ int n, follow;
+
+ if ((c & 040) == 0) /* 110xxxxx */
+ follow = 1;
+ else if ((c & 020) == 0) /* 1110xxxx */
+ follow = 2;
+ else if ((c & 010) == 0) /* 11110xxx */
+ follow = 3;
+ else if ((c & 004) == 0) /* 111110xx */
+ follow = 4;
+ else if ((c & 002) == 0) /* 1111110x */
+ follow = 5;
+ else
+ continue;
+
+ for (n = 0; n < follow; n++) {
+ lastc = c;
+ c = getc(f);
+ curlen++;
+
+ if (c == '\0') {
+ isclean |= MIME_HASNUL;
+ goto latin;
+ }
+ if ((c & 0200) == 0 || (c & 0100))
+ goto latin;
+ if ((c == '\n') || (c == EOF)) {
+ if (curlen > maxlen)
+ maxlen = curlen;
+ curlen = 1;
+ goto latin;
+ }
+ }
+ isclean |= MIME_UTF8;
+ continue;
+ }
+ latin:
+ if (text_chars[i & 0377] == I)
+ isclean |= MIME_LATIN;
+ if (text_chars[i & 0377] == X)
+ isclean |= MIME_CTRLCHAR;
} else if (c == '\0') {
isclean |= MIME_HASNUL;
break;
- } else if ((c < 040 && (c != '\t' && c != '\f')) || c == 0177) {
+ } else if (text_chars[c & 0377] == F) {
isclean |= MIME_CTRLCHAR;
}
} while (c != EOF);
@@ -826,11 +935,16 @@ get_mime_convert(FILE *fp, char **conten
* ^I or ^L in text/plain bodies. However, some
* obscure character sets actually contain these
* characters, so the content type can be set.
+ * Besides ^I and ^L from RFC 2046, we also accept
+ * backspace (^H), which is often used in enhanced text.
*/
if ((*contenttype = value("contenttype-cntrl")) == NULL)
*contenttype = "application/octet-stream";
} else if (*contenttype == NULL)
*contenttype = "text/plain";
+ } else if (ascncasecmp(*contenttype, "text/", 5) == 0)
+ {
+ *charset = getcharset(*isclean);
}
return convert;
}
@@ -989,8 +1103,14 @@ mime_fromhdr(struct str *in, struct str
#ifdef HAVE_ICONV
iconv_t fhicd = (iconv_t)-1;
#endif
+ enum mimeclean isclean = 0;
tcs = gettcharset();
+
+ encflags = 0;
+ if (has_highbit(in->s))
+ isclean |= (MIME_HIGHBIT|encflags);
+
maxstor = in->l;
out->s = smalloc(maxstor + 1);
out->l = 0;
@@ -1010,7 +1130,7 @@ mime_fromhdr(struct str *in, struct str
#ifdef HAVE_ICONV
if (fhicd != (iconv_t)-1)
iconv_close(fhicd);
- if (strcmp(cs, tcs))
+ if (asccasecmp(cs, tcs))
fhicd = iconv_open_ft(tcs, cs);
else
fhicd = (iconv_t)-1;
@@ -1105,12 +1225,17 @@ notmime:
}
fromhdr_end:
*q = '\0';
- if (flags & TD_ISPR) {
+ if ((flags & TD_ISPR) && (isclean & MIME_HIGHBIT)) {
struct str new;
+ if ((isclean & MIME_UTF8) && asccasecmp("utf-8", tcs) == 0)
+ goto skip;
+ if ((isclean & MIME_LATIN) && ascncasecmp("iso-8859-", tcs, 9) == 0)
+ goto skip;
makeprint(out, &new);
free(out->s);
*out = new;
}
+skip:
if (flags & TD_DELCTRL)
out->l = delctrl(out->s, out->l);
#ifdef HAVE_ICONV
--- sendout.c
+++ sendout.c 2012-03-23 18:30:50.949933289 +0000
@@ -226,6 +226,23 @@ attach_file1(struct attachment *ap, FILE
charset = ap->a_charset;
convert = get_mime_convert(fi, &contenttype, &charset, &isclean,
dosign);
+#ifdef HAVE_ICONV
+ tcs = gettcharset();
+ if (isclean & MIME_UTF8)
+ {
+ tcs = "utf-8";
+ }
+ if (isclean & MIME_LATIN) {
+ tcs = value("charset");
+ if (tcs == NULL && wantcharset && wantcharset != (char *)-1)
+ tcs = wantcharset;
+ }
+ if (tcs == NULL) {
+ contenttype = "application/octet-stream";
+ charset = NULL;
+ convert = CONV_TOB64;
+ }
+#endif
fprintf(fo,
"\n--%s\n"
"Content-Type: %s",
@@ -255,11 +272,10 @@ attach_file1(struct attachment *ap, FILE
iconv_close(iconvd);
iconvd = (iconv_t)-1;
}
- tcs = gettcharset();
if ((isclean & (MIME_HASNUL|MIME_CTRLCHAR)) == 0 &&
ascncasecmp(contenttype, "text/", 5) == 0 &&
- isclean & MIME_HIGHBIT &&
- charset != NULL) {
+ (isclean & MIME_HIGHBIT) &&
+ charset != NULL && tcs != NULL) {
if ((iconvd = iconv_open_ft(charset, tcs)) == (iconv_t)-1 &&
errno != 0) {
if (errno == EINVAL)
@@ -467,11 +483,12 @@ infix(struct header *hp, FILE *fi, int d
}
rm(tempMail);
Ftfree(&tempMail);
- convert = get_mime_convert(fi, &contenttype, &charset,
- &isclean, dosign);
+ convert = get_mime_convert(fi, &contenttype, &charset, &isclean,
+ dosign);
#ifdef HAVE_ICONV
tcs = gettcharset();
- if ((convhdr = need_hdrconv(hp, GTO|GSUBJECT|GCC|GBCC|GIDENT)) != 0) {
+ if ((convhdr = need_hdrconv(hp, GTO|GSUBJECT|GCC|GBCC|GIDENT)) != 0 &&
+ tcs != NULL) {
if (iconvd != (iconv_t)-1)
iconv_close(iconvd);
if ((iconvd = iconv_open_ft(convhdr, tcs)) == (iconv_t)-1
@@ -505,10 +522,17 @@ infix(struct header *hp, FILE *fi, int d
iconv_close(iconvd);
iconvd = (iconv_t)-1;
}
+ if (isclean & MIME_UTF8)
+ tcs = "utf-8";
+ if (isclean & MIME_LATIN) {
+ tcs = value("charset");
+ if (tcs == NULL && wantcharset && wantcharset != (char *)-1)
+ tcs = wantcharset;
+ }
if ((isclean & (MIME_HASNUL|MIME_CTRLCHAR)) == 0 &&
ascncasecmp(contenttype, "text/", 5) == 0 &&
- isclean & MIME_HIGHBIT &&
- charset != NULL) {
+ (isclean & MIME_HIGHBIT) &&
+ charset != NULL && tcs != NULL) {
if (iconvd != (iconv_t)-1)
iconv_close(iconvd);
if ((iconvd = iconv_open_ft(charset, tcs)) == (iconv_t)-1