335 lines
9.3 KiB
Plaintext
335 lines
9.3 KiB
Plaintext
--- def.h
|
|
+++ def.h 2012-03-23 13:57:44.000000000 +0000
|
|
@@ -142,7 +142,9 @@ enum mimeclean {
|
|
MIME_LONGLINES = 002, /* has lines too long for RFC 2822 */
|
|
MIME_CTRLCHAR = 004, /* contains control characters */
|
|
MIME_HASNUL = 010, /* contains \0 characters */
|
|
- MIME_NOTERMNL = 020 /* lacks a terminating newline */
|
|
+ MIME_NOTERMNL = 020, /* lacks a terminating newline */
|
|
+ MIME_UTF8 = 040, /* UTF-8 high bit multi byte characters */
|
|
+ MIME_LATIN = 0100 /* Latin high bit single byte characters */
|
|
};
|
|
|
|
enum tdflags {
|
|
--- mime.c
|
|
+++ mime.c 2013-07-02 10:25:35.000000000 +0000
|
|
@@ -302,13 +302,78 @@ gettcharset(void)
|
|
return t;
|
|
}
|
|
|
|
+#define F 0 /* character never appears in mail text */
|
|
+#define T 1 /* character appears in plain ASCII text */
|
|
+#define I 2 /* character appears in ISO-8859 text */
|
|
+#define X 3 /* character appears in non-ISO extended ASCII (Mac, IBM PC) */
|
|
+
|
|
+static char text_chars[256] = { /* class (F, T, I or X) of every byte value; index with the byte masked by 0377 */
|
|
+ /* T in this row: BS (0x08), HT (0x09), LF (0x0a), CR (0x0d). NOTE(review): NUL, BEL and FF (0x0c) are F here -- confirm FF is intended */
|
|
+ F, F, F, F, F, F, F, F, T, T, T, F, F, T, F, F, /* 0x0X */
|
|
+ /* T in this row: ESC (0x1b) only */
|
|
+ F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F, /* 0x1X */
|
|
+ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x2X */
|
|
+ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x3X */
|
|
+ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x4X */
|
|
+ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x5X */
|
|
+ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x6X */
|
|
+ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F, /* 0x7X, DEL (0x7f) is F */
|
|
+ /* NEL (0x85) -- NOTE(review): it is classed X like every other byte in 0x80-0x9f, not singled out */
|
|
+ X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, /* 0x8X */
|
|
+ X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, /* 0x9X */
|
|
+ I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xaX */
|
|
+ I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xbX */
|
|
+ I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xcX */
|
|
+ I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xdX */
|
|
+ I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xeX */
|
|
+ I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I /* 0xfX */
|
|
+};
|
|
+
|
|
+static int encflags; /* MIME_UTF8 or MIME_LATIN as last stored by test_enc(); need_hdrconv() and mime_fromhdr() zero it before scanning */
|
|
+static void
|
|
+test_enc(const char *s) /* classify the byte at s as the start of a UTF-8 sequence or as an ISO-8859 byte; the result goes to encflags, nothing is returned */
|
|
+{
|
|
+ int c = *s; /* has_highbit() calls this with s at a byte that has bit 0200 set */
|
|
+ if (c & 0100) { /* bit 0100 set as well: 11xxxxxx, a candidate UTF-8 lead byte */
|
|
+ int n, follow; /* follow = number of continuation bytes the lead byte announces */
|
|
+
|
|
+ if ((c & 040) == 0) /* 110xxxxx */
|
|
+ follow = 1;
|
|
+ else if ((c & 020) == 0) /* 1110xxxx */
|
|
+ follow = 2;
|
|
+ else if ((c & 010) == 0) /* 11110xxx */
|
|
+ follow = 3;
|
|
+ else if ((c & 004) == 0) /* 111110xx */
|
|
+ follow = 4;
|
|
+ else if ((c & 002) == 0) /* 1111110x */
|
|
+ follow = 5;
|
|
+ else
|
|
+ return; /* 1111111x (0xfe/0xff): leave encflags untouched */
|
|
+
|
|
+ for (n = 1; n <= follow; n++) { /* every announced byte must look like 10xxxxxx */
|
|
+ if ((c = *(s+n)) == '\0') /* string ends inside the sequence */
|
|
+ goto latin;
|
|
+ if ((c & 0200) == 0 || (c & 0100)) /* not a continuation byte */
|
|
+ goto latin;
|
|
+ }
|
|
+ encflags = MIME_UTF8; /* plain assignment: replaces whatever was stored before */
|
|
+ return;
|
|
+ }
|
|
+latin: /* reached for a 10xxxxxx first byte and for an incomplete or malformed sequence */
|
|
+ c = *s; /* the loop above may have overwritten c; look at the first byte again */
|
|
+ if (text_chars[c & 0377] == I) /* mask keeps the index in 0..255 even if char is signed; I is 0xa0-0xff, so 0x80-0x9f sets nothing */
|
|
+ encflags = MIME_LATIN;
|
|
+}
|
|
+
|
|
static int
|
|
has_highbit(const char *s)
|
|
{
|
|
if (s) {
|
|
do
|
|
- if (*s & 0200)
|
|
+ if (*s & 0200) {
|
|
+ test_enc(s);
|
|
return 1;
|
|
+ }
|
|
while (*s++ != '\0');
|
|
}
|
|
return 0;
|
|
@@ -328,6 +393,7 @@ name_highbit(struct name *np)
|
|
char *
|
|
need_hdrconv(struct header *hp, enum gfield w)
|
|
{
|
|
+ encflags = 0;
|
|
if (w & GIDENT) {
|
|
if (hp->h_from && name_highbit(hp->h_from))
|
|
goto needs;
|
|
@@ -355,7 +422,7 @@ need_hdrconv(struct header *hp, enum gfi
|
|
if (w & GSUBJECT && has_highbit(hp->h_subject))
|
|
goto needs;
|
|
return NULL;
|
|
-needs: return getcharset(MIME_HIGHBIT);
|
|
+needs: return getcharset(MIME_HIGHBIT|encflags);
|
|
}
|
|
|
|
#ifdef HAVE_ICONV
|
|
@@ -441,7 +507,7 @@ iconv_open_ft(const char *tocode, const
|
|
* be used to check the validity of the input even with
|
|
* identical encoding names.
|
|
*/
|
|
- if (strcmp(t, f) == 0)
|
|
+ if (asccasecmp(t, f) == 0)
|
|
errno = 0;
|
|
return (iconv_t)-1;
|
|
}
|
|
@@ -665,7 +731,7 @@ mime_tline(char *x, char *l)
|
|
l++;
|
|
if (*l != '\0')
|
|
*l++ = '\0';
|
|
- if (strcmp(x, n) == 0) {
|
|
+ if (asccasecmp(x, n) == 0) {
|
|
match = 1;
|
|
break;
|
|
}
|
|
@@ -748,16 +814,66 @@ mime_isclean(FILE *f)
|
|
maxlen = curlen;
|
|
curlen = 1;
|
|
} else if (c & 0200) {
|
|
+ int i = c;
|
|
isclean |= MIME_HIGHBIT;
|
|
+ if (c & 0100) {
|
|
+ int n, follow;
|
|
+
|
|
+ if ((c & 040) == 0) /* 110xxxxx */
|
|
+ follow = 1;
|
|
+ else if ((c & 020) == 0) /* 1110xxxx */
|
|
+ follow = 2;
|
|
+ else if ((c & 010) == 0) /* 11110xxx */
|
|
+ follow = 3;
|
|
+ else if ((c & 004) == 0) /* 111110xx */
|
|
+ follow = 4;
|
|
+ else if ((c & 002) == 0) /* 1111110x */
|
|
+ follow = 5;
|
|
+ else
|
|
+ continue;
|
|
+
|
|
+ for (n = 0; n < follow; n++) {
|
|
+ lastc = c;
|
|
+ c = getc(f);
|
|
+ curlen++;
|
|
+
|
|
+ if (c == '\0') {
|
|
+ isclean |= MIME_HASNUL;
|
|
+ goto latin;
|
|
+ }
|
|
+ if ((c & 0200) == 0 || (c & 0100))
|
|
+ goto latin;
|
|
+ if ((c == '\n') || (c == EOF)) {
|
|
+ if (curlen > maxlen)
|
|
+ maxlen = curlen;
|
|
+ curlen = 1;
|
|
+ goto latin;
|
|
+ }
|
|
+ }
|
|
+ isclean |= MIME_UTF8;
|
|
+ continue;
|
|
+ }
|
|
+ latin:
|
|
+ if (text_chars[i & 0377] == I) {
|
|
+ isclean |= MIME_LATIN;
|
|
+ continue;
|
|
+ }
|
|
+ if (text_chars[i & 0377] == X) {
|
|
+ isclean |= MIME_CTRLCHAR;
|
|
+ break;
|
|
+ }
|
|
} else if (c == '\0') {
|
|
isclean |= MIME_HASNUL;
|
|
break;
|
|
- } else if ((c < 040 && (c != '\t' && c != '\f')) || c == 0177) {
|
|
+ } else if (text_chars[c & 0377] == F) {
|
|
isclean |= MIME_CTRLCHAR;
|
|
+ break;
|
|
}
|
|
} while (c != EOF);
|
|
if (lastc != '\n')
|
|
isclean |= MIME_NOTERMNL;
|
|
+ if (isclean & (MIME_CTRLCHAR|MIME_HASNUL))
|
|
+ isclean &= (MIME_CTRLCHAR|MIME_HASNUL);
|
|
clearerr(f);
|
|
fseek(f, initial_pos, SEEK_SET);
|
|
if ((cp = value("maximum-unencoded-line-length")) != NULL)
|
|
@@ -826,11 +942,16 @@ get_mime_convert(FILE *fp, char **conten
|
|
* ^I or ^L in text/plain bodies. However, some
|
|
* obscure character sets actually contain these
|
|
* characters, so the content type can be set.
|
|
+ * Besides ^I and ^L from RFC 2046, we also accept
|
|
+ * backspace (^H), which is often used in enhanced text.
|
|
*/
|
|
if ((*contenttype = value("contenttype-cntrl")) == NULL)
|
|
*contenttype = "application/octet-stream";
|
|
} else if (*contenttype == NULL)
|
|
*contenttype = "text/plain";
|
|
+ } else if (ascncasecmp(*contenttype, "text/", 5) == 0)
|
|
+ {
|
|
+ *charset = getcharset(*isclean);
|
|
}
|
|
return convert;
|
|
}
|
|
@@ -989,8 +1110,14 @@ mime_fromhdr(struct str *in, struct str
|
|
#ifdef HAVE_ICONV
|
|
iconv_t fhicd = (iconv_t)-1;
|
|
#endif
|
|
+ enum mimeclean isclean = 0;
|
|
|
|
tcs = gettcharset();
|
|
+
|
|
+ encflags = 0;
|
|
+ if (has_highbit(in->s))
|
|
+ isclean |= (MIME_HIGHBIT|encflags);
|
|
+
|
|
maxstor = in->l;
|
|
out->s = smalloc(maxstor + 1);
|
|
out->l = 0;
|
|
@@ -1010,7 +1137,7 @@ mime_fromhdr(struct str *in, struct str
|
|
#ifdef HAVE_ICONV
|
|
if (fhicd != (iconv_t)-1)
|
|
iconv_close(fhicd);
|
|
- if (strcmp(cs, tcs))
|
|
+ if (asccasecmp(cs, tcs))
|
|
fhicd = iconv_open_ft(tcs, cs);
|
|
else
|
|
fhicd = (iconv_t)-1;
|
|
@@ -1105,12 +1232,17 @@ notmime:
|
|
}
|
|
fromhdr_end:
|
|
*q = '\0';
|
|
- if (flags & TD_ISPR) {
|
|
+ if ((flags & TD_ISPR) && (isclean & MIME_HIGHBIT)) {
|
|
struct str new;
|
|
+ if ((isclean & MIME_UTF8) && asccasecmp("utf-8", tcs) == 0)
|
|
+ goto skip;
|
|
+ if ((isclean & MIME_LATIN) && ascncasecmp("iso-8859-", tcs, 9) == 0)
|
|
+ goto skip;
|
|
makeprint(out, &new);
|
|
free(out->s);
|
|
*out = new;
|
|
}
|
|
+skip:
|
|
if (flags & TD_DELCTRL)
|
|
out->l = delctrl(out->s, out->l);
|
|
#ifdef HAVE_ICONV
|
|
--- sendout.c
|
|
+++ sendout.c 2012-03-23 18:30:50.949933289 +0000
|
|
@@ -226,6 +226,23 @@ attach_file1(struct attachment *ap, FILE
|
|
charset = ap->a_charset;
|
|
convert = get_mime_convert(fi, &contenttype, &charset, &isclean,
|
|
dosign);
|
|
+#ifdef HAVE_ICONV
|
|
+ tcs = gettcharset();
|
|
+ if (isclean & MIME_UTF8)
|
|
+ {
|
|
+ tcs = "utf-8";
|
|
+ }
|
|
+ if (isclean & MIME_LATIN) {
|
|
+ tcs = value("charset");
|
|
+ if (tcs == NULL && wantcharset && wantcharset != (char *)-1)
|
|
+ tcs = wantcharset;
|
|
+ }
|
|
+ if (tcs == NULL) {
|
|
+ contenttype = "application/octet-stream";
|
|
+ charset = NULL;
|
|
+ convert = CONV_TOB64;
|
|
+ }
|
|
+#endif
|
|
fprintf(fo,
|
|
"\n--%s\n"
|
|
"Content-Type: %s",
|
|
@@ -255,11 +272,10 @@ attach_file1(struct attachment *ap, FILE
|
|
iconv_close(iconvd);
|
|
iconvd = (iconv_t)-1;
|
|
}
|
|
- tcs = gettcharset();
|
|
if ((isclean & (MIME_HASNUL|MIME_CTRLCHAR)) == 0 &&
|
|
ascncasecmp(contenttype, "text/", 5) == 0 &&
|
|
- isclean & MIME_HIGHBIT &&
|
|
- charset != NULL) {
|
|
+ (isclean & MIME_HIGHBIT) &&
|
|
+ charset != NULL && tcs != NULL) {
|
|
if ((iconvd = iconv_open_ft(charset, tcs)) == (iconv_t)-1 &&
|
|
errno != 0) {
|
|
if (errno == EINVAL)
|
|
@@ -467,11 +483,12 @@ infix(struct header *hp, FILE *fi, int d
|
|
}
|
|
rm(tempMail);
|
|
Ftfree(&tempMail);
|
|
- convert = get_mime_convert(fi, &contenttype, &charset,
|
|
- &isclean, dosign);
|
|
+ convert = get_mime_convert(fi, &contenttype, &charset, &isclean,
|
|
+ dosign);
|
|
#ifdef HAVE_ICONV
|
|
tcs = gettcharset();
|
|
- if ((convhdr = need_hdrconv(hp, GTO|GSUBJECT|GCC|GBCC|GIDENT)) != 0) {
|
|
+ if ((convhdr = need_hdrconv(hp, GTO|GSUBJECT|GCC|GBCC|GIDENT)) != 0 &&
|
|
+ tcs != NULL) {
|
|
if (iconvd != (iconv_t)-1)
|
|
iconv_close(iconvd);
|
|
if ((iconvd = iconv_open_ft(convhdr, tcs)) == (iconv_t)-1
|
|
@@ -505,10 +522,17 @@ infix(struct header *hp, FILE *fi, int d
|
|
iconv_close(iconvd);
|
|
iconvd = (iconv_t)-1;
|
|
}
|
|
+ if (isclean & MIME_UTF8)
|
|
+ tcs = "utf-8";
|
|
+ if (isclean & MIME_LATIN) {
|
|
+ tcs = value("charset");
|
|
+ if (tcs == NULL && wantcharset && wantcharset != (char *)-1)
|
|
+ tcs = wantcharset;
|
|
+ }
|
|
if ((isclean & (MIME_HASNUL|MIME_CTRLCHAR)) == 0 &&
|
|
ascncasecmp(contenttype, "text/", 5) == 0 &&
|
|
- isclean & MIME_HIGHBIT &&
|
|
- charset != NULL) {
|
|
+ (isclean & MIME_HIGHBIT) &&
|
|
+ charset != NULL && tcs != NULL) {
|
|
if (iconvd != (iconv_t)-1)
|
|
iconv_close(iconvd);
|
|
if ((iconvd = iconv_open_ft(charset, tcs)) == (iconv_t)-1
|