--- def.h +++ def.h 2007-04-17 13:32:43.960721000 +0200 @@ -142,7 +142,8 @@ enum mimeclean { MIME_LONGLINES = 002, /* has lines too long for RFC 2822 */ MIME_CTRLCHAR = 004, /* contains control characters */ MIME_HASNUL = 010, /* contains \0 characters */ - MIME_NOTERMNL = 020 /* lacks a terminating newline */ + MIME_NOTERMNL = 020, /* lacks a terminating newline */ + MIME_UTF8 = 040 /* UTF-8 high bit characters */ }; enum tdflags { --- mime.c +++ mime.c 2007-04-17 15:08:06.352702764 +0200 @@ -258,7 +258,10 @@ getcharset(int isclean) if (isclean & (MIME_CTRLCHAR|MIME_HASNUL)) charset = NULL; else if (isclean & MIME_HIGHBIT) { - charset = wantcharset ? wantcharset : value("charset"); + if (isclean & MIME_UTF8) + charset = defcharset; + if (charset == NULL) + charset = wantcharset ? wantcharset : value("charset"); if (charset == NULL) { charset = defcharset; } @@ -730,6 +733,7 @@ mime_isclean(FILE *f) lastc = c; c = getc(f); curlen++; + check: if (c == '\n' || c == EOF) { /* * RFC 821 imposes a maximum line length of 1000 @@ -742,10 +746,38 @@ mime_isclean(FILE *f) curlen = 1; } else if (c & 0200) { isclean |= MIME_HIGHBIT; + if (c & 0100) { + int n, follow; + + if ((c & 040) == 0) /* 110xxxxx */ + follow = 1; + else if ((c & 020) == 0) /* 1110xxxx */ + follow = 2; + else if ((c & 010) == 0) /* 11110xxx */ + follow = 3; + else if ((c & 004) == 0) /* 111110xx */ + follow = 4; + else if ((c & 002) == 0) /* 1111110x */ + follow = 5; + else + continue; + + for (n = 0; n < follow; n++) { + lastc = c; + c = getc(f); + + if ((c & 0200) == 0 || (c & 0100) || + (c == '\0') || (c == '\n') || (c == EOF)) { + curlen += n; + goto check; + } + } + isclean |= MIME_UTF8; + } } else if (c == '\0') { isclean |= MIME_HASNUL; break; - } else if ((c < 040 && (c != '\t' && c != '\f')) || c == 0177) { + } else if ((c < 040 && (c != '\t' && c != '\f' && c != '\b')) || c == 0177) { isclean |= MIME_CTRLCHAR; } } while (c != EOF); @@ -814,13 +846,16 @@ get_mime_convert(FILE *fp, char **conten 
* ^I or ^L in text/plain bodies. However, some * obscure character sets actually contain these * characters, so the content type can be set. + * Besides ^I and ^L from RFC 2046, we also accept + * backspace (^H), which is often used in enhanced text. */ if ((*contenttype = value("contenttype-cntrl")) == NULL) *contenttype = "application/octet-stream"; } else *contenttype = "text/plain"; *charset = getcharset(*isclean); - } + } else if (ascncasecmp(*contenttype, "text/", 5) == 0) + *charset = getcharset(*isclean); return convert; } --- sendout.c +++ sendout.c 2007-04-17 15:11:40.576871634 +0200 @@ -206,7 +206,7 @@ attach_file(struct attachment *ap, FILE size_t bufsize, count; int lastc = EOF; #ifdef HAVE_ICONV - char *tcs; + char *tcs = NULL; #endif if ((fi = Fopen(ap->a_name, "r")) == NULL) { @@ -231,8 +231,16 @@ attach_file(struct attachment *ap, FILE send_boundary, contenttype); if (charset == NULL) putc('\n', fo); - else - fprintf(fo, ";\n charset=%s\n", charset); + else { +#ifdef HAVE_ICONV + if (wantcharset && ascncasecmp(wantcharset, "ANSI_X3.4", 9)) + tcs = wantcharset; + if (tcs) + fprintf(fo, ";\n charset=%s\n", tcs); + else +#endif + fprintf(fo, ";\n charset=%s\n", charset); + } if (ap->a_content_disposition == NULL) ap->a_content_disposition = "attachment"; fprintf(fo, "Content-Transfer-Encoding: %s\n" @@ -254,16 +262,15 @@ attach_file(struct attachment *ap, FILE iconv_close(iconvd); iconvd = (iconv_t)-1; } - tcs = gettcharset(); if ((isclean & (MIME_HASNUL|MIME_CTRLCHAR)) == 0 && ascncasecmp(contenttype, "text/", 5) == 0 && isclean & MIME_HIGHBIT && - charset != NULL && asccasecmp(charset, tcs)) { - if ((iconvd = iconv_open_ft(charset, tcs)) == (iconv_t)-1 && + charset != NULL && tcs != NULL && asccasecmp(charset, tcs)) { + if ((iconvd = iconv_open_ft(tcs, charset)) == (iconv_t)-1 && errno != 0) { if (errno == EINVAL) fprintf(stderr, catgets(catd, CATSET, 179, - "Cannot convert from %s to %s\n"), tcs, charset); + "Cannot convert from %s to %s\n"), charset, tcs); else 
perror("iconv_open"); Fclose(fi); @@ -467,6 +474,7 @@ infix(struct header *hp, FILE *fi, int d } if ((isclean & (MIME_HASNUL|MIME_CTRLCHAR)) == 0 && ascncasecmp(contenttype, "text/", 5) == 0 && + ascncasecmp(tcs, "ANSI_X3.4", 9) && isclean & MIME_HIGHBIT && charset != NULL && asccasecmp(charset, tcs)) { if (iconvd != (iconv_t)-1) @@ -1160,8 +1168,9 @@ puthead(struct header *hp, FILE *fo, enu return 1; if ((addr = hp->h_organization) != NULL || (addr = value("ORGANIZATION")) != NULL) { + size_t len = strlen(addr); fwrite("Organization: ", sizeof (char), 14, fo); - if (mime_write(addr, sizeof *addr, strlen(addr), fo, + if (len && mime_write(addr, sizeof *addr, len, fo, action == SEND_TODISP ? CONV_NONE:CONV_TOHDR, action == SEND_TODISP ? @@ -1206,9 +1215,10 @@ puthead(struct header *hp, FILE *fo, enu if (hp->h_subject != NULL && w & GSUBJECT) { fwrite("Subject: ", sizeof (char), 9, fo); if (ascncasecmp(hp->h_subject, "re: ", 4) == 0) { + size_t len = strlen(hp->h_subject + 4); fwrite("Re: ", sizeof (char), 4, fo); - if (mime_write(hp->h_subject + 4, sizeof *hp->h_subject, - strlen(hp->h_subject + 4), + if (len && mime_write(hp->h_subject + 4, + sizeof *hp->h_subject, len, fo, action == SEND_TODISP ? CONV_NONE:CONV_TOHDR, action == SEND_TODISP ? @@ -1216,8 +1226,9 @@ puthead(struct header *hp, FILE *fo, enu NULL, (size_t)0) == 0) return 1; } else if (*hp->h_subject) { - if (mime_write(hp->h_subject, sizeof *hp->h_subject, - strlen(hp->h_subject), + size_t len = strlen(hp->h_subject); + if (len && mime_write(hp->h_subject, + sizeof *hp->h_subject, len, fo, action == SEND_TODISP ? CONV_NONE:CONV_TOHDR, action == SEND_TODISP ?