--- src/encodings.c +++ src/encodings.c 2007-07-19 14:21:02.681951968 +0200 @@ -106,6 +106,8 @@ static struct directory_entry directory_ { "cs", "ISO-8859-2", "ISO-8859-2" }, /* Czech */ { "hr", "ISO-8859-2", "ISO-8859-2" }, /* Croatian */ { "hu", "ISO-8859-2", "ISO-8859-2" }, /* Hungarian */ + { "zh", "GB18030", "GB18030" }, /* Simplified Chinese */ + { "tw", "BIG5", "BIG5" }, /* Traditional Chinese */ { "ja", "EUC-JP", "EUC-JP" }, /* Japanese */ { "ko", "EUC-KR", "EUC-KR" }, /* Korean */ { "pl", "ISO-8859-2", "ISO-8859-2" }, /* Polish */ @@ -136,6 +138,8 @@ static struct charset_entry charset_tabl #ifdef MULTIBYTE_GROFF { "EUC-JP", "nippon" }, + { "GB18030", "nippon" }, + { "BIG5", "nippon" }, #endif /* MULTIBYTE_GROFF */ { NULL, NULL } @@ -170,7 +174,7 @@ static struct device_entry device_table[ #ifdef MULTIBYTE_GROFF { "ascii8", NULL, NULL }, - { "nippon", "EUC-JP", "EUC-JP" }, + { "nippon", NULL, NULL }, #endif /* MULTIBYTE_GROFF */ { NULL, NULL, NULL } @@ -193,6 +197,7 @@ static struct less_charset_entry less_ch { "UTF-8", "utf-8" }, #ifdef MULTIBYTE_GROFF + { "GB18030", "zh" }, { "EUC-JP", "ja" }, { "KOI8-R", "koi8-r" }, #endif /* MULTIBYTE_GROFF */ @@ -283,6 +288,13 @@ char *get_page_encoding (const char *lan * roff encoding = UTF-8 * output encoding = UTF-8 * EUC-JP -> iconv -> UTF-8 -> groff -Tutf8 -> UTF-8 + * + * /usr/share/man/zh_CN.GB18030, locale zh_CN.UTF-8 + * page encoding = GB18030 + * source encoding = GB18030 + * roff encoding = UTF-8 + * output encoding = UTF-8 + * GB18030 -> iconv -> UTF-8 -> groff -Tutf8 -> UTF-8 */ const char *get_source_encoding (const char *lang) { @@ -386,10 +398,13 @@ static int compatible_encodings (const c return 1; #ifdef MULTIBYTE_GROFF - /* Special case for ja_JP.UTF-8, which takes UTF-8 input recoded - * from EUC-JP and produces UTF-8 output. This is rather filthy. + /* Special case for ja_JP.UTF-8, zh_CN.UTF-8 and zh_TW.UTF-8, which + * take UTF-8 input recoded from EUC-JP, BIG5, GB18030 or GBK and 
+ * produce UTF-8 output. This is rather filthy. */ - if (STREQ (input, "EUC-JP") && STREQ (output, "UTF-8")) + if ((STREQ (input, "EUC-JP") || STREQ (input, "BIG5") || + STREQ (input, "GB18030") || STREQ (input, "GBK")) && + STREQ (output, "UTF-8")) return 1; #endif /* MULTIBYTE_GROFF */ @@ -449,13 +464,17 @@ const char *get_roff_encoding (const cha #ifdef MULTIBYTE_GROFF /* An ugly special case is needed here. The utf8 device normally * takes ISO-8859-1 input. However, with the multibyte patch, when - * recoding from EUC-JP it takes UTF-8 input instead. This is evil, - * but there's not much that can be done about it apart from waiting - * for groff 2.0. + * recoding from EUC-JP, GB18030 or BIG5 it takes UTF-8 input + * instead. This is evil, but there's not much that can be done + * about it apart from waiting for groff 2.0. + * */ if (STREQ (device, "utf8")) { const char *ctype = setlocale (LC_CTYPE, NULL); - if (STREQ (ctype, "ja_JP.UTF-8")) + if (STREQ (ctype, "ja_JP.UTF-8") || + STREQ (ctype, "zh_CN.UTF-8") || + STREQ (ctype, "zh_SG.UTF-8") || + STREQ (ctype, "zh_TW.UTF-8")) roff_encoding = "UTF-8"; } #endif /* MULTIBYTE_GROFF */