Files
man/man-db-2.4.3-chinese.dif

91 lines
3.2 KiB
Plaintext
Raw Normal View History

--- src/encodings.c
+++ src/encodings.c 2007-07-19 14:21:02.681951968 +0200
@@ -106,6 +106,8 @@ static struct directory_entry directory_
{ "cs", "ISO-8859-2", "ISO-8859-2" }, /* Czech */
{ "hr", "ISO-8859-2", "ISO-8859-2" }, /* Croatian */
{ "hu", "ISO-8859-2", "ISO-8859-2" }, /* Hungarian */
+ { "zh", "GB18030", "GB18030" }, /* Simplified Chinese */
+ { "tw", "BIG5", "BIG5" }, /* Traditional Chinese */
{ "ja", "EUC-JP", "EUC-JP" }, /* Japanese */
{ "ko", "EUC-KR", "EUC-KR" }, /* Korean */
{ "pl", "ISO-8859-2", "ISO-8859-2" }, /* Polish */
@@ -136,6 +138,8 @@ static struct charset_entry charset_tabl
#ifdef MULTIBYTE_GROFF
{ "EUC-JP", "nippon" },
+ { "GB18030", "nippon" },
+ { "BIG5", "nippon" },
#endif /* MULTIBYTE_GROFF */
{ NULL, NULL }
@@ -170,7 +174,7 @@ static struct device_entry device_table[
#ifdef MULTIBYTE_GROFF
{ "ascii8", NULL, NULL },
- { "nippon", "EUC-JP", "EUC-JP" },
+ { "nippon", NULL, NULL },
#endif /* MULTIBYTE_GROFF */
{ NULL, NULL, NULL }
@@ -193,6 +197,7 @@ static struct less_charset_entry less_ch
{ "UTF-8", "utf-8" },
#ifdef MULTIBYTE_GROFF
+ { "GB18030", "zh" },
{ "EUC-JP", "ja" },
{ "KOI8-R", "koi8-r" },
#endif /* MULTIBYTE_GROFF */
@@ -283,6 +288,13 @@ char *get_page_encoding (const char *lan
* roff encoding = UTF-8
* output encoding = UTF-8
* EUC-JP -> iconv -> UTF-8 -> groff -Tutf8 -> UTF-8
+ *
+ * /usr/share/man/zh_CN.GB18030, locale zh_CN.UTF-8
+ * page encoding = GB18030
+ * source encoding = GB18030
+ * roff encoding = UTF-8
+ * output encoding = UTF-8
+ * GB18030 -> iconv -> UTF-8 -> groff -Tutf8 -> UTF-8
*/
const char *get_source_encoding (const char *lang)
{
@@ -386,10 +398,13 @@ static int compatible_encodings (const c
return 1;
#ifdef MULTIBYTE_GROFF
- /* Special case for ja_JP.UTF-8, which takes UTF-8 input recoded
- * from EUC-JP and produces UTF-8 output. This is rather filthy.
+ /* Special case for ja_JP.UTF-8, zh_CN.UTF-8 and zh_TW.UTF-8, which
+ * take UTF-8 input recoded from EUC-JP, GB18030, GBK or BIG5 and
+ * produce UTF-8 output. This is rather filthy.
*/
- if (STREQ (input, "EUC-JP") && STREQ (output, "UTF-8"))
+ if ((STREQ (input, "EUC-JP") || STREQ (input, "BIG5") ||
+ STREQ (input, "GB18030") || STREQ (input, "GBK")) &&
+ STREQ (output, "UTF-8"))
return 1;
#endif /* MULTIBYTE_GROFF */
@@ -449,13 +464,17 @@ const char *get_roff_encoding (const cha
#ifdef MULTIBYTE_GROFF
/* An ugly special case is needed here. The utf8 device normally
* takes ISO-8859-1 input. However, with the multibyte patch, when
- * recoding from EUC-JP it takes UTF-8 input instead. This is evil,
- * but there's not much that can be done about it apart from waiting
- * for groff 2.0.
+ * recoding from EUC-JP, GB18030 or BIG5 it takes UTF-8 input
+ * instead. This is evil, but there's not much that can be done
+ * about it apart from waiting for groff 2.0.
+ *
*/
if (STREQ (device, "utf8")) {
const char *ctype = setlocale (LC_CTYPE, NULL);
- if (STREQ (ctype, "ja_JP.UTF-8"))
+ if (STREQ (ctype, "ja_JP.UTF-8") ||
+ STREQ (ctype, "zh_CN.UTF-8") ||
+ STREQ (ctype, "zh_SG.UTF-8") ||
+ STREQ (ctype, "zh_TW.UTF-8"))
roff_encoding = "UTF-8";
}
#endif /* MULTIBYTE_GROFF */