91 lines
3.2 KiB
Plaintext
91 lines
3.2 KiB
Plaintext
![]() |
--- src/encodings.c
|
||
|
+++ src/encodings.c 2007-07-19 14:21:02.681951968 +0200
|
||
|
@@ -106,6 +106,8 @@ static struct directory_entry directory_
|
||
|
{ "cs", "ISO-8859-2", "ISO-8859-2" }, /* Czech */
|
||
|
{ "hr", "ISO-8859-2", "ISO-8859-2" }, /* Croatian */
|
||
|
{ "hu", "ISO-8859-2", "ISO-8859-2" }, /* Hungarian */
|
||
|
+ { "zh", "GB18030", "GB18030" }, /* Simplified Chinese */
|
||
|
+ { "tw", "BIG5", "BIG5" }, /* Traditional Chinese */
|
||
|
{ "ja", "EUC-JP", "EUC-JP" }, /* Japanese */
|
||
|
{ "ko", "EUC-KR", "EUC-KR" }, /* Korean */
|
||
|
{ "pl", "ISO-8859-2", "ISO-8859-2" }, /* Polish */
|
||
|
@@ -136,6 +138,8 @@ static struct charset_entry charset_tabl
|
||
|
|
||
|
#ifdef MULTIBYTE_GROFF
|
||
|
{ "EUC-JP", "nippon" },
|
||
|
+ { "GB18030", "nippon" },
|
||
|
+ { "BIG5", "nippon" },
|
||
|
#endif /* MULTIBYTE_GROFF */
|
||
|
|
||
|
{ NULL, NULL }
|
||
|
@@ -170,7 +174,7 @@ static struct device_entry device_table[
|
||
|
|
||
|
#ifdef MULTIBYTE_GROFF
|
||
|
{ "ascii8", NULL, NULL },
|
||
|
- { "nippon", "EUC-JP", "EUC-JP" },
|
||
|
+ { "nippon", NULL, NULL },
|
||
|
#endif /* MULTIBYTE_GROFF */
|
||
|
|
||
|
{ NULL, NULL, NULL }
|
||
|
@@ -193,6 +197,7 @@ static struct less_charset_entry less_ch
|
||
|
{ "UTF-8", "utf-8" },
|
||
|
|
||
|
#ifdef MULTIBYTE_GROFF
|
||
|
+ { "GB18030", "zh" },
|
||
|
{ "EUC-JP", "ja" },
|
||
|
{ "KOI8-R", "koi8-r" },
|
||
|
#endif /* MULTIBYTE_GROFF */
|
||
|
@@ -283,6 +288,13 @@ char *get_page_encoding (const char *lan
|
||
|
* roff encoding = UTF-8
|
||
|
* output encoding = UTF-8
|
||
|
* EUC-JP -> iconv -> UTF-8 -> groff -Tutf8 -> UTF-8
|
||
|
+ *
|
||
|
+ * /usr/share/man/zh_CN.GB18030, locale zh_CN.UTF-8
|
||
|
+ * page encoding = GB18030
|
||
|
+ * source encoding = GB18030
|
||
|
+ * roff encoding = UTF-8
|
||
|
+ * output encoding = UTF-8
|
||
|
+ * GB18030 -> iconv -> UTF-8 -> groff -Tutf8 -> UTF-8
|
||
|
*/
|
||
|
const char *get_source_encoding (const char *lang)
|
||
|
{
|
||
|
@@ -386,10 +398,13 @@ static int compatible_encodings (const c
|
||
|
return 1;
|
||
|
|
||
|
#ifdef MULTIBYTE_GROFF
|
||
|
- /* Special case for ja_JP.UTF-8, which takes UTF-8 input recoded
|
||
|
- * from EUC-JP and produces UTF-8 output. This is rather filthy.
|
||
|
+ /* Special case for ja_JP.UTF-8, zh_CN.UTF-8 and zh_TW.UTF-8, which
|
||
|
+ * take UTF-8 input recoded from EUC-JP, GB18030, GBK or BIG5 and
|
||
|
+ * produce UTF-8 output. This is rather filthy.
|
||
|
*/
|
||
|
- if (STREQ (input, "EUC-JP") && STREQ (output, "UTF-8"))
|
||
|
+ if ((STREQ (input, "EUC-JP") || STREQ (input, "BIG5") ||
|
||
|
+ STREQ (input, "GB18030") || STREQ (input, "GBK")) &&
|
||
|
+ STREQ (output, "UTF-8"))
|
||
|
return 1;
|
||
|
#endif /* MULTIBYTE_GROFF */
|
||
|
|
||
|
@@ -449,13 +464,17 @@ const char *get_roff_encoding (const cha
|
||
|
#ifdef MULTIBYTE_GROFF
|
||
|
/* An ugly special case is needed here. The utf8 device normally
|
||
|
* takes ISO-8859-1 input. However, with the multibyte patch, when
|
||
|
- * recoding from EUC-JP it takes UTF-8 input instead. This is evil,
|
||
|
- * but there's not much that can be done about it apart from waiting
|
||
|
- * for groff 2.0.
|
||
|
+ * recoding from EUC-JP, GB18030 or BIG5 it takes UTF-8 input
|
||
|
+ * instead. This is evil, but there's not much that can be done
|
||
|
+ * about it apart from waiting for groff 2.0.
|
||
|
+ *
|
||
|
*/
|
||
|
if (STREQ (device, "utf8")) {
|
||
|
const char *ctype = setlocale (LC_CTYPE, NULL);
|
||
|
- if (STREQ (ctype, "ja_JP.UTF-8"))
|
||
|
+ if (STREQ (ctype, "ja_JP.UTF-8") ||
|
||
|
+ STREQ (ctype, "zh_CN.UTF-8") ||
|
||
|
+ STREQ (ctype, "zh_SG.UTF-8") ||
|
||
|
+ STREQ (ctype, "zh_TW.UTF-8"))
|
||
|
roff_encoding = "UTF-8";
|
||
|
}
|
||
|
#endif /* MULTIBYTE_GROFF */
|