man/man-db-2.4.3-chinese.dif

--- src/encodings.c
+++ src/encodings.c	2007-07-19 14:21:02.681951968 +0200
@@ -106,6 +106,8 @@ static struct directory_entry directory_
 	{ "cs",		"ISO-8859-2",	"ISO-8859-2"		}, /* Czech */
 	{ "hr",		"ISO-8859-2",	"ISO-8859-2"		}, /* Croatian */
 	{ "hu",		"ISO-8859-2",	"ISO-8859-2"		}, /* Hungarian */
+	{ "zh",		"GB18030",	"GB18030"		}, /* Simplified Chinese */
+	{ "tw",		"BIG5",		"BIG5"			}, /* Traditional Chinese */
 	{ "ja",		"EUC-JP",	"EUC-JP"		}, /* Japanese */
 	{ "ko",		"EUC-KR",	"EUC-KR"		}, /* Korean */
 	{ "pl",		"ISO-8859-2",	"ISO-8859-2"		}, /* Polish */
@@ -136,6 +138,8 @@ static struct charset_entry charset_tabl
 
 #ifdef MULTIBYTE_GROFF
 	{ "EUC-JP",		"nippon"	},
+	{ "GB18030",		"nippon"	},
+	{ "BIG5",		"nippon"	},
 #endif /* MULTIBYTE_GROFF */
 
 	{ NULL,			NULL		}
@@ -170,7 +174,7 @@ static struct device_entry device_table[
 
 #ifdef MULTIBYTE_GROFF
 	{ "ascii8",	NULL,		NULL			},
-	{ "nippon",	"EUC-JP",	"EUC-JP"		},
+	{ "nippon",	NULL,		NULL			},
 #endif /* MULTIBYTE_GROFF */
 
 	{ NULL,		NULL,		NULL			}
@@ -193,6 +197,7 @@ static struct less_charset_entry less_ch
 	{ "UTF-8",		"utf-8"		},
 
 #ifdef MULTIBYTE_GROFF
+	{ "GB18030",		"zh"		},
 	{ "EUC-JP",		"ja"		},
 	{ "KOI8-R",		"koi8-r"	},
 #endif /* MULTIBYTE_GROFF */
@@ -283,6 +288,13 @@ char *get_page_encoding (const char *lan
  *     roff encoding = UTF-8
  *     output encoding = UTF-8
  *     EUC-JP -> iconv -> UTF-8 -> groff -Tutf8 -> UTF-8
+ *
+ *   /usr/share/man/zh_CN.GB18030, locale zh_CN.UTF-8
+ *     page encoding = GB18030
+ *     source encoding = GB18030
+ *     roff encoding = UTF-8
+ *     output encoding = UTF-8
+ *     GB18030 -> iconv -> UTF-8 -> groff -Tutf8 -> UTF-8
  */
 const char *get_source_encoding (const char *lang)
 {
@@ -386,10 +398,13 @@ static int compatible_encodings (const c
 		return 1;
 
 #ifdef MULTIBYTE_GROFF
-	/* Special case for ja_JP.UTF-8, which takes UTF-8 input recoded
-	 * from EUC-JP and produces UTF-8 output. This is rather filthy.
+	/* Special case for ja_JP.UTF-8, zh_CN.UTF-8 and zh_TW.UTF-8, which take
+	 * UTF-8 input recoded from EUC-JP, GB18030, GBK or BIG5 and produce
+	 * UTF-8 output. This is rather filthy.
 	 */
-	if (STREQ (input, "EUC-JP") && STREQ (output, "UTF-8"))
+	if ((STREQ (input, "EUC-JP") || STREQ (input, "BIG5") ||
+	     STREQ (input, "GB18030") || STREQ (input, "GBK")) &&
+	    STREQ (output, "UTF-8"))
 		return 1;
 #endif /* MULTIBYTE_GROFF */
 
@@ -449,13 +464,17 @@ const char *get_roff_encoding (const cha
 #ifdef MULTIBYTE_GROFF
 	/* An ugly special case is needed here. The utf8 device normally
 	 * takes ISO-8859-1 input. However, with the multibyte patch, when
-	 * recoding from EUC-JP it takes UTF-8 input instead. This is evil,
-	 * but there's not much that can be done about it apart from waiting
-	 * for groff 2.0.
+	 * recoding from EUC-JP, GB18030 or BIG5 it takes UTF-8 input
+	 * instead. This is evil, but there's not much that can be done
+	 * about it apart from waiting for groff 2.0.
+	 *
 	 */
 	if (STREQ (device, "utf8")) {
 		const char *ctype = setlocale (LC_CTYPE, NULL);
-		if (STREQ (ctype, "ja_JP.UTF-8"))
+		if (STREQ (ctype, "ja_JP.UTF-8") ||
+		    STREQ (ctype, "zh_CN.UTF-8") ||
+		    STREQ (ctype, "zh_SG.UTF-8") ||
+		    STREQ (ctype, "zh_TW.UTF-8"))
 			roff_encoding = "UTF-8";
 	}
 #endif /* MULTIBYTE_GROFF */
OBS-URL: https://build.opensuse.org/package/show/openSUSE:Factory/man?expand=0&rev=9 2007-07-21 16:03:50 +00:00			`--- src/encodings.c`
			`+++ src/encodings.c 2007-07-19 14:21:02.681951968 +0200`
			`@@ -106,6 +106,8 @@ static struct directory_entry directory_`
			`{ "cs", "ISO-8859-2", "ISO-8859-2" }, /* Czech */`
			`{ "hr", "ISO-8859-2", "ISO-8859-2" }, /* Croatian */`
			`{ "hu", "ISO-8859-2", "ISO-8859-2" }, /* Hungarian */`
			`+ { "zh", "GB18030", "GB18030" }, /* Simplified Chinese */`
			`+ { "tw", "BIG5", "BIG5" }, /* Traditional Chinese */`
			`{ "ja", "EUC-JP", "EUC-JP" }, /* Japanese */`
			`{ "ko", "EUC-KR", "EUC-KR" }, /* Korean */`
			`{ "pl", "ISO-8859-2", "ISO-8859-2" }, /* Polish */`
			`@@ -136,6 +138,8 @@ static struct charset_entry charset_tabl`

			`#ifdef MULTIBYTE_GROFF`
			`{ "EUC-JP", "nippon" },`
			`+ { "GB18030", "nippon" },`
			`+ { "BIG5", "nippon" },`
			`#endif /* MULTIBYTE_GROFF */`

			`{ NULL, NULL }`
			`@@ -170,7 +174,7 @@ static struct device_entry device_table[`

			`#ifdef MULTIBYTE_GROFF`
			`{ "ascii8", NULL, NULL },`
			`- { "nippon", "EUC-JP", "EUC-JP" },`
			`+ { "nippon", NULL, NULL },`
			`#endif /* MULTIBYTE_GROFF */`

			`{ NULL, NULL, NULL }`
			`@@ -193,6 +197,7 @@ static struct less_charset_entry less_ch`
			`{ "UTF-8", "utf-8" },`

			`#ifdef MULTIBYTE_GROFF`
			`+ { "GB18030", "zh" },`
			`{ "EUC-JP", "ja" },`
			`{ "KOI8-R", "koi8-r" },`
			`#endif /* MULTIBYTE_GROFF */`
			`@@ -283,6 +288,13 @@ char get_page_encoding (const char lan`
			`* roff encoding = UTF-8`
			`* output encoding = UTF-8`
			`* EUC-JP -> iconv -> UTF-8 -> groff -Tutf8 -> UTF-8`
			`+ *`
			`+ * /usr/share/man/zh_CN.GB18030, locale zh_CN.UTF-8`
			`+ * page encoding = GB18030`
			`+ * source encoding = GB18030`
			`+ * roff encoding = UTF-8`
			`+ * output encoding = UTF-8`
			`+ * GB18030 -> iconv -> UTF-8 -> groff -Tutf8 -> UTF-8`
			`*/`
			`const char get_source_encoding (const char lang)`
			`{`
			`@@ -386,10 +398,13 @@ static int compatible_encodings (const c`
			`return 1;`

			`#ifdef MULTIBYTE_GROFF`
			`- /* Special case for ja_JP.UTF-8, which takes UTF-8 input recoded`
			`- * from EUC-JP and produces UTF-8 output. This is rather filthy.`
			`+ /* Special case for ja_JP.UTF-8, zh_CN.UTF-8 and zh_TW.UTF-8, which take`
			`+ * UTF-8 input recoded from EUC-JP, GB18030, GBK or BIG5 and produce`
			`+ * UTF-8 output. This is rather filthy.`
			`*/`
			`- if (STREQ (input, "EUC-JP") && STREQ (output, "UTF-8"))`
			`+ if ((STREQ (input, "EUC-JP") \|\| STREQ (input, "BIG5") \|\|`
			`+ STREQ (input, "GB18030") \|\| STREQ (input, "GBK")) &&`
			`+ STREQ (output, "UTF-8"))`
			`return 1;`
			`#endif /* MULTIBYTE_GROFF */`

			`@@ -449,13 +464,17 @@ const char *get_roff_encoding (const cha`
			`#ifdef MULTIBYTE_GROFF`
			`/* An ugly special case is needed here. The utf8 device normally`
			`* takes ISO-8859-1 input. However, with the multibyte patch, when`
			`- * recoding from EUC-JP it takes UTF-8 input instead. This is evil,`
			`- * but there's not much that can be done about it apart from waiting`
			`- * for groff 2.0.`
			`+ * recoding from EUC-JP, GB18030 or BIG5 it takes UTF-8 input`
			`+ * instead. This is evil, but there's not much that can be done`
			`+ * about it apart from waiting for groff 2.0.`
			`+ *`
			`*/`
			`if (STREQ (device, "utf8")) {`
			`const char *ctype = setlocale (LC_CTYPE, NULL);`
			`- if (STREQ (ctype, "ja_JP.UTF-8"))`
			`+ if (STREQ (ctype, "ja_JP.UTF-8") \|\|`
			`+ STREQ (ctype, "zh_CN.UTF-8") \|\|`
			`+ STREQ (ctype, "zh_SG.UTF-8") \|\|`
			`+ STREQ (ctype, "zh_TW.UTF-8"))`
			`roff_encoding = "UTF-8";`
			`}`
			`#endif /* MULTIBYTE_GROFF */`