groff/utf8.patch

165 lines
4.4 KiB
Diff

diff -ru groff-1.18.1.1.orig/src/roff/nroff/nroff.sh groff-1.18.1.1/src/roff/nroff/nroff.sh
--- groff-1.18.1.1.orig/src/roff/nroff/nroff.sh 2002-02-07 09:36:54.000000000 +0100
+++ groff-1.18.1.1/src/roff/nroff/nroff.sh 2006-02-09 15:33:14.000000000 +0100
@@ -81,11 +87,159 @@
shift
done
+# Hack so that man pages in selected languages display correctly in a UTF-8
+# locale: groff is run under a legacy locale with a matching output device
+# ($T), and its output is converted to UTF-8 afterwards by the command held
+# in $ICONV.  By default $ICONV is a plain pass-through.
+ICONV="cat"
+if [ "$(locale charmap 2>/dev/null)" = "UTF-8" ]; then
+  # The language is taken from the first of LANGUAGE, LC_ALL, LC_MESSAGES and
+  # LANG that is set (a variable set to the empty string still counts).
+  case "${LANGUAGE-${LC_ALL-${LC_MESSAGES-${LANG}}}}" in
+    ja*)
+      # Japanese man page in a UTF-8 locale: force the 'nippon' device, run
+      # groff in the ja_JP.eucJP locale and convert the result with iconv.
+      T=-Tnippon
+      LC_ALL=ja_JP.eucJP
+      export LC_ALL
+      ICONV="iconv -f EUC-JP -t UTF-8"
+      ;;
+    cs*|hu*|hr*|pl*)
+      # Czech, Hungarian, ... in UTF-8 seem to need special treatment too.
+      T=-Tascii8
+      LC_ALL=cs_CZ.ISO-8859-2
+      export LC_ALL
+      ICONV="iconv -f ISO-8859-2 -t UTF-8"
+      ;;
+    ru_RU*)
+      # Russian man page sources are in KOI8-R.
+      T=-Tascii8
+      LC_ALL=ru_RU.KOI8-R
+      export LC_ALL
+      ICONV="iconv -f KOI8-R -t UTF-8"
+      ;;
+    ca_ES*|de_AT*|de_BE*|de_DE*|de_LU*|en_BE*|en_IE*|es_ES*|eu_ES*|fi_FI*|fr_BE*|fr_FR*|fr_LU*|ga_IE*|gl_ES*|it_IT*|nl_BE*|nl_NL*|pt_PT*|sv_FI*|wa_BE*)
+      # Make 'man iso-8859-15' display correctly in UTF-8 locales that use
+      # the Euro.
+      T=-Tlatin1
+      LC_ALL=de_DE@euro
+      export LC_ALL
+      ICONV="iconv -f ISO-8859-15 -t UTF-8"
+      ;;
+  esac
+fi
+
+# Guess the legacy (non-UTF-8) encoding used by the language/country found in
+# the current LC_CTYPE locale setting.
+#
+# Globals read:    LC_ALL, LC_CTYPE, LANG
+# Globals written: ctype            - the effective LC_CTYPE locale name
+#                  LEGACY_ENCODING  - iconv name of the guessed encoding
+# Arguments:       none
+guess_legacy_encoding () {
+  # Effective LC_CTYPE setting: the first non-empty one of LC_ALL, LC_CTYPE
+  # and LANG, falling back to en_US.  The ':-' form is used so that a variable
+  # set to the empty string does not mask the ones after it.
+  ctype=${LC_ALL:-${LC_CTYPE:-${LANG:-en_US}}}
+
+  # The region-specific zh_* patterns must stay ahead of the generic zh* one.
+  case "$ctype" in
+    zh_TW*)
+      LEGACY_ENCODING=Big5
+      ;;
+    zh_HK*)
+      LEGACY_ENCODING=Big5HKSCS
+      ;;
+    zh*)
+      LEGACY_ENCODING=GB2312
+      ;;
+    ja*)
+      LEGACY_ENCODING=EUC-JP
+      ;;
+    ko*)
+      LEGACY_ENCODING=EUC-KR
+      ;;
+    ru*)
+      LEGACY_ENCODING=KOI8-R
+      ;;
+    uk*)
+      LEGACY_ENCODING=KOI8-U
+      ;;
+    pl*|hr*|hu*|cs*|sk*|sl*)
+      LEGACY_ENCODING=ISO-8859-2
+      ;;
+    eo*|mt*)
+      LEGACY_ENCODING=ISO-8859-3
+      ;;
+    el*)
+      LEGACY_ENCODING=ISO-8859-7
+      ;;
+    he*)
+      LEGACY_ENCODING=ISO-8859-8
+      ;;
+    tr*)
+      LEGACY_ENCODING=ISO-8859-9
+      ;;
+    th*)
+      LEGACY_ENCODING=TIS-620 # or ISO-8859-11
+      ;;
+    lt*)
+      LEGACY_ENCODING=ISO-8859-13
+      ;;
+    cy*)
+      LEGACY_ENCODING=ISO-8859-14
+      ;;
+    ro*)
+      # Romanian is covered by Latin-2, not by the Celtic ISO-8859-14 set.
+      LEGACY_ENCODING=ISO-8859-2 # or ISO-8859-16
+      ;;
+    am*|vi*)
+      LEGACY_ENCODING=UTF-8
+      ;;
+    *)
+      LEGACY_ENCODING=ISO-8859-1
+      ;;
+  esac
+}
+
+# Determine LEGACY_ENCODING for the current locale.
+guess_legacy_encoding
+
+# Create a private scratch directory for the staged input.  Note that this
+# overwrites any TMPDIR value inherited from the environment.
+TMPDIR=`mktemp -d /tmp/nroff.XXXXXX`
+if [ $? -ne 0 ]; then
+  # Diagnostics belong on stderr; stdout carries the formatted document.
+  echo "$0: Can't create temp directory, exiting..." >&2
+  exit 1
+fi
+
+# Remove the scratch directory on every exit path.  The EXIT handler runs rm
+# as an ordinary command (not via exec) so that the script's own exit status
+# is preserved; the signal handler just exits, which triggers the EXIT
+# handler.  Signal names are given without the non-portable SIG prefix.
+trap 'rm -rf -- "$TMPDIR"' EXIT
+trap 'exit 1' HUP INT PIPE TERM IO
+
+# Parse the groff options to find out whether a file was given as argument or
+# whether the input is read from stdin.  Afterwards the positional parameters
+# hold only the options (with their original quoting intact) and INPUTFILE
+# names the first file operand, or is empty when there is none.
+# NOTE(review): only the first file operand is kept; further operands are
+# dropped -- confirm that no caller passes more than one file.
+TEMP=$(getopt --options "abcCd:eEf:F:gGhiI:lL:m:M:n:No:pP:r:RsStT:UvVw:W:XzZ" --longoptions help,version -- ${1+"$@"})
+if [ $? -ne 0 ]; then
+  # getopt rejected the command line (or could not be run); do not go on
+  # with a partial argument list.
+  exit 1
+fi
+eval set -- "$TEMP"
+
+INPUTFILE=""
+operands=no
+# Walk the parsed list once.  Each word is shifted off the front and options
+# are re-appended at the back, so the argument list is never flattened into a
+# string and re-evaluated.  The loop always ends after one pass, even if
+# getopt printed no "--" separator.
+for arg
+do
+  shift
+  if [ "$operands" = no ]; then
+    case "$arg" in
+      --)
+        # end of options: the next word, if any, is the input file
+        operands=first
+        ;;
+      *)
+        set -- "$@" "$arg"
+        ;;
+    esac
+  elif [ "$operands" = first ]; then
+    INPUTFILE="$arg"
+    operands=rest
+  fi
+done
+
+# Stage the document in $TMPDIR/input so that it can be inspected and
+# re-encoded before groff reads it.
+if [ -z "$INPUTFILE" ] ; then
+  # no file operand: the document arrives on stdin
+  cat > ${TMPDIR}/input
+else
+  # the document comes from the named file
+  cat "$INPUTFILE" > ${TMPDIR}/input
+fi
+
+# If a UTF-8 to UTF-8 iconv run over the staged text succeeds, the text is
+# taken to be UTF-8 and is re-encoded to $LEGACY_ENCODING in place.  With -c,
+# iconv silently discards characters that cannot be converted.
+if iconv -s -c -f utf-8 -t utf-8 < ${TMPDIR}/input > /dev/null ; then
+  iconv -s -c -f utf8 -t $LEGACY_ENCODING < ${TMPDIR}/input > ${TMPDIR}/input.new
+  mv ${TMPDIR}/input.new ${TMPDIR}/input
+fi
+
# This shell script is intended for use with man, so warnings are
# probably not wanted. Also load nroff-style character definitions.
: ${GROFF_BIN_PATH=@BINDIR@}
export GROFF_BIN_PATH
-PATH=$GROFF_BIN_PATH:$PATH groff -mtty-char $T $opts ${1+"$@"}
+# Format the staged input with groff and pass the result through $ICONV.
+# $T, $opts and $ICONV are left unquoted on purpose so that they split into
+# separate words.
+PATH=$GROFF_BIN_PATH:$PATH groff -mtty-char $T $opts ${1+"$@"} \
+  < ${TMPDIR}/input | $ICONV
# eof