update-desktop-files/legacy-mixed_to_utf8

176 lines
3.8 KiB
Bash

#! /bin/bash
# gnome1desktopiconv
# Converts GNOME1 style desktop and directory files to UTF-8.
# Author: Stanislav Brabec <sbrabec@suse.cz>
# Locale -> legacy charset table, stored as CHARSET_<locale> shell variables.
# In a variable name a locale modifier separator "@" is spelled "__" and a
# codeset separator "." is spelled "___" (e.g. sr@Latn -> CHARSET_sr__Latn,
# zh_TW.Big5 -> CHARSET_zh_TW___Big5).
#
# Warning: the default for ru is ISO-8859-5, but Russian GNOME1 translators
# have used KOI8-R without notice, so ru is listed as KOI8-R here.

# Forget every CHARSET_* variable that may already be set before filling
# the table.
unset ${!CHARSET_*}

# Each table row is "<locale> <charset>"; one CHARSET_<locale> variable is
# assigned per row.
while read -r tbl_locale tbl_codeset; do
  printf -v "CHARSET_${tbl_locale}" '%s' "${tbl_codeset}"
done <<'CHARSET_TABLE'
aa            ISO-8859-1
af            ISO-8859-1
am            UTF-8
an            ISO-8859-15
ar            ISO-8859-6
az            UTF-8
be            CP1251
bg            CP1251
bn            UTF-8
br            ISO-8859-1
bs            ISO-8859-2
byn           UTF-8
ca            ISO-8859-1
cs            ISO-8859-2
cy            ISO-8859-14
da            ISO-8859-1
de            ISO-8859-1
el            ISO-8859-7
en            ISO-8859-1
es            ISO-8859-1
et            ISO-8859-1
eu            ISO-8859-1
fa            UTF-8
fi            ISO-8859-1
fo            ISO-8859-1
fr            ISO-8859-1
ga            ISO-8859-1
gd            ISO-8859-15
gez           UTF-8
gl            ISO-8859-1
gu            UTF-8
gv            ISO-8859-1
he            ISO-8859-8
hi            UTF-8
hr            ISO-8859-2
hu            ISO-8859-2
id            ISO-8859-1
is            ISO-8859-1
it            ISO-8859-1
iw            ISO-8859-8
ja            eucjp
ka            GEORGIAN-PS
kl            ISO-8859-1
kn            UTF-8
ko            euckr
kw            ISO-8859-1
lg            ISO-8859-10
lo            UTF-8
lt            ISO-8859-13
lv            ISO-8859-13
mi            ISO-8859-13
mk            ISO-8859-5
ml            UTF-8
mn            UTF-8
mr            UTF-8
ms            ISO-8859-1
mt            ISO-8859-3
nb            ISO-8859-1
ne            UTF-8
nl            ISO-8859-1
nn            ISO-8859-1
no            ISO-8859-1
oc            ISO-8859-1
om            ISO-8859-1
pa            UTF-8
pl            ISO-8859-2
pt            ISO-8859-1
ru            KOI8-R
ru_UA         KOI8-U
ro            ISO-8859-2
se            UTF-8
sh            ISO-8859-2
sid           UTF-8
sk            ISO-8859-2
sl            ISO-8859-2
so            ISO-8859-1
sq            ISO-8859-1
sr            ISO-8859-2
sr__Latn      ISO-8859-2
st            ISO-8859-1
sv            ISO-8859-1
ta            UTF-8
te            UTF-8
tg            KOI8-T
th            TIS-620
tig           UTF-8
ti            UTF-8
tl            ISO-8859-1
tr            ISO-8859-9
uk            KOI8-U
ur            UTF-8
uz            ISO-8859-1
vi            UTF-8
wa            ISO-8859-1
xh            ISO-8859-1
yi            CP1255
zu            ISO-8859-1
zh            GB2312
zh_SG         GB2312
zh_HK         BIG5-HKSCS
zh_TW         BIG5
zh_TW___Big5  BIG5
CHARSET_TABLE
# The loop variables are scratch only; do not leave them behind.
unset tbl_locale tbl_codeset
# ---------------------------------------------------------------------------
# Conversion of a Legacy-Mixed desktop/directory file to UTF-8.
#
# Usage: <script> FILE
#
# FILE is rewritten in place (via FILE.utf8) when it either declares
# "Encoding=Legacy-Mixed" or declares no Encoding at all. A file declaring
# any other Encoding is left untouched.
# ---------------------------------------------------------------------------

#######################################
# Print the legacy charset configured for a locale tag.
# The tag is first looked up in full (with "@" -> "__" and "." -> "___", the
# naming scheme of the CHARSET_* table), then by the part before the first
# "_". When nothing is found a warning is printed and UTF-8 is assumed.
# Globals:   CHARSET_* (read)
# Arguments: $1 - locale tag as written between the brackets, e.g. "sr@Latn"
# Outputs:   charset name on stdout; warning on stderr for unknown locales
#######################################
charset_for_locale() {
  local tag=$1 key var
  tag=${tag//@/__}
  tag=${tag//./___}
  for key in "$tag" "${tag%%_*}"; do
    # The tag comes from the file being converted: only let plain identifier
    # characters take part in the indirect variable lookup (no eval).
    [[ $key =~ ^[A-Za-z0-9_]+$ ]] || continue
    var=CHARSET_$key
    if [[ -n ${!var-} ]]; then
      printf '%s\n' "${!var}"
      return 0
    fi
  done
  echo >&2 "$0 warning: Unknown encoding for ${tag%%_*}. Assuming UTF-8."
  printf '%s\n' UTF-8
}

#######################################
# Print one localized line converted to UTF-8.
# A line that already parses as UTF-8 is passed through unchanged (with a
# warning when it contains non-ASCII characters); a line that cannot be
# converted is passed through unchanged after an error message.
# Arguments: $1 - the raw line
#            $2 - charset the line is expected to be in
# Outputs:   the resulting line on stdout; diagnostics on stderr
#######################################
localized_line_to_utf8() {
  local line=$1 charset=$2 converted

  if [[ $charset != UTF-8 ]]; then
    # Try whether it is parsable as UTF-8.
    if iconv -f UTF-8 -t UTF-8 <<<"$line" >/dev/null 2>&1; then
      # Pure ASCII is valid in every charset of the table; only a non-ASCII
      # UTF-8 line contradicts the expected legacy charset.
      if ! iconv -f UTF-8 -t ASCII <<<"$line" >/dev/null 2>&1; then
        echo >&2 "$0 warning: Following line seems to be already in UTF-8 instead of Legacy."
        printf '%s\n' "$line" >&2
      fi
    elif converted=$(iconv -f "$charset" -t UTF-8 <<<"$line"); then
      line=$converted
    else
      echo >&2 "$0 error: Cannot convert following line."
      printf '%s\n' "$line" >&2
    fi
  fi
  printf '%s\n' "$line"
}

#######################################
# Convert a desktop/directory file to UTF-8 in place.
# Arguments: $1 - path of the file to convert
# Outputs:   diagnostics on stderr
# Returns:   0 on success or when the file declares a non-legacy Encoding,
#            non-zero when the file cannot be read or rewritten
#######################################
legacy_mixed_to_utf8() {
  local file=$1 tmp=$1.utf8
  local line probe charset
  # Only these keys are treated as localized strings; group 2 is the locale.
  local localized_re='^(Name|GenericName|Comment)\[([^]]*)]='

  if [[ ! -f $file || ! -r $file ]]; then
    echo >&2 "$0 error: Cannot read file $file."
    return 1
  fi

  # An explicit Encoding other than Legacy-Mixed means there is nothing to do.
  if ! grep -q -- '^Encoding=Legacy-Mixed' "$file" &&
    grep -q -- '^Encoding=' "$file"; then
    return 0
  fi
  echo >&2 "$0 warning: File $file is probably in deprecated Legacy-Mixed encoding. Converting to UTF-8."

  # IFS= and -r keep whitespace and backslash escapes (e.g. "\n") intact;
  # the "|| [[ -n ... ]]" part keeps a last line lacking a trailing newline.
  while IFS= read -r line || [[ -n $line ]]; do
    # Structural checks ignore leading/trailing blanks; output keeps them.
    probe=${line#"${line%%[![:blank:]]*}"}
    probe=${probe%"${probe##*[![:blank:]]}"}

    if [[ $probe =~ $localized_re ]]; then
      charset=$(charset_for_locale "${BASH_REMATCH[2]}")
      localized_line_to_utf8 "$line" "$charset"
    elif [[ $probe == 'Encoding=Legacy-Mixed' ]]; then
      # Dropped; the UTF-8 declaration is emitted after the group header.
      continue
    else
      printf '%s\n' "$line"
      if [[ $probe == '[Desktop Entry]' ]]; then
        printf '%s\n' 'Encoding=UTF-8'
      fi
    fi
  done <"$file" >"$tmp" || {
    # Never replace the original with a partially written copy.
    rm -f -- "$tmp"
    return 1
  }

  mv -- "$tmp" "$file"
}

#######################################
# Script entry point.
# Arguments: $1 - path of the file to convert
# Returns:   2 on usage error, otherwise the status of the conversion
#######################################
main() {
  if (($# < 1)) || [[ -z $1 ]]; then
    echo >&2 "Usage: $0 FILE"
    return 2
  fi
  legacy_mixed_to_utf8 "$1"
}

# Last command of the script: its status is the script's exit status.
main "$@"