2007-01-16 00:41:34 +01:00
|
|
|
--- wvText.in
|
|
|
|
+++ wvText.in
|
|
|
|
@@ -3,35 +3,49 @@
|
|
|
|
prefix=@prefix@
|
|
|
|
exec_prefix=@exec_prefix@
|
|
|
|
datadir=@datadir@
|
|
|
|
-tmpdir=${TMPDIR:-/tmp}
|
|
|
|
|
|
|
|
# argument checking
|
|
|
|
-if [ ${#} -ne "2" ]; then
|
|
|
|
- echo "Usage: ${0} <word document> <text output file>"
|
|
|
|
- exit 1
|
|
|
|
+if [ ${#} -eq "1" ]; then
|
|
|
|
+ o_file='-'
|
|
|
|
+elif [ ${#} -ne "2" ]; then
|
|
|
|
+ echo "Usage: ${0} <word document> <text output file>"
|
|
|
|
+ exit 1
|
|
|
|
+else
|
|
|
|
+ o_file=${2}
|
|
|
|
+fi
|
|
|
|
+
|
|
|
|
+if [ "x$o_file" = "x-" ]; then
|
|
|
|
+ o_file=/dev/stdout
|
|
|
|
fi
|
|
|
|
|
|
|
|
USE_DUMP=0
|
|
|
|
-which elinks >/dev/null 2>&1
|
|
|
|
+which w3m >/dev/null 2>&1
|
|
|
|
if [ ${?} -eq "0" ]; then
|
|
|
|
- USE_DUMP=3
|
|
|
|
+ USE_DUMP=4
|
|
|
|
else
|
|
|
|
- which links >/dev/null 2>&1
|
|
|
|
+ which elinks >/dev/null 2>&1
|
|
|
|
if [ ${?} -eq "0" ]; then
|
|
|
|
- USE_DUMP=2
|
|
|
|
+ USE_DUMP=3
|
|
|
|
else
|
|
|
|
- which lynx >/dev/null 2>&1
|
|
|
|
+ which links >/dev/null 2>&1
|
|
|
|
if [ ${?} -eq "0" ]; then
|
|
|
|
- USE_DUMP=1
|
|
|
|
- fi
|
|
|
|
- fi
|
|
|
|
+ USE_DUMP=2
|
|
|
|
+ else
|
|
|
|
+ which lynx >/dev/null 2>&1
|
|
|
|
+ if [ ${?} -eq "0" ]; then
|
|
|
|
+ USE_DUMP=1
|
|
|
|
+ fi
|
|
|
|
+ fi
|
|
|
|
+ fi
|
|
|
|
fi
|
|
|
|
|
|
|
|
+tmpdir=$(mktemp -d ${TMPDIR:-/tmp}/wvText.XXXXXXXX) || { echo "$0: can not create temporary directory" >& 2; exit 1; }
|
|
|
|
+
|
|
|
|
if [ $USE_DUMP -eq "1" ]; then
|
|
|
|
- echo "Could not find required program 'elinks' or 'links'"
|
|
|
|
+ echo "Could not find required program 'w3m', 'elinks' or 'links'"
|
2018-02-05 11:24:32 +01:00
|
|
|
echo "Using lynx. Output will be pretty ugly."
|
2007-01-16 00:41:34 +01:00
|
|
|
elif [ $USE_DUMP -eq "0" ]; then
|
|
|
|
- echo "Could not find required program 'elinks', 'links', or even 'lynx'"
|
|
|
|
+ echo "Could not find required program 'w3m', 'elinks', 'links', or even 'lynx'"
|
2018-02-05 11:24:32 +01:00
|
|
|
echo "Using wvWare -x wvText.xml. Output will be pretty bad."
|
2007-01-16 00:41:34 +01:00
|
|
|
fi
|
|
|
|
|
|
|
|
@@ -41,6 +55,7 @@
|
|
|
|
which wvHtml >/dev/null 2>&1
|
|
|
|
if [ ${?} -ne "0" ]; then
|
|
|
|
echo "Could not find required program 'wvHtml'"
|
|
|
|
+ rm -rf "${tmpdir}"
|
|
|
|
exit 1
|
|
|
|
fi
|
|
|
|
|
|
|
|
@@ -51,32 +66,36 @@
|
|
|
|
wvHtml -1 "${1}" --targetdir="${tmpdir}" "${TMP_FILE}" >/dev/null 2>&1
|
|
|
|
if [ ${?} -ne "0" ]; then
|
|
|
|
echo "Could not convert into HTML"
|
|
|
|
+ rm -rf "${tmpdir}"
|
|
|
|
exit 1
|
|
|
|
fi
|
|
|
|
|
|
|
|
- if [ $USE_DUMP -eq "3" ]; then
|
|
|
|
+ if [ $USE_DUMP -eq "4" ]; then
|
|
|
|
+
|
|
|
|
+ w3m -dump -T text/html "${tmpdir}/${TMP_FILE}" > "$o_file"
|
|
|
|
+ elif [ $USE_DUMP -eq "3" ]; then
|
|
|
|
# elinks does the best
|
|
|
|
- elinks -dump -force-html "${tmpdir}/${TMP_FILE}" > "${2}"
|
|
|
|
+ elinks -dump -force-html "${tmpdir}/${TMP_FILE}" > "$o_file"
|
|
|
|
elif [ $USE_DUMP -eq "2" ]; then
|
|
|
|
# links does a pretty good job
|
|
|
|
- links -dump "${tmpdir}/${TMP_FILE}" > "${2}"
|
|
|
|
+ links -dump "${tmpdir}/${TMP_FILE}" > "$o_file"
|
|
|
|
else
|
|
|
|
# lynx sucks, but does better than wvText.xml
|
|
|
|
- TERM=vt100 lynx -dump -force_html "${tmpdir}/${TMP_FILE}" > "${2}"
|
|
|
|
+ TERM=vt100 lynx -dump -force_html "${tmpdir}/${TMP_FILE}" > "$o_file"
|
|
|
|
fi;
|
|
|
|
|
|
|
|
if [ ${?} -ne "0" ]; then
|
|
|
|
echo "Could not convert into Text"
|
|
|
|
- rm -f "${tmpdir}/${TMP_FILE}"
|
|
|
|
+ rm -rf "${tmpdir}"
|
|
|
|
exit 1
|
|
|
|
fi
|
|
|
|
|
|
|
|
- # clean up
|
|
|
|
- rm -f "${tmpdir}/${TMP_FILE}"
|
|
|
|
-
|
|
|
|
else
|
|
|
|
# fall back onto our cruddy output
|
|
|
|
# this is, admittedly, better than running
|
|
|
|
# 'strings' on the word document though :)
|
|
|
|
- wvWare -x ${datadir}/wv/wvText.xml "${1}" > "${2}"
|
|
|
|
+ wvWare -x ${datadir}/wv/wvText.xml "${1}" > "$o_file"
|
|
|
|
fi
|
|
|
|
+
|
|
|
|
+# clean up
|
|
|
|
+rm -rf "${tmpdir}"
|