stardict-tools/python3-support.patch
Lars Vogdt 94d44e40b5 Accepting request 914515 from home:StevenK:branches:Education
- Add patch: python3-support.patch
  * Convert all Python 2 scripts to Python 3
- Remove shebang mangling from the specfile for python scripts.

OBS-URL: https://build.opensuse.org/request/show/914515
OBS-URL: https://build.opensuse.org/package/show/Education/stardict-tools?expand=0&rev=16
2021-09-14 08:44:46 +00:00

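Reviewer summary (not part of the patch): the conversion below is almost entirely mechanical, applying the same handful of Python 2 -> Python 3 rewrites across all eight scripts, plus pointing every shebang at /usr/bin/python3:

    print "x"                   ->  print("x")
    print >> sys.stderr, msg    ->  print(msg, file=sys.stderr)
    a <> b                      ->  a != b
    d.has_key(k)                ->  k in d
    d.iteritems()               ->  d.items()
    filter(lambda x: p(x), xs)  ->  [x for x in xs if p(x)]
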
Index: stardict-tools-3.0.1/src/jm2stardict.py
===================================================================
--- stardict-tools-3.0.1.orig/src/jm2stardict.py
+++ stardict-tools-3.0.1/src/jm2stardict.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/python3
#
# converts XML JMDict to Stardict idx/dict format
# JMDict website: http://www.csse.monash.edu.au/~jwb/j_jmdict.html
@@ -27,9 +27,8 @@ import struct, sys, string, codecs,os
def text(nodes):
label = ""
- textnodes = filter(lambda x: x.nodeName == "#text", nodes)
- for t in textnodes:
- label += t.data
+ for t in [x for x in nodes if x.nodeName == "#text"]:
+ label += t.data
return label
def strcasecmp(a, b):
@@ -42,7 +41,7 @@ def strcasecmp(a, b):
# if result == 0:
result = cmp(a[0].lower() , b[0].lower())
-
+
return result
def merge_dup(list):
@@ -50,55 +49,55 @@ def merge_dup(list):
lastkey = ""
for x in list:
- if x[0] == lastkey:
- newlist[-1] = (newlist[-1][0], newlist[-1][1] + "\n" + x[1])
- else:
- newlist.append(x)
- lastkey = x[0]
+ if x[0] == lastkey:
+ newlist[-1] = (newlist[-1][0], newlist[-1][1] + "\n" + x[1])
+ else:
+ newlist.append(x)
+ lastkey = x[0]
return newlist
class JMDictHandler(ContentHandler):
def __init__(self):
- self.mapping = []
- self.state = ""
- self.buffer = ""
+ self.mapping = []
+ self.state = ""
+ self.buffer = ""
def startElement(self, name, attrs):
- if name == "entry":
- self.kanji = []
- self.chars = []
- self.gloss = []
- self.state = ""
- self.buffer = ""
- elif name == "keb":
- self.state = "keb"
- elif name == "reb":
- self.state = "reb"
- elif name == "gloss" and not attrs:
- self.state = "gloss"
- elif name == "xref":
- self.state = "xref"
-
+ if name == "entry":
+ self.kanji = []
+ self.chars = []
+ self.gloss = []
+ self.state = ""
+ self.buffer = ""
+ elif name == "keb":
+ self.state = "keb"
+ elif name == "reb":
+ self.state = "reb"
+ elif name == "gloss" and not attrs:
+ self.state = "gloss"
+ elif name == "xref":
+ self.state = "xref"
+
def endElement(self, name):
- if name == "entry":
- self.mapping.append((self.kanji, self.chars, self.gloss))
- elif name == "keb":
- self.kanji.append(self.buffer)
- elif name == "reb":
- self.chars.append(self.buffer)
- elif name == "gloss" and self.buffer:
- self.gloss.append(self.buffer)
- elif name == "xref":
- self.gloss.append(self.buffer)
-
- self.buffer = ""
- self.state = ""
-
+ if name == "entry":
+ self.mapping.append((self.kanji, self.chars, self.gloss))
+ elif name == "keb":
+ self.kanji.append(self.buffer)
+ elif name == "reb":
+ self.chars.append(self.buffer)
+ elif name == "gloss" and self.buffer:
+ self.gloss.append(self.buffer)
+ elif name == "xref":
+ self.gloss.append(self.buffer)
+
+ self.buffer = ""
+ self.state = ""
+
def characters(self, ch):
- if self.state in ["keb", "reb", "gloss", "xref"]:
- self.buffer = self.buffer + ch
-
+ if self.state in ["keb", "reb", "gloss", "xref"]:
+ self.buffer = self.buffer + ch
+
def map_to_file(dictmap, filename):
dict = open(filename + ".dict","wb")
@@ -111,59 +110,59 @@ def map_to_file(dictmap, filename):
idx.write(struct.pack("!I",len(dictmap)))
for k,v in dictmap:
- k_utf8 = k.encode("utf-8")
- v_utf8 = v.encode("utf-8")
- idx.write(k_utf8 + "\0")
- idx.write(struct.pack("!I",offset))
- idx.write(struct.pack("!I",len(v_utf8)))
- offset += len(v_utf8)
- dict.write(v_utf8)
+ k_utf8 = k.encode("utf-8")
+ v_utf8 = v.encode("utf-8")
+ idx.write(k_utf8 + b"\0")
+ idx.write(struct.pack("!I",offset))
+ idx.write(struct.pack("!I",len(v_utf8)))
+ offset += len(v_utf8)
+ dict.write(v_utf8)
dict.close()
idx.close()
if __name__ == "__main__":
- print "opening xml dict .."
+ print("opening xml dict ..")
f = gzip.open("JMdict.gz")
#f = open("jmdict_sample.xml")
- print "parsing xml file .."
+ print("parsing xml file ..")
parser = xml.sax.make_parser()
handler = JMDictHandler()
parser.setContentHandler(handler)
parser.parse(f)
f.close()
- print "creating dictionary .."
+ print("creating dictionary ..")
# create a japanese -> english mappings
jap_to_eng = []
for kanji,chars,gloss in handler.mapping:
- for k in kanji:
- key = k
- value = string.join(chars + gloss, "\n")
- jap_to_eng.append((key,value))
- for c in chars:
- key = c
- value = string.join(kanji + gloss, "\n")
- jap_to_eng.append((key,value))
-
+ for k in kanji:
+ key = k
+ value = "\n".join(chars + gloss)
+ jap_to_eng.append((key,value))
+ for c in chars:
+ key = c
+ value = "\n".join(kanji + gloss)
+ jap_to_eng.append((key,value))
+
eng_to_jap = []
for kanji,chars,gloss in handler.mapping:
- for k in gloss:
- key = k
+ value = "\n".join(kanji + chars)
- eng_to_jap.append((key,value))
-
- print "sorting dictionary .."
+ for k in gloss:
+ key = k
+ value = "\n".join(kanji + chars)
+ eng_to_jap.append((key,value))
+
+ print("sorting dictionary ..")
jap_to_eng.sort(strcasecmp)
eng_to_jap.sort(strcasecmp)
- print "merging and pruning dups.."
+ print("merging and pruning dups..")
jap_to_eng = merge_dup(jap_to_eng)
eng_to_jap = merge_dup(eng_to_jap)
- print "writing to files.."
+ print("writing to files..")
# create dict and idx file
map_to_file(jap_to_eng, "jmdict-ja-en")
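
Reviewer note (not part of the patch): two lines the hunks above leave unchanged are still Python-2-only. strcasecmp() calls the removed builtin cmp(), and jap_to_eng.sort(strcasecmp) passes a comparison function, which Python 3's list.sort() no longer accepts. Note also that the .idx file is written in binary mode, so the separator after each key has to be the bytes literal b"\0". A minimal follow-up sketch:

    import functools

    def strcasecmp(a, b):
        # cmp() is gone in Python 3; (x > y) - (x < y) reproduces its result.
        ka, kb = a[0].lower(), b[0].lower()
        return (ka > kb) - (ka < kb)

    # Python 3 sort() takes a key function, not a comparator:
    jap_to_eng.sort(key=functools.cmp_to_key(strcasecmp))
    eng_to_jap.sort(key=functools.cmp_to_key(strcasecmp))
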
Index: stardict-tools-3.0.1/src/makevietdict.py
===================================================================
--- stardict-tools-3.0.1.orig/src/makevietdict.py
+++ stardict-tools-3.0.1/src/makevietdict.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/python3
# WinVNKey Hannom Database to Stardict dictionary source Conversion Tool
# coded by wesnoth@ustc on 070804
# http://winvnkey.sourceforge.net
@@ -7,7 +7,7 @@ infileencoding = 'utf-16-le'
outfileencoding = 'utf-8'
def showhelp():
- print "Usage: %s filename" % sys.argv[0]
+ print("Usage: %s filename" % sys.argv[0])
def ishantu(str):
if len(str) > 0 and ord(str[0]) > 0x2e80:
@@ -37,15 +37,15 @@ def mysplit(line):
return line
if __name__ == '__main__':
- if len(sys.argv) <> 2:
+ if len(sys.argv) != 2:
showhelp()
else:
fp = open(sys.argv[1], 'r')
- print 'Reading file...'
+ print('Reading file...')
lines = unicode(fp.read(), infileencoding).split(u'\n')
lineno = 0
hugedict = {}
- print 'Generating Han-Viet dict...'
+ print('Generating Han-Viet dict...')
for line in lines:
lineno += 1
if line.endswith(u'\r'):
@@ -72,7 +72,7 @@ if __name__ == '__main__':
line[1] = filter(None, map(string.strip, line[1].split(u',')))
#hugedict[line[0]] = hugedict.get(line[0], []) + line[1]
for item in line[1]:
- if not hugedict.has_key(line[0]):
+ if line[0] not in hugedict:
hugedict[line[0]] = [item]
elif not item in hugedict[line[0]]:
hugedict[line[0]] += [item]
@@ -83,25 +83,25 @@ if __name__ == '__main__':
# print viettu.encode('utf-8'), ',',
# print
fp.close()
- print 'Generating Viet-Han dict...'
+ print('Generating Viet-Han dict...')
dicthuge = {}
- for hantu, quocngu in hugedict.iteritems():
+ for hantu, quocngu in hugedict.items():
for viettu in quocngu:
- if not dicthuge.has_key(viettu):
+ if viettu not in dicthuge:
dicthuge[viettu] = [hantu]
elif not hantu in dicthuge[viettu]:
dicthuge[viettu] += [hantu]
- print 'Writing Han-Viet dict...'
+ print('Writing Han-Viet dict...')
gp = open('hanviet.txt', 'w')
- for hantu, quocngu in hugedict.iteritems():
+ for hantu, quocngu in hugedict.items():
gp.write(hantu.encode('utf-8'))
gp.write('\t')
gp.write((u', '.join(quocngu)).encode('utf-8'))
gp.write('\n')
gp.close()
- print 'Writing Viet-Han dict...'
+ print('Writing Viet-Han dict...')
gp = open('viethan.txt', 'w')
- for quocngu,hantu in dicthuge.iteritems():
+ for quocngu,hantu in dicthuge.items():
gp.write(quocngu.encode('utf-8'))
gp.write('\t')
gp.write((u' '.join(hantu)).encode('utf-8'))
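
Reviewer note (not part of the patch): the unchanged lines of makevietdict.py still rely on Python-2-only pieces — unicode(fp.read(), infileencoding), map(string.strip, ...), and .encode('utf-8') writes into files opened in text mode. A sketch of the Python 3 shape, assuming the infileencoding and outfileencoding values set at the top of the script:

    # Let the file objects decode and encode; unicode() no longer exists.
    with open(sys.argv[1], 'r', encoding=infileencoding) as fp:
        lines = fp.read().split('\n')

    # filter(None, map(string.strip, ...)) becomes a comprehension:
    line[1] = [s.strip() for s in line[1].split(',') if s.strip()]

    with open('hanviet.txt', 'w', encoding=outfileencoding) as gp:
        for hantu, quocngu in hugedict.items():
            gp.write(hantu + '\t' + ', '.join(quocngu) + '\n')
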
Index: stardict-tools-3.0.1/src/lingea-trd-decoder.py
===================================================================
--- stardict-tools-3.0.1.orig/src/lingea-trd-decoder.py
+++ stardict-tools-3.0.1/src/lingea-trd-decoder.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
# -*- coding: utf-8 -*-
#
# Script for decoding Lingea Dictionary (.trd) file
@@ -49,34 +49,34 @@ VERSION = "0.4"
import getopt, sys
def usage():
- print "Lingea Dictionary Decoder"
- print "-------------------------"
- print "Version: %s" % VERSION
- print "Copyright (C) 2007 - Klokan Petr Pridal, Petr Dlouhy"
- print
- print "Usage: python lingea-trd-decoder.py DICTIONARY.trd > DICTIONARY.tab"
- print "Result convertion by stardict-tools: /usr/lib/stardict-tools/tabfile"
- print
- print " -o <num> --out-style : Output style"
- print " 0 no tags"
- print " 1 \\n tags"
- print " 2 html tags"
- print " -h --help : Print this message"
- print " -d --debug : Degub"
- print " -r --debug-header : Degub - print headers"
- print " -a --debug-all : Degub - print all records"
- print " -l --debug-limit : Degub limit"
- print
- print "For HTML support in StarDict dictionary .ifo has to contain:"
- print "sametypesequence=g"
- print "!!! Change the .ifo file after generation by tabfile !!!"
- print
+ print("Lingea Dictionary Decoder")
+ print("-------------------------")
+ print("Version: %s" % VERSION)
+ print("Copyright (C) 2007 - Klokan Petr Pridal, Petr Dlouhy")
+ print()
+ print("Usage: python lingea-trd-decoder.py DICTIONARY.trd > DICTIONARY.tab")
+ print("Result convertion by stardict-tools: /usr/lib/stardict-tools/tabfile")
+ print()
+ print(" -o <num> --out-style : Output style")
+ print(" 0 no tags")
+ print(" 1 \\n tags")
+ print(" 2 html tags")
+ print(" -h --help : Print this message")
+ print(" -d --debug : Degub")
+ print(" -r --debug-header : Degub - print headers")
+ print(" -a --debug-all : Degub - print all records")
+ print(" -l --debug-limit : Degub limit")
+ print()
+ print("For HTML support in StarDict dictionary .ifo has to contain:")
+ print("sametypesequence=g")
+ print("!!! Change the .ifo file after generation by tabfile !!!")
+ print()
try:
opts, args = getopt.getopt(sys.argv[1:], "hdo:ral:", ["help", "debug", "out-style=", "debug-header", "debug-all", "debug-limit="])
except getopt.GetoptError:
usage()
- print "ERROR: Bad option"
+ print("ERROR: Bad option")
sys.exit(2)
import locale
@@ -94,7 +94,7 @@ for o, a in opts:
OUTSTYLE = locale.atoi(a)
if OUTSTYLE > 2:
usage()
- print "ERROR: Output style not specified"
+ print("ERROR: Output style not specified")
if o in ("-r", "--debug-header"):
# If DEBUG and DEBUGHEADER, then print just all header records
DEBUGHEADER = True
@@ -113,7 +113,7 @@ if len(args) == 1:
FILENAME = args[0]
else:
usage()
- print "ERROR: You have to specify .trd file to decode"
+ print("ERROR: You have to specify .trd file to decode")
sys.exit(2)
from struct import *
@@ -428,7 +428,7 @@ def out( comment = "", skip = False):
comment = comment % s
else:
comment = comment % bs[pos]
- if DEBUG: print "%03d %s %s | %s | %03d" % (pos, toBin(bs[pos]),comment, s, (triple + pos))
+ if DEBUG: print("%03d %s %s | %s | %03d" % (pos, toBin(bs[pos]),comment, s, (triple + pos)))
if skip:
pos += triple + 1
return s.replace('`','') # Remove '`' character from words
@@ -671,14 +671,14 @@ if DEBUG:
s = decode(getRec(i))
if DEBUGHEADER:
# print s.split('\t')[0]
- print s
+ print(s)
if DEBUGLIMIT > 0 and not s.endswith('\n'):
DEBUG = True
- print "-"*80
- print "%s) at address %s" % (i, toBin(index[i]))
- print
+ print("-"*80)
+ print("%s) at address %s" % (i, toBin(index[i])))
+ print()
s = decode(getRec(i))
- print s
+ print(s)
DEBUGLIMIT -= 1
DEBUG = True
else:
@@ -686,10 +686,10 @@ else:
for i in range(1,entryCount):
s = decode(getRec(i))
if s.endswith('\n'):
- print s,
+ print(s, end=' ')
else:
- print s
- print "!!! RECORD STRUCTURE DECODING ERROR !!!"
- print "Please run this script in DEBUG mode and repair DATA BLOCK(S) section in function decode()"
- print "If you succeed with whole dictionary send report (name of the dictionary and source code of script) to slovniky@googlegroups.com"
+ print(s)
+ print("!!! RECORD STRUCTURE DECODING ERROR !!!")
+ print("Please run this script in DEBUG mode and repair DATA BLOCK(S) section in function decode()")
+ print("If you succeed with whole dictionary send report (name of the dictionary and source code of script) to slovniky@googlegroups.com")
break
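
Reviewer note (not part of the patch): the print conversions above are faithful, with one subtlety. Python 2's trailing-comma form (print s,) defers its separator — the "softspace" is even suppressed when s ends in a newline — while print(s, end=' ') always emits the space immediately. Since that branch only runs when s.endswith('\n'), end='' is the byte-exact translation:

    if s.endswith('\n'):
        print(s, end='')  # s already carries its newline; add nothing extra
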
Index: stardict-tools-3.0.1/src/extractKangXi.py
===================================================================
--- stardict-tools-3.0.1.orig/src/extractKangXi.py
+++ stardict-tools-3.0.1/src/extractKangXi.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/python3
# -*- coding: utf-8 -*-
import sys, os, string, re, glob
import libxml2dom
@@ -20,7 +20,7 @@ num = 0
errorfiles = []
for filename in filelist:
num += 1
- print >> sys.stderr, filename, num, 'of', filenum
+ print(filename, num, 'of', filenum, file=sys.stderr)
try:
fp = open(filename, 'r')
doc = libxml2dom.parseString(fp.read(), html=1)
@@ -29,7 +29,7 @@ for filename in filelist:
style = re.search(r'(?s)\s*\.(\S+)\s*{\s*display:\s*none', style)
displaynone = style.group(1)
tabpages = doc.getElementsByTagName("div")
- tabpages = filter(lambda s: s.getAttribute("class") == "tab-page", tabpages)
+ tabpages = [s for s in tabpages if s.getAttribute("class") == "tab-page"]
for tabpage in tabpages:
found = False
for node in tabpage.childNodes:
@@ -45,16 +45,16 @@ for filename in filelist:
paragraphs = tabpage.getElementsByTagName("p")
thisitem = character + u'\t'
for paragraph in paragraphs:
- if paragraph.getAttribute("class") <> displaynone:
+ if paragraph.getAttribute("class") != displaynone:
#print TextInNode(paragraph).encode(fencoding)
text = paragraph.textContent
#text = filter(lambda s: not s in u' \t\r\n', text)
text = re.sub(r'\s+', r' ', text)
thisitem += text + u'\\n'
- print thisitem.encode(fencoding)
+ sys.stdout.buffer.write(thisitem.encode(fencoding) + b'\n')
except:
- print >> sys.stderr, 'error occured'
+ print('error occurred', file=sys.stderr)
errorfiles += [filename]
continue
if errorfiles:
- print >> sys.stderr, 'Error files:', '\n'.join(errorfiles)
+ print('Error files:', '\n'.join(errorfiles), file=sys.stderr)
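
Reviewer note (not part of the patch): in Python 3, passing bytes to print() emits the b'...' repr rather than the text, which is why the per-entry output above goes through sys.stdout.buffer. An alternative is to rewrap stdout once with the target codec and print str directly (a sketch, assuming fencoding names a codec as set earlier in the script):

    import io, sys
    # Rewrap stdout once; plain print(thisitem) then produces encoded output.
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding=fencoding)
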
Index: stardict-tools-3.0.1/src/stmerge.py
===================================================================
--- stardict-tools-3.0.1.orig/src/stmerge.py
+++ stardict-tools-3.0.1/src/stmerge.py
@@ -1,18 +1,19 @@
-import sys, string
-base = {}
-for line in sys.stdin.readlines():
- words = string.split(line[:-1], '\t')
- if len(words) != 2:
- print "Error!"
- exit
- if base.has_key(words[0]):
- base[words[0]] += [words[1]]
- else:
- base[words[0]] = [words[1]]
-keys = base.keys()
-keys.sort()
-for key in keys:
- print key,'\t',
- for val in base[key]:
- print val,',',
- print
+#!/usr/bin/python3
+import sys
+base = {}
+for line in sys.stdin.readlines():
+ words = line[:-1].split('\t')
+ if len(words) != 2:
+ print("Error!")
+ sys.exit(1)
+ if words[0] in base:
+ base[words[0]] += [words[1]]
+ else:
+ base[words[0]] = [words[1]]
+keys = list(base.keys())
+keys.sort()
+for key in keys:
+ print(key,'\t', end=' ')
+ for val in base[key]:
+ print(val,',', end=' ')
+ print()
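
Reviewer note (not part of the patch): string.split(s, sep) was dropped in Python 3 in favour of the s.split(sep) method, and a bare exit expression never terminates the script, hence sys.exit(1) above. If the stray separators produced by end=' ' matter, the emission loop can also be tightened (a sketch over the same base dict):

    for key in sorted(base):
        print(key + '\t' + ', '.join(base[key]))
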
Index: stardict-tools-3.0.1/src/KangXiZiDian-djvu2tiff.py
===================================================================
--- stardict-tools-3.0.1.orig/src/KangXiZiDian-djvu2tiff.py
+++ stardict-tools-3.0.1/src/KangXiZiDian-djvu2tiff.py
@@ -1,3 +1,4 @@
+#!/usr/bin/python3
# This tool convert KangXiZiDian djvu files to tiff files.
# Download djvu files: http://bbs.dartmouth.edu/~fangq/KangXi/KangXi.tar
# Character page info: http://wenq.org/unihan/Unihan.txt as kIRGKangXi field.
Index: stardict-tools-3.0.1/src/hanzim2dict.py
===================================================================
--- stardict-tools-3.0.1.orig/src/hanzim2dict.py
+++ stardict-tools-3.0.1/src/hanzim2dict.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
#
# hanzim2dict
#
@@ -44,7 +44,7 @@ for line in lines:
code = toUTF(fromGB(line[0])[0])[0]
pinyin = line[2]
definition = '<'+pinyin+'> '+line[3]+' ['+line[1]+']'
- if wordmap.has_key(code):
+ if code in wordmap:
wordmap[code].add(definition)
else:
wordmap[code] = Word(code, definition)
@@ -55,11 +55,11 @@ for filename in ("cidianf.gb", "sanzicid
for line in lines:
if len(line) < 2:
- print len(line)
+ print(len(line))
continue
code = toUTF(fromGB(line[0][:-2])[0])[0]
definition = line[1]+' ['+line[0][-1:]+']'
- if wordmap.has_key(code):
+ if code in wordmap:
wordmap[code].add(definition)
else:
wordmap[code] = Word(code, definition)
Index: stardict-tools-3.0.1/src/mkguangyunst.py
===================================================================
--- stardict-tools-3.0.1.orig/src/mkguangyunst.py
+++ stardict-tools-3.0.1/src/mkguangyunst.py
@@ -1,3 +1,4 @@
+#!/usr/bin/python3
import sys, string
for line in sys.stdin.readlines():
words = string.split(line[:-1], '\t')
@@ -15,6 +16,6 @@ for line in sys.stdin.readlines():
pinyin= words[13]
psyun = words[22]
if beizhu == '':
- print "%s\t%s %s%s%s%s%s%s %sQIE PINYIN%s PSYUN%s\\n%s" % (romazi, muci, sheng, yunbu, she, hu, deng, diao, fanqie, pinyin, psyun, chars)
+ print("%s\t%s %s%s%s%s%s%s %sQIE PINYIN%s PSYUN%s\\n%s" % (romazi, muci, sheng, yunbu, she, hu, deng, diao, fanqie, pinyin, psyun, chars))
else:
- print "%s\t%s %s%s%s%s%s%s %sQIE PINYIN%s PSYUN%s\\n%s\\n%s" % (romazi, muci, sheng, yunbu, she, hu, deng, diao, fanqie, pinyin, psyun, chars, beizhu)
+ print("%s\t%s %s%s%s%s%s%s %sQIE PINYIN%s PSYUN%s\\n%s\\n%s" % (romazi, muci, sheng, yunbu, she, hu, deng, diao, fanqie, pinyin, psyun, chars, beizhu))
Index: stardict-tools-3.0.1/src/uyghur2dict.py
===================================================================
--- stardict-tools-3.0.1.orig/src/uyghur2dict.py
+++ stardict-tools-3.0.1/src/uyghur2dict.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
#
# uyghur2dict
# By Abdisalam (anatilim@gmail.com), inspired by Michael Robinson's hanzim2dict converter.
@@ -41,7 +41,7 @@ lines = map(lambda x: split(x[:-1], '\t\
for line in lines:
code = line[0]
definition = line[1]
- if wordmap.has_key(code):
+ if code in wordmap:
wordmap[code].add(definition)
else:
wordmap[code] = Word(code, definition)
@@ -84,4 +84,4 @@ ifo.write("author=Abdisalam\n")
ifo.write("email=anatilim@gmail.com\n")
ifo.write("description=感谢新疆维吾尔自治区语委会、新疆青少年出版社为我们提供《汉维词典》的词库\n")
ifo.write("sametypesequence=m\n")
-ifo.close()
\ No newline at end of file
+ifo.close()
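
Reviewer note (not part of the patch): the unchanged lines = map(lambda x: split(x[:-1], ...)) line is doubly affected — the bare split presumably comes from the removed string-module helpers, and map() now returns a lazy iterator. A list comprehension covers both (a sketch; the separator is truncated in the hunk context above, so '\t' here is an assumption):

    lines = [x[:-1].split('\t') for x in fp.readlines()]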