Index: stardict-tools-3.0.1/src/jm2stardict.py
===================================================================
--- stardict-tools-3.0.1.orig/src/jm2stardict.py
+++ stardict-tools-3.0.1/src/jm2stardict.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/python3
 #
 # converts XML JMDict to Stardict idx/dict format
 # JMDict website: http://www.csse.monash.edu.au/~jwb/j_jmdict.html
@@ -27,9 +27,8 @@ import struct, sys, string, codecs,os
 
 def text(nodes):
     label = ""
-    textnodes = filter(lambda x: x.nodeName == "#text", nodes)
-    for t in textnodes:
-        label += t.data
+    for t in [x for x in nodes if x.nodeName == "#text"]:
+        label += t.data
     return label
 
 def strcasecmp(a, b):
@@ -42,7 +41,7 @@ def strcasecmp(a, b):
 #   if result == 0:
 
     result = cmp(a[0].lower() , b[0].lower())
-
+
     return result
 
 def merge_dup(list):
@@ -50,55 +49,55 @@ def merge_dup(list):
     lastkey = ""
 
     for x in list:
-        if x[0] == lastkey:
-            newlist[-1] = (newlist[-1][0], newlist[-1][1] + "\n" + x[1])
-        else:
-            newlist.append(x)
-            lastkey = x[0]
+        if x[0] == lastkey:
+            newlist[-1] = (newlist[-1][0], newlist[-1][1] + "\n" + x[1])
+        else:
+            newlist.append(x)
+            lastkey = x[0]
 
     return newlist
 
 class JMDictHandler(ContentHandler):
     def __init__(self):
-        self.mapping = []
-        self.state = ""
-        self.buffer = ""
+        self.mapping = []
+        self.state = ""
+        self.buffer = ""
 
     def startElement(self, name, attrs):
-        if name == "entry":
-            self.kanji = []
-            self.chars = []
-            self.gloss = []
-            self.state = ""
-            self.buffer = ""
-        elif name == "keb":
-            self.state = "keb"
-        elif name == "reb":
-            self.state = "reb"
-        elif name == "gloss" and not attrs:
-            self.state = "gloss"
-        elif name == "xref":
-            self.state = "xref"
-
+        if name == "entry":
+            self.kanji = []
+            self.chars = []
+            self.gloss = []
+            self.state = ""
+            self.buffer = ""
+        elif name == "keb":
+            self.state = "keb"
+        elif name == "reb":
+            self.state = "reb"
+        elif name == "gloss" and not attrs:
+            self.state = "gloss"
+        elif name == "xref":
+            self.state = "xref"
+
     def endElement(self, name):
-        if name == "entry":
-            self.mapping.append((self.kanji, self.chars, self.gloss))
-        elif name == "keb":
-            self.kanji.append(self.buffer)
-        elif name == "reb":
-            self.chars.append(self.buffer)
-        elif name == "gloss" and self.buffer:
-            self.gloss.append(self.buffer)
-        elif name == "xref":
-            self.gloss.append(self.buffer)
-
-        self.buffer = ""
-        self.state = ""
-
+        if name == "entry":
+            self.mapping.append((self.kanji, self.chars, self.gloss))
+        elif name == "keb":
+            self.kanji.append(self.buffer)
+        elif name == "reb":
+            self.chars.append(self.buffer)
+        elif name == "gloss" and self.buffer:
+            self.gloss.append(self.buffer)
+        elif name == "xref":
+            self.gloss.append(self.buffer)
+
+        self.buffer = ""
+        self.state = ""
+
     def characters(self, ch):
-        if self.state in ["keb", "reb", "gloss", "xref"]:
-            self.buffer = self.buffer + ch
-
+        if self.state in ["keb", "reb", "gloss", "xref"]:
+            self.buffer = self.buffer + ch
+
 
 def map_to_file(dictmap, filename):
     dict = open(filename + ".dict","wb")
@@ -111,59 +110,59 @@ def map_to_file(dictmap, filename):
     idx.write(struct.pack("!I",len(dictmap)))
 
     for k,v in dictmap:
-        k_utf8 = k.encode("utf-8")
-        v_utf8 = v.encode("utf-8")
-        idx.write(k_utf8 + "\0")
-        idx.write(struct.pack("!I",offset))
-        idx.write(struct.pack("!I",len(v_utf8)))
-        offset += len(v_utf8)
-        dict.write(v_utf8)
+        k_utf8 = k.encode("utf-8")
+        v_utf8 = v.encode("utf-8")
+        idx.write(k_utf8 + "\0")
+        idx.write(struct.pack("!I",offset))
+        idx.write(struct.pack("!I",len(v_utf8)))
+        offset += len(v_utf8)
+        dict.write(v_utf8)
 
     dict.close()
     idx.close()
 
 if __name__ == "__main__":
 
-    print "opening xml dict .."
+    print("opening xml dict ..")
     f = gzip.open("JMdict.gz")
     #f = open("jmdict_sample.xml")
 
-    print "parsing xml file .."
+    print("parsing xml file ..")
     parser = xml.sax.make_parser()
     handler = JMDictHandler()
     parser.setContentHandler(handler)
     parser.parse(f)
     f.close()
 
-    print "creating dictionary .."
+    print("creating dictionary ..")
     # create a japanese -> english mappings
     jap_to_eng = []
    for kanji,chars,gloss in handler.mapping:
-        for k in kanji:
-            key = k
-            value = string.join(chars + gloss, "\n")
-            jap_to_eng.append((key,value))
-        for c in chars:
-            key = c
-            value = string.join(kanji + gloss, "\n")
-            jap_to_eng.append((key,value))
-
+        for k in kanji:
+            key = k
+            value = string.join(chars + gloss, "\n")
+            jap_to_eng.append((key,value))
+        for c in chars:
+            key = c
+            value = string.join(kanji + gloss, "\n")
+            jap_to_eng.append((key,value))
+
     eng_to_jap = []
     for kanji,chars,gloss in handler.mapping:
-        for k in gloss:
-            key = k
-            value = string.join(kanji + chars, "\n")
-            eng_to_jap.append((key,value))
-
-    print "sorting dictionary .."
+        for k in gloss:
+            key = k
+            value = string.join(kanji + chars, "\n")
+            eng_to_jap.append((key,value))
+
+    print("sorting dictionary ..")
     jap_to_eng.sort(strcasecmp)
     eng_to_jap.sort(strcasecmp)
 
-    print "merging and pruning dups.."
+    print("merging and pruning dups..")
     jap_to_eng = merge_dup(jap_to_eng)
     eng_to_jap = merge_dup(eng_to_jap)
 
-    print "writing to files.."
+    print("writing to files..")
 
     # create dict and idx file
     map_to_file(jap_to_eng, "jmdict-ja-en")
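
Note: the hunks above only port print and filter; several Python-2-isms
survive on lines this patch does not touch: cmp() and the comparator form
of list.sort() are gone in Python 3, string.join() no longer exists, and
idx.write(k_utf8 + "\0") concatenates bytes with str. A minimal sketch of
how those could be ported, reusing the script's own names (not part of
the patch):

    import functools

    def strcasecmp(a, b):
        # cmp() was removed in Python 3; emulate it with rich comparisons
        x, y = a[0].lower(), b[0].lower()
        return (x > y) - (x < y)

    pairs = [("b", "2"), ("A", "1")]
    pairs.sort(key=functools.cmp_to_key(strcasecmp))  # replaces .sort(strcasecmp)
    value = "\n".join(["kana", "gloss"])              # replaces string.join(..., "\n")
    key_bytes = "キー".encode("utf-8") + b"\0"         # "\0" as str would raise TypeError
    print(pairs, repr(value), key_bytes)
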
Index: stardict-tools-3.0.1/src/makevietdict.py
===================================================================
--- stardict-tools-3.0.1.orig/src/makevietdict.py
+++ stardict-tools-3.0.1/src/makevietdict.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/python3
 # WinVNKey Hannom Database to Stardict dictionary source Conversion Tool
 # coded by wesnoth@ustc on 070804
 # http://winvnkey.sourceforge.net
@@ -7,7 +7,7 @@ infileencoding = 'utf-16-le'
 outfileencoding = 'utf-8'
 
 def showhelp():
-    print "Usage: %s filename" % sys.argv[0]
+    print("Usage: %s filename" % sys.argv[0])
 
 def ishantu(str):
     if len(str) > 0 and ord(str[0]) > 0x2e80:
@@ -37,15 +37,15 @@ def mysplit(line):
     return line
 
 if __name__ == '__main__':
-    if len(sys.argv) <> 2:
+    if len(sys.argv) != 2:
         showhelp()
     else:
         fp = open(sys.argv[1], 'r')
-        print 'Reading file...'
+        print('Reading file...')
         lines = unicode(fp.read(), infileencoding).split(u'\n')
         lineno = 0
         hugedict = {}
-        print 'Generating Han-Viet dict...'
+        print('Generating Han-Viet dict...')
         for line in lines:
             lineno += 1
             if line.endswith(u'\r'):
@@ -72,7 +72,7 @@ if __name__ == '__main__':
             line[1] = filter(None, map(string.strip, line[1].split(u',')))
             #hugedict[line[0]] = hugedict.get(line[0], []) + line[1]
             for item in line[1]:
-                if not hugedict.has_key(line[0]):
+                if line[0] not in hugedict:
                     hugedict[line[0]] = [item]
                 elif not item in hugedict[line[0]]:
                     hugedict[line[0]] += [item]
@@ -83,25 +83,25 @@ if __name__ == '__main__':
 #            print viettu.encode('utf-8'), ',',
 #        print
         fp.close()
-        print 'Generating Viet-Han dict...'
+        print('Generating Viet-Han dict...')
         dicthuge = {}
-        for hantu, quocngu in hugedict.iteritems():
+        for hantu, quocngu in hugedict.items():
             for viettu in quocngu:
-                if not dicthuge.has_key(viettu):
+                if viettu not in dicthuge:
                     dicthuge[viettu] = [hantu]
                 elif not hantu in dicthuge[viettu]:
                     dicthuge[viettu] += [hantu]
-        print 'Writing Han-Viet dict...'
+        print('Writing Han-Viet dict...')
         gp = open('hanviet.txt', 'w')
-        for hantu, quocngu in hugedict.iteritems():
+        for hantu, quocngu in hugedict.items():
             gp.write(hantu.encode('utf-8'))
             gp.write('\t')
             gp.write((u', '.join(quocngu)).encode('utf-8'))
             gp.write('\n')
         gp.close()
-        print 'Writing Viet-Han dict...'
+        print('Writing Viet-Han dict...')
         gp = open('viethan.txt', 'w')
-        for quocngu,hantu in dicthuge.iteritems():
+        for quocngu,hantu in dicthuge.items():
             gp.write(quocngu.encode('utf-8'))
             gp.write('\t')
             gp.write((u' '.join(hantu)).encode('utf-8'))
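
Note: the untouched context lines of makevietdict.py still rely on
unicode(), string.strip, and bytes written to text-mode files, none of
which work under Python 3. A minimal sketch of the equivalent Python 3
constructs (not part of the patch):

    data = "漢\thán, hãn\r\n".encode("utf-16-le")
    line = data.decode("utf-16-le").split("\n")[0].rstrip("\r")  # replaces unicode(fp.read(), infileencoding)
    viet = [s.strip() for s in line.split("\t")[1].split(",") if s.strip()]  # replaces filter(None, map(string.strip, ...))
    with open("hanviet-sample.txt", "w", encoding="utf-8") as gp:
        gp.write(line.split("\t")[0] + "\t" + ", ".join(viet) + "\n")   # text-mode write; no per-write .encode()
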
Index: stardict-tools-3.0.1/src/lingea-trd-decoder.py
===================================================================
--- stardict-tools-3.0.1.orig/src/lingea-trd-decoder.py
+++ stardict-tools-3.0.1/src/lingea-trd-decoder.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 # -*- coding: utf-8 -*-
 #
 # Script for decoding Lingea Dictionary (.trd) file
@@ -49,34 +49,34 @@ VERSION = "0.4"
 
 import getopt, sys
 def usage():
-    print "Lingea Dictionary Decoder"
-    print "-------------------------"
-    print "Version: %s" % VERSION
-    print "Copyright (C) 2007 - Klokan Petr Pridal, Petr Dlouhy"
-    print
-    print "Usage: python lingea-trd-decoder.py DICTIONARY.trd > DICTIONARY.tab"
-    print "Result convertion by stardict-tools: /usr/lib/stardict-tools/tabfile"
-    print
-    print " -o <num> --out-style : Output style"
-    print " 0 no tags"
-    print " 1 \\n tags"
-    print " 2 html tags"
-    print " -h --help : Print this message"
-    print " -d --debug : Degub"
-    print " -r --debug-header : Degub - print headers"
-    print " -a --debug-all : Degub - print all records"
-    print " -l --debug-limit : Degub limit"
-    print
-    print "For HTML support in StarDict dictionary .ifo has to contain:"
-    print "sametypesequence=g"
-    print "!!! Change the .ifo file after generation by tabfile !!!"
-    print
+    print("Lingea Dictionary Decoder")
+    print("-------------------------")
+    print("Version: %s" % VERSION)
+    print("Copyright (C) 2007 - Klokan Petr Pridal, Petr Dlouhy")
+    print()
+    print("Usage: python lingea-trd-decoder.py DICTIONARY.trd > DICTIONARY.tab")
+    print("Result convertion by stardict-tools: /usr/lib/stardict-tools/tabfile")
+    print()
+    print(" -o <num> --out-style : Output style")
+    print(" 0 no tags")
+    print(" 1 \\n tags")
+    print(" 2 html tags")
+    print(" -h --help : Print this message")
+    print(" -d --debug : Degub")
+    print(" -r --debug-header : Degub - print headers")
+    print(" -a --debug-all : Degub - print all records")
+    print(" -l --debug-limit : Degub limit")
+    print()
+    print("For HTML support in StarDict dictionary .ifo has to contain:")
+    print("sametypesequence=g")
+    print("!!! Change the .ifo file after generation by tabfile !!!")
+    print()
 
 try:
     opts, args = getopt.getopt(sys.argv[1:], "hdo:ral:", ["help", "debug", "out-style=", "debug-header", "debug-all", "debug-limit="])
 except getopt.GetoptError:
     usage()
-    print "ERROR: Bad option"
+    print("ERROR: Bad option")
     sys.exit(2)
 
 import locale
@@ -94,7 +94,7 @@ for o, a in opts:
         OUTSTYLE = locale.atoi(a)
         if OUTSTYLE > 2:
             usage()
-            print "ERROR: Output style not specified"
+            print("ERROR: Output style not specified")
     if o in ("-r", "--debug-header"):
         # If DEBUG and DEBUGHEADER, then print just all header records
         DEBUGHEADER = True
@@ -113,7 +113,7 @@ if len(args) == 1:
     FILENAME = args[0]
 else:
     usage()
-    print "ERROR: You have to specify .trd file to decode"
+    print("ERROR: You have to specify .trd file to decode")
     sys.exit(2)
 
 from struct import *
@@ -428,7 +428,7 @@ def out( comment = "", skip = False):
         comment = comment % s
     else:
         comment = comment % bs[pos]
-    if DEBUG: print "%03d %s %s | %s | %03d" % (pos, toBin(bs[pos]),comment, s, (triple + pos))
+    if DEBUG: print("%03d %s %s | %s | %03d" % (pos, toBin(bs[pos]),comment, s, (triple + pos)))
     if skip:
         pos += triple + 1
     return s.replace('`','') # Remove '`' character from words
@@ -671,14 +671,14 @@ if DEBUG:
     s = decode(getRec(i))
     if DEBUGHEADER:
 #        print s.split('\t')[0]
-        print s
+        print(s)
     if DEBUGLIMIT > 0 and not s.endswith('\n'):
         DEBUG = True
-        print "-"*80
-        print "%s) at address %s" % (i, toBin(index[i]))
-        print
+        print("-"*80)
+        print("%s) at address %s" % (i, toBin(index[i])))
+        print()
         s = decode(getRec(i))
-        print s
+        print(s)
         DEBUGLIMIT -= 1
         DEBUG = True
 else:
@@ -686,10 +686,10 @@ else:
     for i in range(1,entryCount):
         s = decode(getRec(i))
         if s.endswith('\n'):
-            print s,
+            print(s, end=' ')
         else:
-            print s
-            print "!!! RECORD STRUCTURE DECODING ERROR !!!"
-            print "Please run this script in DEBUG mode and repair DATA BLOCK(S) section in function decode()"
-            print "If you succeed with whole dictionary send report (name of the dictionary and source code of script) to slovniky@googlegroups.com"
+            print(s)
+            print("!!! RECORD STRUCTURE DECODING ERROR !!!")
+            print("Please run this script in DEBUG mode and repair DATA BLOCK(S) section in function decode()")
+            print("If you succeed with whole dictionary send report (name of the dictionary and source code of script) to slovniky@googlegroups.com")
             break
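
Note: the record-reading code of lingea-trd-decoder.py lies outside these
hunks, so this is an assumption: under Python 3 the .trd file has to be
opened in binary mode, and indexing the resulting bytes object already
yields ints, so any ord(bs[pos]) in the untouched code would drop the
ord(). A minimal sketch:

    with open("sample.trd", "wb") as f:
        f.write(bytes([0x4C, 0x44]))
    bs = open("sample.trd", "rb").read()
    print(bs[0])                  # 76 -- already an int in Python 3
    print(format(bs[0], "08b"))   # roughly what a toBin()-style helper prints
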
Index: stardict-tools-3.0.1/src/extractKangXi.py
===================================================================
--- stardict-tools-3.0.1.orig/src/extractKangXi.py
+++ stardict-tools-3.0.1/src/extractKangXi.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/python3
 # -*- coding: utf-8 -*-
 import sys, os, string, re, glob
 import libxml2dom
@@ -20,7 +20,7 @@ num = 0
 errorfiles = []
 for filename in filelist:
     num += 1
-    print >> sys.stderr, filename, num, 'of', filenum
+    print(filename, num, 'of', filenum, file=sys.stderr)
     try:
         fp = open(filename, 'r')
         doc = libxml2dom.parseString(fp.read(), html=1)
@@ -29,7 +29,7 @@ for filename in filelist:
         style = re.search(r'(?s)\s*\.(\S+)\s*{\s*display:\s*none', style)
         displaynone = style.group(1)
         tabpages = doc.getElementsByTagName("div")
-        tabpages = filter(lambda s: s.getAttribute("class") == "tab-page", tabpages)
+        tabpages = [s for s in tabpages if s.getAttribute("class") == "tab-page"]
         for tabpage in tabpages:
             found = False
             for node in tabpage.childNodes:
@@ -45,16 +45,16 @@ for filename in filelist:
             paragraphs = tabpage.getElementsByTagName("p")
             thisitem = character + u'\t'
             for paragraph in paragraphs:
-                if paragraph.getAttribute("class") <> displaynone:
+                if paragraph.getAttribute("class") != displaynone:
                     #print TextInNode(paragraph).encode(fencoding)
                     text = paragraph.textContent
                     #text = filter(lambda s: not s in u' \t\r\n', text)
                     text = re.sub(r'\s+', r' ', text)
                     thisitem += text + u'\\n'
-            print thisitem.encode(fencoding)
+            print(thisitem.encode(fencoding))
     except:
-        print >> sys.stderr, 'error occured'
+        print('error occured', file=sys.stderr)
         errorfiles += [filename]
         continue
 if errorfiles:
-    print >> sys.stderr, 'Error files:', '\n'.join(errorfiles)
+    print('Error files:', '\n'.join(errorfiles), file=sys.stderr)
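
Note: in Python 3, print(thisitem.encode(fencoding)) writes the b'...'
repr rather than the encoded text. Printing the str and configuring the
stream keeps the old behaviour; a minimal sketch (fencoding is the
script's variable, assumed here to be a codec name like "utf-8"):

    import sys
    sys.stdout.reconfigure(encoding="utf-8")  # Python 3.7+
    thisitem = "字\tKangXi entry\\n"
    print(thisitem)                           # instead of print(thisitem.encode(fencoding))
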
Index: stardict-tools-3.0.1/src/stmerge.py
===================================================================
--- stardict-tools-3.0.1.orig/src/stmerge.py
+++ stardict-tools-3.0.1/src/stmerge.py
@@ -1,18 +1,19 @@
-import sys, string
-base = {}
-for line in sys.stdin.readlines():
-    words = string.split(line[:-1], '\t')
-    if len(words) != 2:
-        print "Error!"
-        exit
-    if base.has_key(words[0]):
-        base[words[0]] += [words[1]]
-    else:
-        base[words[0]] = [words[1]]
-keys = base.keys()
-keys.sort()
-for key in keys:
-    print key,'\t',
-    for val in base[key]:
-        print val,',',
-    print
+#!/usr/bin/python3
+import sys, string
+base = {}
+for line in sys.stdin.readlines():
+    words = string.split(line[:-1], '\t')
+    if len(words) != 2:
+        print("Error!")
+        exit
+    if words[0] in base:
+        base[words[0]] += [words[1]]
+    else:
+        base[words[0]] = [words[1]]
+keys = list(base.keys())
+keys.sort()
+for key in keys:
+    print(key,'\t', end=' ')
+    for val in base[key]:
+        print(val,',', end=' ')
+    print()
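
Note: the rewritten stmerge.py still calls string.split(), which Python 3
removed, and the bare `exit` statement on the error path is a no-op. A
minimal sketch of a fully ported loop (stdin replaced by a sample list):

    import sys
    base = {}
    for line in ["a\t1", "a\t2", "b\t3"]:
        words = line.split("\t")              # replaces string.split(line[:-1], '\t')
        if len(words) != 2:
            sys.exit("Error!")                # bare `exit` never actually exits
        base.setdefault(words[0], []).append(words[1])
    for key in sorted(base):
        print(key, "\t", ", ".join(base[key]))
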
Index: stardict-tools-3.0.1/src/KangXiZiDian-djvu2tiff.py
===================================================================
--- stardict-tools-3.0.1.orig/src/KangXiZiDian-djvu2tiff.py
+++ stardict-tools-3.0.1/src/KangXiZiDian-djvu2tiff.py
@@ -1,3 +1,4 @@
+#!/usr/bin/python3
 # This tool convert KangXiZiDian djvu files to tiff files.
 # Download djvu files: http://bbs.dartmouth.edu/~fangq/KangXi/KangXi.tar
 # Character page info: http://wenq.org/unihan/Unihan.txt as kIRGKangXi field.
Index: stardict-tools-3.0.1/src/hanzim2dict.py
===================================================================
--- stardict-tools-3.0.1.orig/src/hanzim2dict.py
+++ stardict-tools-3.0.1/src/hanzim2dict.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 #
 # hanzim2dict
 #
@@ -44,7 +44,7 @@ for line in lines:
     code = toUTF(fromGB(line[0])[0])[0]
     pinyin = line[2]
     definition = '<'+pinyin+'> '+line[3]+' ['+line[1]+']'
-    if wordmap.has_key(code):
+    if code in wordmap:
         wordmap[code].add(definition)
     else:
         wordmap[code] = Word(code, definition)
@@ -55,11 +55,11 @@ for filename in ("cidianf.gb", "sanzicid
 
 for line in lines:
     if len(line) < 2:
-        print len(line)
+        print(len(line))
         continue
     code = toUTF(fromGB(line[0][:-2])[0])[0]
     definition = line[1]+' ['+line[0][-1:]+']'
-    if wordmap.has_key(code):
+    if code in wordmap:
         wordmap[code].add(definition)
     else:
         wordmap[code] = Word(code, definition)
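
Note: both hunks replace wordmap.has_key(code) with the `in` operator,
which is the whole Python 3 port here. Word and add() are the script's
names; a self-contained sketch of the pattern with a stand-in Word class
(the real class is defined elsewhere in the file):

    class Word:
        def __init__(self, code, definition):
            self.code, self.defs = code, [definition]
        def add(self, definition):
            self.defs.append(definition)

    wordmap = {}
    for code, definition in [("字", "<zi4> character"), ("字", "<zi4> word")]:
        if code in wordmap:                   # Python 3 spelling of has_key()
            wordmap[code].add(definition)
        else:
            wordmap[code] = Word(code, definition)
    print(wordmap["字"].defs)
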
Index: stardict-tools-3.0.1/src/mkguangyunst.py
===================================================================
--- stardict-tools-3.0.1.orig/src/mkguangyunst.py
+++ stardict-tools-3.0.1/src/mkguangyunst.py
@@ -1,3 +1,4 @@
+#!/usr/bin/python3
 import sys, string
 for line in sys.stdin.readlines():
     words = string.split(line[:-1], '\t')
@@ -15,6 +16,6 @@ for line in sys.stdin.readlines():
     pinyin= words[13]
     psyun = words[22]
     if beizhu == '':
-        print "%s\t%s %s%s%s%s%s%s %sQIE PINYIN%s PSYUN%s\\n%s" % (romazi, muci, sheng, yunbu, she, hu, deng, diao, fanqie, pinyin, psyun, chars)
+        print("%s\t%s %s%s%s%s%s%s %sQIE PINYIN%s PSYUN%s\\n%s" % (romazi, muci, sheng, yunbu, she, hu, deng, diao, fanqie, pinyin, psyun, chars))
     else:
-        print "%s\t%s %s%s%s%s%s%s %sQIE PINYIN%s PSYUN%s\\n%s\\n%s" % (romazi, muci, sheng, yunbu, she, hu, deng, diao, fanqie, pinyin, psyun, chars, beizhu)
+        print("%s\t%s %s%s%s%s%s%s %sQIE PINYIN%s PSYUN%s\\n%s\\n%s" % (romazi, muci, sheng, yunbu, she, hu, deng, diao, fanqie, pinyin, psyun, chars, beizhu))
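
Note: the context line `words = string.split(line[:-1], '\t')` is left
untouched, and string.split() no longer exists in Python 3; the method
form is the port. A minimal sketch:

    line = "za\tzi\tsheng\n"
    words = line[:-1].split("\t")   # replaces string.split(line[:-1], '\t')
    print(words)
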
Index: stardict-tools-3.0.1/src/uyghur2dict.py
===================================================================
--- stardict-tools-3.0.1.orig/src/uyghur2dict.py
+++ stardict-tools-3.0.1/src/uyghur2dict.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 #
 # uyghur2dict
 # By Abdisalam (anatilim@gmail.com), inspired by Michael Robinson's hanzim2dict converter.
@@ -41,7 +41,7 @@ lines = map(lambda x: split(x[:-1], '\t\
 for line in lines:
     code = line[0]
     definition = line[1]
-    if wordmap.has_key(code):
+    if code in wordmap:
         wordmap[code].add(definition)
     else:
         wordmap[code] = Word(code, definition)
@@ -84,4 +84,4 @@ ifo.write("author=Abdisalam\n")
 ifo.write("email=anatilim@gmail.com\n")
 ifo.write("description=感谢新疆维吾尔自治区语委会、新疆青少年出版社为我们提供《汉维词典》的词库\n")
 ifo.write("sametypesequence=m\n")
-ifo.close()
\ No newline at end of file
+ifo.close()
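
Note: the truncated hunk header above shows uyghur2dict.py still building
`lines` with map() and the string-module split(), which Python 3 removed;
the exact separator is cut off in the header, so it is an assumption in
this sketch:

    raw = ["code\tdefinition\n"]
    lines = [x[:-1].split("\t") for x in raw]  # replaces map(lambda x: split(x[:-1], ...), ...)
    print(lines)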