#!/usr/bin/env python3
# Copyright (C) 1998, 1999 Tom Tromey
# Copyright (C) 2001 Red Hat Software
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.

"""
gen-casefold-txt.py - Generate test cases for casefolding from Unicode data.

See http://www.unicode.org/Public/UNIDATA/UnicodeCharacterDatabase.html

Usage:
    gen-casefold-txt.py UNICODE-VERSION CaseFolding.txt

I consider the output of this program to be unrestricted.
Use it as you will.
"""

import sys
import argparse

# Positions of the fields in a CaseFolding table entry.
_CODE, _STATUS, _MAPPING = range(3)

# Status values whose entries are left out of the output: simple (S) and
# Turkic (T) mappings.  A set gives exact matching; a substring test against
# "ST" would also match an empty status field.
_SKIPPED_STATUSES = frozenset(("S", "T"))

# Fixed preamble of the generated file; "{}" receives the Unicode version.
_HEADER = """\
# Test cases generated from Unicode {} data
# by gen-casefold-txt.py. Do not edit.
#
# Some special hand crafted tests
#
AaBbCc@@\taabbcc@@
#
# Now the automatic tests
#"""


def _parse_entry(line):
    """Turn one line of the CaseFolding table into a test case.

    Returns a ``(code, values)`` tuple, where ``code`` is the source
    character and ``values`` is the string built from the mapped code
    points, or ``None`` when the line produces no test case (blank line,
    comment-only line, or an entry whose status is skipped).

    Raises SystemExit when an entry does not have three fields.
    """
    # Everything after the first "#" is a comment.
    line = line.split("#", 1)[0].strip()
    if not line:
        return None

    # Entries end with a trailing ";", so split at most three times and
    # keep only the first three pieces.
    fields = [field.strip() for field in line.split(";", 3)[:3]]
    if len(fields) != 3:
        raise SystemExit(
            "Entry for %s has wrong number of fields (%d)" % (
                fields[_CODE], len(fields)))

    # skip simple and Turkic mappings
    if fields[_STATUS] in _SKIPPED_STATUSES:
        return None

    code = chr(int(fields[_CODE], 16))
    values = "".join(chr(int(v, 16)) for v in fields[_MAPPING].split())
    return code, values


def main(argv):
    """Print casefolding test cases to standard output.

    ``argv`` is the full argument vector including the program name;
    ``argv[1]`` is the Unicode version placed in the header and ``argv[2]``
    is the path of the CaseFolding.txt file, which is read as UTF-8.

    Each emitted test case is a line of the form
    ``<character><TAB><folded string>``.
    """
    parser = argparse.ArgumentParser(
        description="Generate test cases for casefolding from Unicode data")
    parser.add_argument("UNICODE-VERSION")
    parser.add_argument("CaseFolding.txt")
    args = parser.parse_args(argv[1:])
    # The argument names are not valid identifiers, hence getattr().
    version = getattr(args, "UNICODE-VERSION")
    filename = getattr(args, "CaseFolding.txt")

    print(_HEADER.format(version))

    with open(filename, encoding="utf-8") as fileobj:
        for line in fileobj:
            entry = _parse_entry(line)
            if entry is None:
                continue
            code, values = entry
            print("{}\t{}".format(code, values))


if __name__ == "__main__":
    sys.exit(main(sys.argv))