Generate the East Asian width interval tables from gen-unicode-tables.pl.

glib/gen-iswide-table.py, a stand-alone script that read width data on
stdin and printed interval tables for g_unichar_iswide() and
g_unichar_iswide_cjk(), is deleted.  gen-unicode-tables.pl now looks for
extracted/DerivedEastAsianWidth.txt below the Unicode data directory,
parses it into @eawidths, and calls output_width_tables() from
print_tables().

diff --git a/glib/gen-iswide-table.py b/glib/gen-iswide-table.py
deleted file mode 100755
index 292addc3d..000000000
--- a/glib/gen-iswide-table.py
+++ /dev/null
@@ -1,58 +0,0 @@
-#!/usr/bin/python
-
-import sys
-
-W = {}
-W['A'] = []
-W['W'] = []
-W['F'] = W['W']
-
-for line in sys.stdin:
-	i = line.find ('#')
-	if i >= 0:
-		line = line[:i]
-	line = line.strip ()
-	if not len (line):
-		continue
-
-	fields = [x.strip () for x in line.split (';')]
-	chars = fields[0]
-	width = fields[1]
-
-	if width not in ['A', 'W', 'F']:
-		continue
-
-	if chars.find ('..') > 0:
-		(start,end) = chars.split ('..')
-	else:
-		start = chars
-		end = chars
-	start, end = int(start,16), int(end,16)
-
-	for i in range (start, end+1):
-		W[width].append (i)
-
-
-def write_intervals (S):
-	S.sort ()
-	start = S[0];
-	end = start - 1
-	for c in S:
-		if c == end+1:
-			end += 1
-			continue
-		else:
-			print "{0x%04X, 0x%04X}, " % (start, end)
-			start = c
-			end = start
-	print "{0x%04X, 0x%04X} " % (start, end)
-
-
-
-print "table for g_unichar_iswide():"
-print
-write_intervals (W['W'])
-print
-print "table for g_unichar_iswide_cjk():"
-print
-write_intervals (W['A'])
diff --git a/glib/gen-unicode-tables.pl b/glib/gen-unicode-tables.pl
index 37d9de99e..e6520b82e 100755
--- a/glib/gen-unicode-tables.pl
+++ b/glib/gen-unicode-tables.pl
@@ -161,6 +161,10 @@ my @special_cases;
 my @special_case_offsets;
 my $special_case_offset = 0;
 
+# East Asian widths: [start, end, width property] triples
+
+my @eawidths;
+
 $do_decomp = 0;
 $do_props = 1;
 if (@ARGV && $ARGV[0] eq '-decomp')
@@ -177,10 +181,11 @@ elsif (@ARGV && $ARGV[0] eq '-both')
 if (@ARGV != 2)
 {
     $0 =~ s@.*/@@;
-    die "\nUsage: $0 [-decomp | -both] UNICODE-VERSION DIRECTORY\n\n DIRECTORY should contain the following Unicode data files:\n UnicodeData.txt, LineBreak.txt, SpecialCasing.txt, CaseFolding.txt,\n CompositionExclusions.txt\n\n";
+    die "\nUsage: $0 [-decomp | -both] UNICODE-VERSION DIRECTORY\n\n DIRECTORY should contain the following Unicode data files:\n UnicodeData.txt, LineBreak.txt, SpecialCasing.txt, CaseFolding.txt,\n CompositionExclusions.txt, extracted/DerivedEastAsianWidth.txt\n\n";
 }
 
-my ($unicodedatatxt, $linebreaktxt, $specialcasingtxt, $casefoldingtxt, $compositionexclusionstxt);
+my ($unicodedatatxt, $linebreaktxt, $specialcasingtxt, $casefoldingtxt, $compositionexclusionstxt,
+    $derivedeastasianwidthtxt);
 
 my $d = $ARGV[1];
 opendir (my $dir, $d) or die "Cannot open Unicode data dir $d: $!\n";
@@ -193,11 +198,19 @@ for my $f (readdir ($dir))
     $compositionexclusionstxt = "$d/$f" if ($f =~ /^CompositionExclusions.*\.txt/);
 }
 
+my $extd = $ARGV[1] . "/extracted";
+opendir (my $extdir, $extd) or die "Cannot open Unicode/extracted data dir $extd: $!\n";
+for my $f (readdir ($extdir))
+{
+    $derivedeastasianwidthtxt = "$extd/$f" if ($f =~ /^DerivedEastAsianWidth.*\.txt/);
+}
+
 defined $unicodedatatxt or die "Did not find UnicodeData file";
 defined $linebreaktxt or die "Did not find LineBreak file";
 defined $specialcasingtxt or die "Did not find SpecialCasing file";
 defined $casefoldingtxt or die "Did not find CaseFolding file";
 defined $compositionexclusionstxt or die "Did not find CompositionExclusions file";
+defined $derivedeastasianwidthtxt or die "Did not find DerivedEastAsianWidth file";
 
 print "Creating decomp table\n" if ($do_decomp);
 print "Creating property table\n" if ($do_props);
@@ -489,6 +502,31 @@ while ()
 
 close INPUT;
 
+print "Reading derived east asian widths\n";
+
+open (INPUT, "< $derivedeastasianwidthtxt") || exit 1;
+
+while (<INPUT>)
+{
+    # Each data line is "START[..END] ; PROPERTY", optionally followed by a "#" comment.
+
+    chomp;
+
+    s/#.*//;
+    next if /^\s*$/;
+    if (!/^([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s*;\s*([A-Za-z_]+)\s*$/) {
+        die "Cannot parse line: '$_'\n";
+    }
+
+    if (defined $2) {
+        push @eawidths, [ hex $1, hex $2, $3 ];
+    } else {
+        push @eawidths, [ hex $1, hex $1, $3 ];
+    }
+}
+
+close INPUT;
+
 if ($do_props) {
     &print_tables ($last_code)
 }
@@ -664,6 +702,11 @@ sub print_tables
     &output_special_case_table (\*OUT);
     &output_casefold_table (\*OUT);
 
+    #
+    # And the widths tables
+    #
+    &output_width_tables (\*OUT);
+
     print OUT "#endif /* CHARTABLES_H */\n";
 
     close (OUT);
@@ -1334,5 +1377,53 @@ EOT
 
     printf "Generated %d bytes for casefold table\n", $recordlen * @casefold;
 }
-
+# Print g_unicode_width_table_$name to $out: one {start, end} row per run of
+# adjacent @eawidths entries whose width property matches the regex $wpe.
+# Only neighbouring array entries are merged, so @eawidths must be sorted.
+sub output_one_width_table
+{
+    my ($out, $name, $wpe) = @_;
+    my ($start, $end, $wp);
+    my $rex = qr/$wpe/;
+
+    print $out "static const struct Interval g_unicode_width_table_${name}[] = {\n";
+
+    for (my $i = 0; $i <= $#eawidths; $i++) {
+        $start = $eawidths[$i]->[0];
+        $end = $eawidths[$i]->[1];
+        $wp = $eawidths[$i]->[2];
+
+        next if ($wp !~ $rex);
+        # Absorb following entries that start right after $end and also match.
+        while ($i <= $#eawidths - 1 &&
+               $eawidths[$i + 1]->[0] == $end + 1 &&
+               ($eawidths[$i + 1]->[2] =~ $rex)) {
+            $i++;
+            $end = $eawidths[$i]->[1];
+        }
+
+        printf $out "{0x%04X, 0x%04X},\n", $start, $end;
+    }
+
+    print $out "};\n\n";
+}
+
+sub output_width_tables
+{
+    my $out = shift;
+
+    @eawidths = sort { $a->[0] <=> $b->[0] } @eawidths;
+
+    print $out <