mirror of
https://gitlab.gnome.org/GNOME/glib.git
synced 2025-06-28 15:14:51 +02:00
Update Unicode data to 4.0. (#107974)
2003-07-30 Noah Levitt <nlevitt@columbia.edu> * glib/gen-unicode-tables.pl: * glib/gunibreak.c: * glib/gunibreak.h: * glib/gunichartables.h: * glib/gunicode.h: * glib/gunicomp.h: * glib/gunidecomp.c: * glib/gunidecomp.h: * glib/guniprop.c: * tests/casefold.txt: * tests/casemap.txt: * tests/gen-casefold-txt.pl: * tests/gen-casemap-txt.pl: Update Unicode data to 4.0. (#107974)
This commit is contained in:
parent
cdf72b09e6
commit
05f99527eb
16
ChangeLog
16
ChangeLog
@ -1,3 +1,19 @@
|
|||||||
|
2003-07-30 Noah Levitt <nlevitt@columbia.edu>
|
||||||
|
|
||||||
|
* glib/gen-unicode-tables.pl:
|
||||||
|
* glib/gunibreak.c:
|
||||||
|
* glib/gunibreak.h:
|
||||||
|
* glib/gunichartables.h:
|
||||||
|
* glib/gunicode.h:
|
||||||
|
* glib/gunicomp.h:
|
||||||
|
* glib/gunidecomp.c:
|
||||||
|
* glib/gunidecomp.h:
|
||||||
|
* glib/guniprop.c:
|
||||||
|
* tests/casefold.txt:
|
||||||
|
* tests/casemap.txt:
|
||||||
|
* tests/gen-casefold-txt.pl:
|
||||||
|
* tests/gen-casemap-txt.pl: Update Unicode data to 4.0. (#107974)
|
||||||
|
|
||||||
2003-07-31 Tor Lillqvist <tml@iki.fi>
|
2003-07-31 Tor Lillqvist <tml@iki.fi>
|
||||||
|
|
||||||
* glib/gspawn-win32.c: When possible, manage without the helper
|
* glib/gspawn-win32.c: When possible, manage without the helper
|
||||||
|
@ -1,3 +1,19 @@
|
|||||||
|
2003-07-30 Noah Levitt <nlevitt@columbia.edu>
|
||||||
|
|
||||||
|
* glib/gen-unicode-tables.pl:
|
||||||
|
* glib/gunibreak.c:
|
||||||
|
* glib/gunibreak.h:
|
||||||
|
* glib/gunichartables.h:
|
||||||
|
* glib/gunicode.h:
|
||||||
|
* glib/gunicomp.h:
|
||||||
|
* glib/gunidecomp.c:
|
||||||
|
* glib/gunidecomp.h:
|
||||||
|
* glib/guniprop.c:
|
||||||
|
* tests/casefold.txt:
|
||||||
|
* tests/casemap.txt:
|
||||||
|
* tests/gen-casefold-txt.pl:
|
||||||
|
* tests/gen-casemap-txt.pl: Update Unicode data to 4.0. (#107974)
|
||||||
|
|
||||||
2003-07-31 Tor Lillqvist <tml@iki.fi>
|
2003-07-31 Tor Lillqvist <tml@iki.fi>
|
||||||
|
|
||||||
* glib/gspawn-win32.c: When possible, manage without the helper
|
* glib/gspawn-win32.c: When possible, manage without the helper
|
||||||
|
@ -1,3 +1,19 @@
|
|||||||
|
2003-07-30 Noah Levitt <nlevitt@columbia.edu>
|
||||||
|
|
||||||
|
* glib/gen-unicode-tables.pl:
|
||||||
|
* glib/gunibreak.c:
|
||||||
|
* glib/gunibreak.h:
|
||||||
|
* glib/gunichartables.h:
|
||||||
|
* glib/gunicode.h:
|
||||||
|
* glib/gunicomp.h:
|
||||||
|
* glib/gunidecomp.c:
|
||||||
|
* glib/gunidecomp.h:
|
||||||
|
* glib/guniprop.c:
|
||||||
|
* tests/casefold.txt:
|
||||||
|
* tests/casemap.txt:
|
||||||
|
* tests/gen-casefold-txt.pl:
|
||||||
|
* tests/gen-casemap-txt.pl: Update Unicode data to 4.0. (#107974)
|
||||||
|
|
||||||
2003-07-31 Tor Lillqvist <tml@iki.fi>
|
2003-07-31 Tor Lillqvist <tml@iki.fi>
|
||||||
|
|
||||||
* glib/gspawn-win32.c: When possible, manage without the helper
|
* glib/gspawn-win32.c: When possible, manage without the helper
|
||||||
|
@ -1,3 +1,19 @@
|
|||||||
|
2003-07-30 Noah Levitt <nlevitt@columbia.edu>
|
||||||
|
|
||||||
|
* glib/gen-unicode-tables.pl:
|
||||||
|
* glib/gunibreak.c:
|
||||||
|
* glib/gunibreak.h:
|
||||||
|
* glib/gunichartables.h:
|
||||||
|
* glib/gunicode.h:
|
||||||
|
* glib/gunicomp.h:
|
||||||
|
* glib/gunidecomp.c:
|
||||||
|
* glib/gunidecomp.h:
|
||||||
|
* glib/guniprop.c:
|
||||||
|
* tests/casefold.txt:
|
||||||
|
* tests/casemap.txt:
|
||||||
|
* tests/gen-casefold-txt.pl:
|
||||||
|
* tests/gen-casemap-txt.pl: Update Unicode data to 4.0. (#107974)
|
||||||
|
|
||||||
2003-07-31 Tor Lillqvist <tml@iki.fi>
|
2003-07-31 Tor Lillqvist <tml@iki.fi>
|
||||||
|
|
||||||
* glib/gspawn-win32.c: When possible, manage without the helper
|
* glib/gspawn-win32.c: When possible, manage without the helper
|
||||||
|
@ -1,3 +1,19 @@
|
|||||||
|
2003-07-30 Noah Levitt <nlevitt@columbia.edu>
|
||||||
|
|
||||||
|
* glib/gen-unicode-tables.pl:
|
||||||
|
* glib/gunibreak.c:
|
||||||
|
* glib/gunibreak.h:
|
||||||
|
* glib/gunichartables.h:
|
||||||
|
* glib/gunicode.h:
|
||||||
|
* glib/gunicomp.h:
|
||||||
|
* glib/gunidecomp.c:
|
||||||
|
* glib/gunidecomp.h:
|
||||||
|
* glib/guniprop.c:
|
||||||
|
* tests/casefold.txt:
|
||||||
|
* tests/casemap.txt:
|
||||||
|
* tests/gen-casefold-txt.pl:
|
||||||
|
* tests/gen-casemap-txt.pl: Update Unicode data to 4.0. (#107974)
|
||||||
|
|
||||||
2003-07-31 Tor Lillqvist <tml@iki.fi>
|
2003-07-31 Tor Lillqvist <tml@iki.fi>
|
||||||
|
|
||||||
* glib/gspawn-win32.c: When possible, manage without the helper
|
* glib/gspawn-win32.c: When possible, manage without the helper
|
||||||
|
@ -1,3 +1,19 @@
|
|||||||
|
2003-07-30 Noah Levitt <nlevitt@columbia.edu>
|
||||||
|
|
||||||
|
* glib/gen-unicode-tables.pl:
|
||||||
|
* glib/gunibreak.c:
|
||||||
|
* glib/gunibreak.h:
|
||||||
|
* glib/gunichartables.h:
|
||||||
|
* glib/gunicode.h:
|
||||||
|
* glib/gunicomp.h:
|
||||||
|
* glib/gunidecomp.c:
|
||||||
|
* glib/gunidecomp.h:
|
||||||
|
* glib/guniprop.c:
|
||||||
|
* tests/casefold.txt:
|
||||||
|
* tests/casemap.txt:
|
||||||
|
* tests/gen-casefold-txt.pl:
|
||||||
|
* tests/gen-casemap-txt.pl: Update Unicode data to 4.0. (#107974)
|
||||||
|
|
||||||
2003-07-31 Tor Lillqvist <tml@iki.fi>
|
2003-07-31 Tor Lillqvist <tml@iki.fi>
|
||||||
|
|
||||||
* glib/gspawn-win32.c: When possible, manage without the helper
|
* glib/gspawn-win32.c: When possible, manage without the helper
|
||||||
|
@ -31,8 +31,12 @@
|
|||||||
# * For decomp table it might make sense to use a shift count other
|
# * For decomp table it might make sense to use a shift count other
|
||||||
# than 8. We could easily compute the perfect shift count.
|
# than 8. We could easily compute the perfect shift count.
|
||||||
|
|
||||||
|
# we use some perl unicode features
|
||||||
|
require 5.006;
|
||||||
|
|
||||||
use vars qw($CODE $NAME $CATEGORY $COMBINING_CLASSES $BIDI_CATEGORY $DECOMPOSITION $DECIMAL_VALUE $DIGIT_VALUE $NUMERIC_VALUE $MIRRORED $OLD_NAME $COMMENT $UPPER $LOWER $TITLE $BREAK_CODE $BREAK_CATEGORY $BREAK_NAME $CASE_CODE $CASE_LOWER $CASE_TITLE $CASE_UPPER $CASE_CONDITION);
|
use vars qw($CODE $NAME $CATEGORY $COMBINING_CLASSES $BIDI_CATEGORY $DECOMPOSITION $DECIMAL_VALUE $DIGIT_VALUE $NUMERIC_VALUE $MIRRORED $OLD_NAME $COMMENT $UPPER $LOWER $TITLE $BREAK_CODE $BREAK_CATEGORY $BREAK_NAME $CASE_CODE $CASE_LOWER $CASE_TITLE $CASE_UPPER $CASE_CONDITION);
|
||||||
|
|
||||||
|
|
||||||
# Names of fields in Unicode data table.
|
# Names of fields in Unicode data table.
|
||||||
$CODE = 0;
|
$CODE = 0;
|
||||||
$NAME = 1;
|
$NAME = 1;
|
||||||
@ -134,6 +138,8 @@ $FOLDING_MAPPING = 2;
|
|||||||
'PO' => "G_UNICODE_BREAK_POSTFIX",
|
'PO' => "G_UNICODE_BREAK_POSTFIX",
|
||||||
'SA' => "G_UNICODE_BREAK_COMPLEX_CONTEXT",
|
'SA' => "G_UNICODE_BREAK_COMPLEX_CONTEXT",
|
||||||
'AI' => "G_UNICODE_BREAK_AMBIGUOUS",
|
'AI' => "G_UNICODE_BREAK_AMBIGUOUS",
|
||||||
|
'NL' => "G_UNICODE_BREAK_NEXT_LINE",
|
||||||
|
'WJ' => "G_UNICODE_BREAK_WORD_JOINER",
|
||||||
'XX' => "G_UNICODE_BREAK_UNKNOWN"
|
'XX' => "G_UNICODE_BREAK_UNKNOWN"
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -143,8 +149,9 @@ $FOLDING_MAPPING = 2;
|
|||||||
|
|
||||||
# Maximum length of special-case strings
|
# Maximum length of special-case strings
|
||||||
|
|
||||||
my $special_case_len = 0;
|
|
||||||
my @special_cases;
|
my @special_cases;
|
||||||
|
my @special_case_offsets;
|
||||||
|
my $special_case_offset = 0;
|
||||||
|
|
||||||
$do_decomp = 0;
|
$do_decomp = 0;
|
||||||
$do_props = 1;
|
$do_props = 1;
|
||||||
@ -193,6 +200,9 @@ print "Unicode data from $ARGV[1]\n";
|
|||||||
|
|
||||||
open (INPUT, "< $ARGV[1]") || exit 1;
|
open (INPUT, "< $ARGV[1]") || exit 1;
|
||||||
|
|
||||||
|
# we save memory by skipping the huge empty area before U+E0000
|
||||||
|
my $pages_before_e0000;
|
||||||
|
|
||||||
$last_code = -1;
|
$last_code = -1;
|
||||||
while (<INPUT>)
|
while (<INPUT>)
|
||||||
{
|
{
|
||||||
@ -205,7 +215,10 @@ while (<INPUT>)
|
|||||||
|
|
||||||
$code = hex ($fields[$CODE]);
|
$code = hex ($fields[$CODE]);
|
||||||
|
|
||||||
last if ($code > 0xFFFF); # ignore characters out of the basic plane
|
if ($code >= 0xE0000 and $last_code < 0xE0000)
|
||||||
|
{
|
||||||
|
$pages_before_e0000 = ($last_code >> 8) + 1;
|
||||||
|
}
|
||||||
|
|
||||||
if ($code > $last_code + 1)
|
if ($code > $last_code + 1)
|
||||||
{
|
{
|
||||||
@ -237,12 +250,12 @@ close INPUT;
|
|||||||
|
|
||||||
@gfields = ('', '', 'Cn', '0', '', '', '', '', '', '', '',
|
@gfields = ('', '', 'Cn', '0', '', '', '', '', '', '', '',
|
||||||
'', '', '', '');
|
'', '', '', '');
|
||||||
for (++$last_code; $last_code < 0x10000; ++$last_code)
|
for (++$last_code; $last_code <= 0x10FFFF; ++$last_code)
|
||||||
{
|
{
|
||||||
$gfields{$CODE} = sprintf ("%04x", $last_code);
|
$gfields{$CODE} = sprintf ("%04x", $last_code);
|
||||||
&process_one ($last_code, @gfields);
|
&process_one ($last_code, @gfields);
|
||||||
}
|
}
|
||||||
--$last_code; # Want last to be 0xFFFF.
|
--$last_code; # Want last to be 0x10FFFF.
|
||||||
|
|
||||||
print "Creating line break table\n";
|
print "Creating line break table\n";
|
||||||
|
|
||||||
@ -268,7 +281,7 @@ while (<INPUT>)
|
|||||||
next;
|
next;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($fields[$CODE] =~ /([A-F0-9]{4})..([A-F0-9]{4})/)
|
if ($fields[$CODE] =~ /([A-F0-9]{4,6})\.\.([A-F0-9]{4,6})/)
|
||||||
{
|
{
|
||||||
$start_code = hex ($1);
|
$start_code = hex ($1);
|
||||||
$end_code = hex ($2);
|
$end_code = hex ($2);
|
||||||
@ -277,8 +290,6 @@ while (<INPUT>)
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
last if ($start_code > 0xFFFF); # FIXME ignore characters out of the basic plane
|
|
||||||
|
|
||||||
if ($start_code > $last_code + 1)
|
if ($start_code > $last_code + 1)
|
||||||
{
|
{
|
||||||
# The gap represents undefined characters. If assigned,
|
# The gap represents undefined characters. If assigned,
|
||||||
@ -306,7 +317,7 @@ while (<INPUT>)
|
|||||||
|
|
||||||
close INPUT;
|
close INPUT;
|
||||||
|
|
||||||
for (++$last_code; $last_code < 0x10000; ++$last_code)
|
for (++$last_code; $last_code <= 0x10FFFF; ++$last_code)
|
||||||
{
|
{
|
||||||
if ($type[$last_code] eq 'Cn')
|
if ($type[$last_code] eq 'Cn')
|
||||||
{
|
{
|
||||||
@ -317,9 +328,9 @@ for (++$last_code; $last_code < 0x10000; ++$last_code)
|
|||||||
$break_props[$last_code] = 'AL';
|
$break_props[$last_code] = 'AL';
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
--$last_code; # Want last to be 0xFFFF.
|
--$last_code; # Want last to be 0x10FFFF.
|
||||||
|
|
||||||
print STDERR "Last code is not 0xFFFF" if ($last_code != 0xFFFF);
|
print STDERR "Last code is not 0x10FFFF" if ($last_code != 0x10FFFF);
|
||||||
|
|
||||||
print "Reading special-casing table for case conversion\n";
|
print "Reading special-casing table for case conversion\n";
|
||||||
|
|
||||||
@ -403,22 +414,21 @@ while (<INPUT>)
|
|||||||
$raw_code = $fields[$FOLDING_CODE];
|
$raw_code = $fields[$FOLDING_CODE];
|
||||||
$code = hex ($raw_code);
|
$code = hex ($raw_code);
|
||||||
|
|
||||||
next if $code > 0xffff; # FIXME!
|
|
||||||
|
|
||||||
if ($#fields != 3)
|
if ($#fields != 3)
|
||||||
{
|
{
|
||||||
printf STDERR ("Entry for $raw_code has wrong number of fields (%d)\n", $#fields);
|
printf STDERR ("Entry for $raw_code has wrong number of fields (%d)\n", $#fields);
|
||||||
next;
|
next;
|
||||||
}
|
}
|
||||||
|
|
||||||
next if ($fields[$FOLDING_STATUS] eq 'S');
|
# we don't use Simple or Turkic rules here
|
||||||
|
next if ($fields[$FOLDING_STATUS] =~ /^[ST]$/);
|
||||||
|
|
||||||
@values = map { hex ($_) } split /\s+/, $fields[$FOLDING_MAPPING];
|
@values = map { hex ($_) } split /\s+/, $fields[$FOLDING_MAPPING];
|
||||||
|
|
||||||
# Check simple case
|
# Check simple case
|
||||||
|
|
||||||
if (@values == 1 &&
|
if (@values == 1 &&
|
||||||
!(defined $value[$code] && $value[$code] >= 0xd800 && $value[$code] < 0xdc00) &&
|
!(defined $value[$code] && $value[$code] >= 0x1000000) &&
|
||||||
defined $type[$code]) {
|
defined $type[$code]) {
|
||||||
|
|
||||||
my $lower;
|
my $lower;
|
||||||
@ -441,13 +451,12 @@ while (<INPUT>)
|
|||||||
}
|
}
|
||||||
|
|
||||||
my $string = pack ("U*", @values);
|
my $string = pack ("U*", @values);
|
||||||
$string =~ s/([\x80-\xff])/sprintf "\\x%02x",ord($1)/eg;
|
|
||||||
|
|
||||||
if (1 + length $string > $casefoldlen) {
|
if (1 + &length_in_bytes ($string) > $casefoldlen) {
|
||||||
$casefoldlen = 1 + length $string;
|
$casefoldlen = 1 + &length_in_bytes ($string);
|
||||||
}
|
}
|
||||||
|
|
||||||
push @casefold, [ $code, $string ];
|
push @casefold, [ $code, &escape ($string) ];
|
||||||
}
|
}
|
||||||
|
|
||||||
close INPUT;
|
close INPUT;
|
||||||
@ -464,6 +473,16 @@ if ($do_decomp) {
|
|||||||
|
|
||||||
exit 0;
|
exit 0;
|
||||||
|
|
||||||
|
|
||||||
|
# perl "length" returns the length in characters
|
||||||
|
sub length_in_bytes
|
||||||
|
{
|
||||||
|
my ($string) = @_;
|
||||||
|
|
||||||
|
use bytes;
|
||||||
|
return length $string;
|
||||||
|
}
|
||||||
|
|
||||||
# Process a single character.
|
# Process a single character.
|
||||||
sub process_one
|
sub process_one
|
||||||
{
|
{
|
||||||
@ -528,7 +547,11 @@ sub print_tables
|
|||||||
|
|
||||||
printf OUT "#define G_UNICODE_LAST_CHAR 0x%04x\n\n", $last;
|
printf OUT "#define G_UNICODE_LAST_CHAR 0x%04x\n\n", $last;
|
||||||
|
|
||||||
printf OUT "#define G_UNICODE_MAX_TABLE_INDEX 1000\n\n";
|
printf OUT "#define G_UNICODE_MAX_TABLE_INDEX 10000\n\n";
|
||||||
|
|
||||||
|
my $last_part1 = ($pages_before_e0000 * 256) - 1;
|
||||||
|
printf OUT "#define G_UNICODE_LAST_CHAR_PART1 0x%04X\n\n", $last_part1;
|
||||||
|
printf OUT "#define G_UNICODE_LAST_PAGE_PART1 %d\n\n", $pages_before_e0000 - 1;
|
||||||
|
|
||||||
$table_index = 0;
|
$table_index = 0;
|
||||||
printf OUT "static const char type_data[][256] = {\n";
|
printf OUT "static const char type_data[][256] = {\n";
|
||||||
@ -538,8 +561,9 @@ sub print_tables
|
|||||||
}
|
}
|
||||||
printf OUT "\n};\n\n";
|
printf OUT "\n};\n\n";
|
||||||
|
|
||||||
print OUT "static const short type_table[256] = {\n";
|
printf OUT "/* U+0000 through U+%04X */\n", $last_part1;
|
||||||
for ($count = 0; $count <= $last; $count += 256)
|
print OUT "static const gint16 type_table_part1[$pages_before_e0000] = {\n";
|
||||||
|
for ($count = 0; $count <= $last_part1; $count += 256)
|
||||||
{
|
{
|
||||||
print OUT ",\n" if $count > 0;
|
print OUT ",\n" if $count > 0;
|
||||||
print OUT " ", $row[$count / 256];
|
print OUT " ", $row[$count / 256];
|
||||||
@ -547,21 +571,32 @@ sub print_tables
|
|||||||
}
|
}
|
||||||
print OUT "\n};\n\n";
|
print OUT "\n};\n\n";
|
||||||
|
|
||||||
|
printf OUT "/* U+E0000 through U+%04X */\n", $last;
|
||||||
|
print OUT "static const gint16 type_table_part2[768] = {\n";
|
||||||
|
for ($count = 0xE0000; $count <= $last; $count += 256)
|
||||||
|
{
|
||||||
|
print OUT ",\n" if $count > 0xE0000;
|
||||||
|
print OUT " ", $row[$count / 256];
|
||||||
|
$bytes_out += 2;
|
||||||
|
}
|
||||||
|
print OUT "\n};\n\n";
|
||||||
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# Now print attribute table.
|
# Now print attribute table.
|
||||||
#
|
#
|
||||||
|
|
||||||
$table_index = 0;
|
$table_index = 0;
|
||||||
printf OUT "static const unsigned short attr_data[][256] = {\n";
|
printf OUT "static const gunichar attr_data[][256] = {\n";
|
||||||
for ($count = 0; $count <= $last; $count += 256)
|
for ($count = 0; $count <= $last; $count += 256)
|
||||||
{
|
{
|
||||||
$row[$count / 256] = &print_row ($count, 2, \&fetch_attr);
|
$row[$count / 256] = &print_row ($count, 4, \&fetch_attr);
|
||||||
}
|
}
|
||||||
printf OUT "\n};\n\n";
|
printf OUT "\n};\n\n";
|
||||||
|
|
||||||
print OUT "static const short attr_table[256] = {\n";
|
printf OUT "/* U+0000 through U+%04X */\n", $last_part1;
|
||||||
for ($count = 0; $count <= $last; $count += 256)
|
print OUT "static const gint16 attr_table_part1[$pages_before_e0000] = {\n";
|
||||||
|
for ($count = 0; $count <= $last_part1; $count += 256)
|
||||||
{
|
{
|
||||||
print OUT ",\n" if $count > 0;
|
print OUT ",\n" if $count > 0;
|
||||||
print OUT " ", $row[$count / 256];
|
print OUT " ", $row[$count / 256];
|
||||||
@ -569,12 +604,21 @@ sub print_tables
|
|||||||
}
|
}
|
||||||
print OUT "\n};\n\n";
|
print OUT "\n};\n\n";
|
||||||
|
|
||||||
|
printf OUT "/* U+E0000 through U+%04X */\n", $last;
|
||||||
|
print OUT "static const gint16 attr_table_part2[768] = {\n";
|
||||||
|
for ($count = 0xE0000; $count <= $last; $count += 256)
|
||||||
|
{
|
||||||
|
print OUT ",\n" if $count > 0xE0000;
|
||||||
|
print OUT " ", $row[$count / 256];
|
||||||
|
$bytes_out += 2;
|
||||||
|
}
|
||||||
|
print OUT "\n};\n\n";
|
||||||
|
|
||||||
#
|
#
|
||||||
# print title case table
|
# print title case table
|
||||||
#
|
#
|
||||||
|
|
||||||
# FIXME: type.
|
print OUT "static const gunichar title_table[][3] = {\n";
|
||||||
print OUT "static const unsigned short title_table[][3] = {\n";
|
|
||||||
my ($item);
|
my ($item);
|
||||||
my ($first) = 1;
|
my ($first) = 1;
|
||||||
foreach $item (sort keys %title_to_lower)
|
foreach $item (sort keys %title_to_lower)
|
||||||
@ -583,7 +627,7 @@ sub print_tables
|
|||||||
unless $first;
|
unless $first;
|
||||||
$first = 0;
|
$first = 0;
|
||||||
printf OUT " { 0x%04x, 0x%04x, 0x%04x }", $item, $title_to_upper{$item}, $title_to_lower{$item};
|
printf OUT " { 0x%04x, 0x%04x, 0x%04x }", $item, $title_to_upper{$item}, $title_to_lower{$item};
|
||||||
$bytes_out += 6;
|
$bytes_out += 12;
|
||||||
}
|
}
|
||||||
print OUT "\n};\n\n";
|
print OUT "\n};\n\n";
|
||||||
|
|
||||||
@ -666,6 +710,40 @@ sub print_row
|
|||||||
return sprintf "%d /* page %d */", $table_index++, $start / 256;
|
return sprintf "%d /* page %d */", $table_index++, $start / 256;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
sub escape
|
||||||
|
{
|
||||||
|
my ($string) = @_;
|
||||||
|
|
||||||
|
$string =~ s/(\C)/sprintf "\\x%02x",ord($1)/eg;
|
||||||
|
|
||||||
|
return $string;
|
||||||
|
}
|
||||||
|
|
||||||
|
# Returns the offset of $decomp in the offset string. Updates the
|
||||||
|
# referenced variables as appropriate.
|
||||||
|
sub handle_decomp ($$$$)
|
||||||
|
{
|
||||||
|
my ($decomp, $decomp_offsets_ref, $decomp_string_ref, $decomp_string_offset_ref) = @_;
|
||||||
|
my $offset = "G_UNICODE_NOT_PRESENT_OFFSET";
|
||||||
|
|
||||||
|
if (defined $decomp)
|
||||||
|
{
|
||||||
|
if (defined $decomp_offsets_ref->{$decomp})
|
||||||
|
{
|
||||||
|
$offset = $decomp_offsets_ref->{$decomp};
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
$offset = ${$decomp_string_offset_ref};
|
||||||
|
$decomp_offsets_ref->{$decomp} = $offset;
|
||||||
|
${$decomp_string_ref} .= "\n \"" . &escape ($decomp) . "\\0\" /* offset ${$decomp_string_offset_ref} */";
|
||||||
|
${$decomp_string_offset_ref} += &length_in_bytes ($decomp) + 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return $offset;
|
||||||
|
}
|
||||||
|
|
||||||
# Generate the character decomposition header.
|
# Generate the character decomposition header.
|
||||||
sub print_decomp
|
sub print_decomp
|
||||||
{
|
{
|
||||||
@ -684,19 +762,26 @@ sub print_decomp
|
|||||||
|
|
||||||
printf OUT "#define G_UNICODE_LAST_CHAR 0x%04x\n\n", $last;
|
printf OUT "#define G_UNICODE_LAST_CHAR 0x%04x\n\n", $last;
|
||||||
|
|
||||||
printf OUT "#define G_UNICODE_MAX_TABLE_INDEX 1000\n\n";
|
printf OUT "#define G_UNICODE_MAX_TABLE_INDEX (0x110000 / 256)\n\n";
|
||||||
|
|
||||||
|
my $last_part1 = ($pages_before_e0000 * 256) - 1;
|
||||||
|
printf OUT "#define G_UNICODE_LAST_CHAR_PART1 0x%04X\n\n", $last_part1;
|
||||||
|
printf OUT "#define G_UNICODE_LAST_PAGE_PART1 %d\n\n", $pages_before_e0000 - 1;
|
||||||
|
|
||||||
|
$NOT_PRESENT_OFFSET = 65535;
|
||||||
|
print OUT "#define G_UNICODE_NOT_PRESENT_OFFSET $NOT_PRESENT_OFFSET\n\n";
|
||||||
|
|
||||||
my ($count, @row);
|
my ($count, @row);
|
||||||
$table_index = 0;
|
$table_index = 0;
|
||||||
printf OUT "static const unsigned char cclass_data[][256] = {\n";
|
printf OUT "static const guchar cclass_data[][256] = {\n";
|
||||||
for ($count = 0; $count <= $last; $count += 256)
|
for ($count = 0; $count <= $last; $count += 256)
|
||||||
{
|
{
|
||||||
$row[$count / 256] = &print_row ($count, 1, \&fetch_cclass);
|
$row[$count / 256] = &print_row ($count, 1, \&fetch_cclass);
|
||||||
}
|
}
|
||||||
printf OUT "\n};\n\n";
|
printf OUT "\n};\n\n";
|
||||||
|
|
||||||
print OUT "static const short combining_class_table[256] = {\n";
|
print OUT "static const gint16 combining_class_table_part1[$pages_before_e0000] = {\n";
|
||||||
for ($count = 0; $count <= $last; $count += 256)
|
for ($count = 0; $count <= $last_part1; $count += 256)
|
||||||
{
|
{
|
||||||
print OUT ",\n" if $count > 0;
|
print OUT ",\n" if $count > 0;
|
||||||
print OUT " ", $row[$count / 256];
|
print OUT " ", $row[$count / 256];
|
||||||
@ -704,12 +789,19 @@ sub print_decomp
|
|||||||
}
|
}
|
||||||
print OUT "\n};\n\n";
|
print OUT "\n};\n\n";
|
||||||
|
|
||||||
|
print OUT "static const gint16 combining_class_table_part2[768] = {\n";
|
||||||
|
for ($count = 0xE0000; $count <= $last; $count += 256)
|
||||||
|
{
|
||||||
|
print OUT ",\n" if $count > 0xE0000;
|
||||||
|
print OUT " ", $row[$count / 256];
|
||||||
|
$bytes_out += 2;
|
||||||
|
}
|
||||||
|
print OUT "\n};\n\n";
|
||||||
|
|
||||||
print OUT "typedef struct\n{\n";
|
print OUT "typedef struct\n{\n";
|
||||||
# FIXME: type.
|
print OUT " gunichar ch;\n";
|
||||||
print OUT " unsigned short ch;\n";
|
print OUT " guint16 canon_offset;\n";
|
||||||
print OUT " unsigned char canon_offset;\n";
|
print OUT " guint16 compat_offset;\n";
|
||||||
print OUT " unsigned char compat_offset;\n";
|
|
||||||
print OUT " unsigned short expansion_offset;\n";
|
|
||||||
print OUT "} decomposition;\n\n";
|
print OUT "} decomposition;\n\n";
|
||||||
|
|
||||||
print OUT "static const decomposition decomp_table[] =\n{\n";
|
print OUT "static const decomposition decomp_table[] =\n{\n";
|
||||||
@ -737,40 +829,19 @@ sub print_decomp
|
|||||||
undef $compat_decomp;
|
undef $compat_decomp;
|
||||||
}
|
}
|
||||||
|
|
||||||
my $string = "";
|
my $canon_offset = handle_decomp ($canon_decomp, \%decomp_offsets, \$decomp_string, \$decomp_string_offset);
|
||||||
my $canon_offset = 0xff;
|
my $compat_offset = handle_decomp ($compat_decomp, \%decomp_offsets, \$decomp_string, \$decomp_string_offset);
|
||||||
my $compat_offset = 0xff;
|
|
||||||
|
|
||||||
if (defined $canon_decomp) {
|
die if $decomp_string_offset > $NOT_PRESENT_OFFSET;
|
||||||
$canon_offset = 0;
|
|
||||||
$string .= $canon_decomp;
|
|
||||||
}
|
|
||||||
if (defined $compat_decomp) {
|
|
||||||
if (defined $canon_decomp) {
|
|
||||||
$string .= "\\x00\\x00";
|
|
||||||
}
|
|
||||||
$compat_offset = (length $string) / 4;
|
|
||||||
$string .= $compat_decomp;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!defined($decomp_offsets{$string})) {
|
|
||||||
$decomp_offsets{$string} = $decomp_string_offset;
|
|
||||||
$decomp_string .= "\n \"".$string."\\0\\0\" /* offset ".
|
|
||||||
$decomp_string_offset." */";
|
|
||||||
$decomp_string_offset += ((length $string) / 4) + 2;
|
|
||||||
|
|
||||||
$bytes_out += (length $string) / 4 + 2; # "\x20"
|
|
||||||
}
|
|
||||||
|
|
||||||
printf OUT qq( { 0x%04x, %u, %u, %d }),
|
|
||||||
$count, $canon_offset, $compat_offset, $decomp_offsets{$string};
|
|
||||||
$bytes_out += 6;
|
|
||||||
|
|
||||||
|
printf OUT qq( { 0x%04x, $canon_offset, $compat_offset }), $count;
|
||||||
|
$bytes_out += 8;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
print OUT "\n};\n\n";
|
print OUT "\n};\n\n";
|
||||||
|
$bytes_out += $decomp_string_offset + 1;
|
||||||
|
|
||||||
printf OUT "static const guchar decomp_expansion_string[] = %s;\n\n", $decomp_string;
|
printf OUT "static const gchar decomp_expansion_string[] = %s;\n\n", $decomp_string;
|
||||||
|
|
||||||
print OUT "#endif /* DECOMP_H */\n";
|
print OUT "#endif /* DECOMP_H */\n";
|
||||||
|
|
||||||
@ -796,20 +867,25 @@ sub print_line_break
|
|||||||
|
|
||||||
print OUT "#define G_UNICODE_DATA_VERSION \"$ARGV[0]\"\n\n";
|
print OUT "#define G_UNICODE_DATA_VERSION \"$ARGV[0]\"\n\n";
|
||||||
|
|
||||||
printf OUT "#define G_UNICODE_LAST_CHAR 0x%04x\n\n", $last;
|
printf OUT "#define G_UNICODE_LAST_CHAR 0x%04X\n\n", $last;
|
||||||
|
|
||||||
printf OUT "#define G_UNICODE_MAX_TABLE_INDEX 1000\n\n";
|
printf OUT "#define G_UNICODE_MAX_TABLE_INDEX 10000\n\n";
|
||||||
|
|
||||||
|
my $last_part1 = ($pages_before_e0000 * 256) - 1;
|
||||||
|
printf OUT "/* the last code point that should be looked up in break_property_table_part1 */\n";
|
||||||
|
printf OUT "#define G_UNICODE_LAST_CHAR_PART1 0x%04X\n\n", $last_part1;
|
||||||
|
|
||||||
$table_index = 0;
|
$table_index = 0;
|
||||||
printf OUT "static const char break_property_data[][256] = {\n";
|
printf OUT "static const gint8 break_property_data[][256] = {\n";
|
||||||
for ($count = 0; $count <= $last; $count += 256)
|
for ($count = 0; $count <= $last; $count += 256)
|
||||||
{
|
{
|
||||||
$row[$count / 256] = &print_row ($count, 1, \&fetch_break_type);
|
$row[$count / 256] = &print_row ($count, 1, \&fetch_break_type);
|
||||||
}
|
}
|
||||||
printf OUT "\n};\n\n";
|
printf OUT "\n};\n\n";
|
||||||
|
|
||||||
print OUT "static const short break_property_table[256] = {\n";
|
printf OUT "/* U+0000 through U+%04X */\n", $last_part1;
|
||||||
for ($count = 0; $count <= $last; $count += 256)
|
print OUT "static const gint16 break_property_table_part1[$pages_before_e0000] = {\n";
|
||||||
|
for ($count = 0; $count <= $last_part1; $count += 256)
|
||||||
{
|
{
|
||||||
print OUT ",\n" if $count > 0;
|
print OUT ",\n" if $count > 0;
|
||||||
print OUT " ", $row[$count / 256];
|
print OUT " ", $row[$count / 256];
|
||||||
@ -817,6 +893,17 @@ sub print_line_break
|
|||||||
}
|
}
|
||||||
print OUT "\n};\n\n";
|
print OUT "\n};\n\n";
|
||||||
|
|
||||||
|
printf OUT "/* U+E0000 through U+%04X */\n", $last;
|
||||||
|
print OUT "static const gint16 break_property_table_part2[768] = {\n";
|
||||||
|
for ($count = 0xE0000; $count <= $last; $count += 256)
|
||||||
|
{
|
||||||
|
print OUT ",\n" if $count > 0xE0000;
|
||||||
|
print OUT " ", $row[$count / 256];
|
||||||
|
$bytes_out += 2;
|
||||||
|
}
|
||||||
|
print OUT "\n};\n\n";
|
||||||
|
|
||||||
|
|
||||||
print OUT "#endif /* BREAKTABLES_H */\n";
|
print OUT "#endif /* BREAKTABLES_H */\n";
|
||||||
|
|
||||||
close (OUT);
|
close (OUT);
|
||||||
@ -870,7 +957,7 @@ sub make_decomp
|
|||||||
my $result = "";
|
my $result = "";
|
||||||
foreach $iter (&expand_decomp ($code, $compat))
|
foreach $iter (&expand_decomp ($code, $compat))
|
||||||
{
|
{
|
||||||
$result .= sprintf "\\x%02x\\x%02x", $iter / 256, $iter & 0xff;
|
$result .= pack ("U", $iter); # to utf-8
|
||||||
}
|
}
|
||||||
|
|
||||||
$result;
|
$result;
|
||||||
@ -888,21 +975,17 @@ sub add_special_case
|
|||||||
|
|
||||||
|
|
||||||
for $value (@values) {
|
for $value (@values) {
|
||||||
$result .= sprintf ("\\x%02x\\x%02x", $value / 256, $value & 0xff);
|
$result .= pack ("U", $value); # to utf-8
|
||||||
}
|
}
|
||||||
|
|
||||||
$result .= "\\0";
|
push @special_case_offsets, $special_case_offset;
|
||||||
|
|
||||||
if (2 * @values + 2 > $special_case_len) {
|
# We encode special cases up in the 0x1000000 space
|
||||||
$special_case_len = 2 * @values + 2;
|
$value[$code] = 0x1000000 + $special_case_offset;
|
||||||
}
|
|
||||||
|
|
||||||
push @special_cases, $result;
|
$special_case_offset += 1 + &length_in_bytes ($result);
|
||||||
|
|
||||||
#
|
push @special_cases, &escape ($result);
|
||||||
# We encode special cases in the surrogate pair space
|
|
||||||
#
|
|
||||||
$value[$code] = 0xD800 + scalar(@special_cases) - 1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
sub output_special_case_table
|
sub output_special_case_table
|
||||||
@ -915,13 +998,15 @@ sub output_special_case_table
|
|||||||
* First, the best single character mapping to lowercase if Lu,
|
* First, the best single character mapping to lowercase if Lu,
|
||||||
* and to uppercase if Ll, followed by the output mapping for the two cases
|
* and to uppercase if Ll, followed by the output mapping for the two cases
|
||||||
* other than the case of the codepoint, in the order [Ll],[Lu],[Lt],
|
* other than the case of the codepoint, in the order [Ll],[Lu],[Lt],
|
||||||
* separated and terminated by a double NUL.
|
* encoded in UTF-8, separated and terminated by a null character.
|
||||||
*/
|
*/
|
||||||
static const guchar special_case_table[][$special_case_len] = {
|
static const gchar special_case_table[] = {
|
||||||
EOT
|
EOT
|
||||||
|
|
||||||
|
my $i = 0;
|
||||||
for $case (@special_cases) {
|
for $case (@special_cases) {
|
||||||
print $out qq( "$case",\n);
|
print $out qq( "$case\\0" /* offset ${special_case_offsets[$i]} */\n);
|
||||||
|
$i++;
|
||||||
}
|
}
|
||||||
|
|
||||||
print $out <<EOT;
|
print $out <<EOT;
|
||||||
@ -929,7 +1014,7 @@ EOT
|
|||||||
|
|
||||||
EOT
|
EOT
|
||||||
|
|
||||||
print STDERR "Generated ", ($special_case_len * scalar @special_cases), " bytes in special case table\n";
|
print STDERR "Generated " . ($special_case_offset + 1) . " bytes in special case table\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
sub enumerate_ordered
|
sub enumerate_ordered
|
||||||
@ -962,16 +1047,22 @@ sub output_composition_table
|
|||||||
# decompositions. At the same time, record
|
# decompositions. At the same time, record
|
||||||
# the first and second character of each decomposition
|
# the first and second character of each decomposition
|
||||||
|
|
||||||
for $code (keys %compositions) {
|
for $code (keys %compositions)
|
||||||
|
{
|
||||||
@values = map { hex ($_) } split /\s+/, $compositions{$code};
|
@values = map { hex ($_) } split /\s+/, $compositions{$code};
|
||||||
|
|
||||||
|
# non-starters
|
||||||
if ($cclass[$values[0]]) {
|
if ($cclass[$values[0]]) {
|
||||||
delete $compositions{$code};
|
delete $compositions{$code};
|
||||||
next;
|
next;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# single-character decompositions
|
||||||
if (@values == 1) {
|
if (@values == 1) {
|
||||||
delete $compositions{$code};
|
delete $compositions{$code};
|
||||||
next;
|
next;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (@values != 2) {
|
if (@values != 2) {
|
||||||
die "$code has more than two elements in its decomposition!\n";
|
die "$code has more than two elements in its decomposition!\n";
|
||||||
}
|
}
|
||||||
@ -983,10 +1074,10 @@ sub output_composition_table
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
# Assign integer indicices, removing singletons
|
# Assign integer indices, removing singletons
|
||||||
my $n_first = enumerate_ordered (\%first);
|
my $n_first = enumerate_ordered (\%first);
|
||||||
|
|
||||||
# Now record the second character if each (non-singleton) decomposition
|
# Now record the second character of each (non-singleton) decomposition
|
||||||
for $code (keys %compositions) {
|
for $code (keys %compositions) {
|
||||||
@values = map { hex ($_) } split /\s+/, $compositions{$code};
|
@values = map { hex ($_) } split /\s+/, $compositions{$code};
|
||||||
|
|
||||||
@ -1065,27 +1156,31 @@ sub output_composition_table
|
|||||||
|
|
||||||
my @row;
|
my @row;
|
||||||
$table_index = 0;
|
$table_index = 0;
|
||||||
printf OUT "static const gushort compose_data[][256] = {\n";
|
printf OUT "static const guint16 compose_data[][256] = {\n";
|
||||||
for (my $count = 0; $count <= $last; $count += 256)
|
for (my $count = 0; $count <= $last; $count += 256)
|
||||||
{
|
{
|
||||||
$row[$count / 256] = &print_row ($count, 2, sub { exists $vals{$_[0]} ? $vals{$_[0]} : 0; });
|
$row[$count / 256] = &print_row ($count, 2, sub { exists $vals{$_[0]} ? $vals{$_[0]} : 0; });
|
||||||
}
|
}
|
||||||
printf OUT "\n};\n\n";
|
printf OUT "\n};\n\n";
|
||||||
|
|
||||||
print OUT "static const short compose_table[256] = {\n";
|
print OUT "static const gint16 compose_table[256] = {\n";
|
||||||
for (my $count = 0; $count <= $last; $count += 256)
|
for (my $count = 0; $count <= $last; $count += 256)
|
||||||
{
|
{
|
||||||
print OUT ",\n" if $count > 0;
|
print OUT ",\n" if $count > 0;
|
||||||
print OUT " ", $row[$count / 256];
|
print OUT " ", $row[$count / 256];
|
||||||
$bytes_out += 4;
|
|
||||||
}
|
}
|
||||||
print OUT "\n};\n\n";
|
print OUT "\n};\n\n";
|
||||||
|
|
||||||
|
$bytes_out += 256 * 2;
|
||||||
|
|
||||||
# Output first singletons
|
# Output first singletons
|
||||||
|
|
||||||
print OUT "static const gushort compose_first_single[][2] = {\n";
|
print OUT "static const guint16 compose_first_single[][2] = {\n";
|
||||||
$i = 0;
|
$i = 0;
|
||||||
for $record (@first_singletons) {
|
for $record (@first_singletons) {
|
||||||
|
if ($record->[1] > 0xFFFF or $record->[2] > 0xFFFF) {
|
||||||
|
die "time to switch compose_first_single to gunichar" ;
|
||||||
|
}
|
||||||
print OUT ",\n" if $i++ > 0;
|
print OUT ",\n" if $i++ > 0;
|
||||||
printf OUT " { %#06x, %#06x }", $record->[1], $record->[2];
|
printf OUT " { %#06x, %#06x }", $record->[1], $record->[2];
|
||||||
}
|
}
|
||||||
@ -1095,9 +1190,12 @@ sub output_composition_table
|
|||||||
|
|
||||||
# Output second singletons
|
# Output second singletons
|
||||||
|
|
||||||
print OUT "static const gushort compose_second_single[][2] = {\n";
|
print OUT "static const guint16 compose_second_single[][2] = {\n";
|
||||||
$i = 0;
|
$i = 0;
|
||||||
for $record (@second_singletons) {
|
for $record (@second_singletons) {
|
||||||
|
if ($record->[1] > 0xFFFF or $record->[2] > 0xFFFF) {
|
||||||
|
die "time to switch compose_second_single to gunichar";
|
||||||
|
}
|
||||||
print OUT ",\n" if $i++ > 0;
|
print OUT ",\n" if $i++ > 0;
|
||||||
printf OUT " { %#06x, %#06x }", $record->[1], $record->[2];
|
printf OUT " { %#06x, %#06x }", $record->[1], $record->[2];
|
||||||
}
|
}
|
||||||
@ -1108,7 +1206,7 @@ sub output_composition_table
|
|||||||
# Output array of composition pairs
|
# Output array of composition pairs
|
||||||
|
|
||||||
print OUT <<EOT;
|
print OUT <<EOT;
|
||||||
static const gushort compose_array[$n_first][$n_second] = {
|
static const guint16 compose_array[$n_first][$n_second] = {
|
||||||
EOT
|
EOT
|
||||||
|
|
||||||
for (my $i = 0; $i < $n_first; $i++) {
|
for (my $i = 0; $i < $n_first; $i++) {
|
||||||
@ -1117,7 +1215,10 @@ EOT
|
|||||||
for (my $j = 0; $j < $n_second; $j++) {
|
for (my $j = 0; $j < $n_second; $j++) {
|
||||||
print OUT ", " if $j;
|
print OUT ", " if $j;
|
||||||
if (exists $reverse{"$i|$j"}) {
|
if (exists $reverse{"$i|$j"}) {
|
||||||
printf OUT "%#06x", $reverse{"$i|$j"};
|
if ($reverse{"$i|$j"} > 0xFFFF) {
|
||||||
|
die "time to switch compose_array to gunichar" ;
|
||||||
|
}
|
||||||
|
printf OUT "0x%04x", $reverse{"$i|$j"};
|
||||||
} else {
|
} else {
|
||||||
print OUT " 0";
|
print OUT " 0";
|
||||||
}
|
}
|
||||||
@ -1151,10 +1252,16 @@ EOT
|
|||||||
|
|
||||||
@casefold = sort { $a->[0] <=> $b->[0] } @casefold;
|
@casefold = sort { $a->[0] <=> $b->[0] } @casefold;
|
||||||
|
|
||||||
for $case (@casefold) {
|
for $case (@casefold)
|
||||||
|
{
|
||||||
$code = $case->[0];
|
$code = $case->[0];
|
||||||
$string = $case->[1];
|
$string = $case->[1];
|
||||||
print $out sprintf(qq({ %#04x, "$string" },\n), $code);
|
|
||||||
|
if ($code > 0xFFFF) {
|
||||||
|
die "time to switch casefold_table to gunichar" ;
|
||||||
|
}
|
||||||
|
|
||||||
|
print $out sprintf(qq( { 0x%04x, "$string" },\n), $code);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -25,13 +25,22 @@
|
|||||||
#include "glib.h"
|
#include "glib.h"
|
||||||
#include "gunibreak.h"
|
#include "gunibreak.h"
|
||||||
|
|
||||||
|
#define TPROP_PART1(Page, Char) \
|
||||||
|
((break_property_table_part1[Page] >= G_UNICODE_MAX_TABLE_INDEX) \
|
||||||
|
? (break_property_table_part1[Page] - G_UNICODE_MAX_TABLE_INDEX) \
|
||||||
|
: (break_property_data[break_property_table_part1[Page]][Char]))
|
||||||
|
|
||||||
#define TPROP(Page, Char) \
|
#define TPROP_PART2(Page, Char) \
|
||||||
((break_property_table[Page] >= G_UNICODE_MAX_TABLE_INDEX) \
|
((break_property_table_part2[Page] >= G_UNICODE_MAX_TABLE_INDEX) \
|
||||||
? (break_property_table[Page] - G_UNICODE_MAX_TABLE_INDEX) \
|
? (break_property_table_part2[Page] - G_UNICODE_MAX_TABLE_INDEX) \
|
||||||
: (break_property_data[break_property_table[Page]][Char]))
|
: (break_property_data[break_property_table_part2[Page]][Char]))
|
||||||
|
|
||||||
#define PROP(Char) (((Char) > (G_UNICODE_LAST_CHAR)) ? G_UNICODE_BREAK_UNKNOWN : TPROP ((Char) >> 8, (Char) & 0xff))
|
#define PROP(Char) \
|
||||||
|
(((Char) <= G_UNICODE_LAST_CHAR_PART1) \
|
||||||
|
? TPROP_PART1 ((Char) >> 8, (Char) & 0xff) \
|
||||||
|
: (((Char) >= 0xe0000 && (Char) <= G_UNICODE_LAST_CHAR) \
|
||||||
|
? TPROP_PART2 (((Char) - 0xe0000) >> 8, (Char) & 0xff) \
|
||||||
|
: G_UNICODE_BREAK_UNKNOWN))
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* g_unichar_break_type:
|
* g_unichar_break_type:
|
||||||
|
5736
glib/gunibreak.h
5736
glib/gunibreak.h
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -100,7 +100,9 @@ typedef enum
|
|||||||
G_UNICODE_BREAK_POSTFIX,
|
G_UNICODE_BREAK_POSTFIX,
|
||||||
G_UNICODE_BREAK_COMPLEX_CONTEXT,
|
G_UNICODE_BREAK_COMPLEX_CONTEXT,
|
||||||
G_UNICODE_BREAK_AMBIGUOUS,
|
G_UNICODE_BREAK_AMBIGUOUS,
|
||||||
G_UNICODE_BREAK_UNKNOWN
|
G_UNICODE_BREAK_UNKNOWN,
|
||||||
|
G_UNICODE_BREAK_NEXT_LINE,
|
||||||
|
G_UNICODE_BREAK_WORD_JOINER
|
||||||
} GUnicodeBreakType;
|
} GUnicodeBreakType;
|
||||||
|
|
||||||
/* Returns TRUE if current locale uses UTF-8 charset. If CHARSET is
|
/* Returns TRUE if current locale uses UTF-8 charset. If CHARSET is
|
||||||
|
@ -3,7 +3,7 @@
|
|||||||
#define COMPOSE_SECOND_START 357
|
#define COMPOSE_SECOND_START 357
|
||||||
#define COMPOSE_SECOND_SINGLE_START 388
|
#define COMPOSE_SECOND_SINGLE_START 388
|
||||||
|
|
||||||
static const gushort compose_data[][256] = {
|
static const guint16 compose_data[][256] = {
|
||||||
{ /* page 0, index 0 */
|
{ /* page 0, index 0 */
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
@ -222,7 +222,7 @@ static const gushort compose_data[][256] = {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
static const short compose_table[256] = {
|
static const gint16 compose_table[256] = {
|
||||||
0 /* page 0 */,
|
0 /* page 0 */,
|
||||||
1 /* page 1 */,
|
1 /* page 1 */,
|
||||||
2 /* page 2 */,
|
2 /* page 2 */,
|
||||||
@ -274,7 +274,7 @@ static const short compose_table[256] = {
|
|||||||
15 /* page 48 */
|
15 /* page 48 */
|
||||||
};
|
};
|
||||||
|
|
||||||
static const gushort compose_first_single[][2] = {
|
static const guint16 compose_first_single[][2] = {
|
||||||
{ 0x0338, 0x226e },
|
{ 0x0338, 0x226e },
|
||||||
{ 0x0338, 0x2260 },
|
{ 0x0338, 0x2260 },
|
||||||
{ 0x0338, 0x226f },
|
{ 0x0338, 0x226f },
|
||||||
@ -486,7 +486,7 @@ static const gushort compose_first_single[][2] = {
|
|||||||
{ 0x3099, 0x30fa },
|
{ 0x3099, 0x30fa },
|
||||||
{ 0x3099, 0x30fe }
|
{ 0x3099, 0x30fe }
|
||||||
};
|
};
|
||||||
static const gushort compose_second_single[][2] = {
|
static const guint16 compose_second_single[][2] = {
|
||||||
{ 0x0627, 0x0622 },
|
{ 0x0627, 0x0622 },
|
||||||
{ 0x0627, 0x0623 },
|
{ 0x0627, 0x0623 },
|
||||||
{ 0x0627, 0x0625 },
|
{ 0x0627, 0x0625 },
|
||||||
@ -506,7 +506,7 @@ static const gushort compose_second_single[][2] = {
|
|||||||
{ 0x0dd9, 0x0ddc },
|
{ 0x0dd9, 0x0ddc },
|
||||||
{ 0x0dd9, 0x0dde }
|
{ 0x0dd9, 0x0dde }
|
||||||
};
|
};
|
||||||
static const gushort compose_array[146][31] = {
|
static const guint16 compose_array[146][31] = {
|
||||||
{ 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x0100, 0x0102, 0x0226, 0x00c4, 0x1ea2, 0x00c5, 0, 0x01cd, 0x0200, 0x0202, 0, 0, 0, 0x1ea0, 0, 0x1e00, 0, 0, 0x0104, 0, 0, 0, 0, 0, 0, 0, 0 },
|
{ 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x0100, 0x0102, 0x0226, 0x00c4, 0x1ea2, 0x00c5, 0, 0x01cd, 0x0200, 0x0202, 0, 0, 0, 0x1ea0, 0, 0x1e00, 0, 0, 0x0104, 0, 0, 0, 0, 0, 0, 0, 0 },
|
||||||
{ 0, 0, 0, 0, 0, 0, 0x1e02, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1e04, 0, 0, 0, 0, 0, 0, 0, 0, 0x1e06, 0, 0, 0, 0 },
|
{ 0, 0, 0, 0, 0, 0, 0x1e02, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1e04, 0, 0, 0, 0, 0, 0, 0, 0, 0x1e06, 0, 0, 0, 0 },
|
||||||
{ 0, 0x0106, 0x0108, 0, 0, 0, 0x010a, 0, 0, 0, 0, 0x010c, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x00c7, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
|
{ 0, 0x0106, 0x0108, 0, 0, 0, 0x010a, 0, 0, 0, 0, 0x010c, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x00c7, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
|
||||||
|
@ -28,13 +28,22 @@
|
|||||||
#include "gunicomp.h"
|
#include "gunicomp.h"
|
||||||
|
|
||||||
|
|
||||||
#define CC(Page, Char) \
|
#define CC_PART1(Page, Char) \
|
||||||
((combining_class_table[Page] >= G_UNICODE_MAX_TABLE_INDEX) \
|
((combining_class_table_part1[Page] >= G_UNICODE_MAX_TABLE_INDEX) \
|
||||||
? (combining_class_table[Page] - G_UNICODE_MAX_TABLE_INDEX) \
|
? (combining_class_table_part1[Page] - G_UNICODE_MAX_TABLE_INDEX) \
|
||||||
: (cclass_data[combining_class_table[Page]][Char]))
|
: (cclass_data[combining_class_table_part1[Page]][Char]))
|
||||||
|
|
||||||
|
#define CC_PART2(Page, Char) \
|
||||||
|
((combining_class_table_part2[Page] >= G_UNICODE_MAX_TABLE_INDEX) \
|
||||||
|
? (combining_class_table_part2[Page] - G_UNICODE_MAX_TABLE_INDEX) \
|
||||||
|
: (cclass_data[combining_class_table_part2[Page]][Char]))
|
||||||
|
|
||||||
#define COMBINING_CLASS(Char) \
|
#define COMBINING_CLASS(Char) \
|
||||||
(((Char) > (G_UNICODE_LAST_CHAR)) ? 0 : CC((Char) >> 8, (Char) & 0xff))
|
(((Char) <= G_UNICODE_LAST_CHAR_PART1) \
|
||||||
|
? CC_PART1 ((Char) >> 8, (Char) & 0xff) \
|
||||||
|
: (((Char) >= 0xe0000 && (Char) <= G_UNICODE_LAST_CHAR) \
|
||||||
|
? CC_PART2 (((Char) - 0xe0000) >> 8, (Char) & 0xff) \
|
||||||
|
: 0))
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* g_unicode_canonical_ordering:
|
* g_unicode_canonical_ordering:
|
||||||
@ -84,7 +93,8 @@ g_unicode_canonical_ordering (gunichar *string,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static const guchar *
|
/* returns a pointer to a null-terminated UTF-8 string */
|
||||||
|
static const gchar *
|
||||||
find_decomposition (gunichar ch,
|
find_decomposition (gunichar ch,
|
||||||
gboolean compat)
|
gboolean compat)
|
||||||
{
|
{
|
||||||
@ -104,17 +114,17 @@ find_decomposition (gunichar ch,
|
|||||||
if (compat)
|
if (compat)
|
||||||
{
|
{
|
||||||
offset = decomp_table[half].compat_offset;
|
offset = decomp_table[half].compat_offset;
|
||||||
if (offset == 0xff)
|
if (offset == G_UNICODE_NOT_PRESENT_OFFSET)
|
||||||
offset = decomp_table[half].canon_offset;
|
offset = decomp_table[half].canon_offset;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
offset = decomp_table[half].canon_offset;
|
offset = decomp_table[half].canon_offset;
|
||||||
if (offset == 0xff)
|
if (offset == G_UNICODE_NOT_PRESENT_OFFSET)
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
return &(decomp_expansion_string[decomp_table[half].expansion_offset + offset]);
|
return &(decomp_expansion_string[offset]);
|
||||||
}
|
}
|
||||||
else if (half == start)
|
else if (half == start)
|
||||||
break;
|
break;
|
||||||
@ -142,27 +152,20 @@ gunichar *
|
|||||||
g_unicode_canonical_decomposition (gunichar ch,
|
g_unicode_canonical_decomposition (gunichar ch,
|
||||||
gsize *result_len)
|
gsize *result_len)
|
||||||
{
|
{
|
||||||
const guchar *decomp = find_decomposition (ch, FALSE);
|
const gchar *decomp = find_decomposition (ch, FALSE);
|
||||||
|
const gchar *p;
|
||||||
gunichar *r;
|
gunichar *r;
|
||||||
|
|
||||||
if (decomp)
|
if (decomp)
|
||||||
{
|
{
|
||||||
/* Found it. */
|
/* Found it. */
|
||||||
int i, len;
|
int i;
|
||||||
/* We store as a double-nul terminated string. */
|
|
||||||
for (len = 0; (decomp[len] || decomp[len + 1]);
|
|
||||||
len += 2)
|
|
||||||
;
|
|
||||||
|
|
||||||
/* We've counted twice as many bytes as there are
|
*result_len = g_utf8_strlen (decomp, -1);
|
||||||
characters. */
|
r = g_malloc (*result_len * sizeof (gunichar));
|
||||||
*result_len = len / 2;
|
|
||||||
r = g_malloc (len / 2 * sizeof (gunichar));
|
|
||||||
|
|
||||||
for (i = 0; i < len; i += 2)
|
for (p = decomp, i = 0; *p != '\0'; p = g_utf8_next_char (p), i++)
|
||||||
{
|
r[i] = g_utf8_get_char (p);
|
||||||
r[i / 2] = (decomp[i] << 8 | decomp[i + 1]);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -194,6 +197,7 @@ combine (gunichar a,
|
|||||||
gushort index_a, index_b;
|
gushort index_a, index_b;
|
||||||
|
|
||||||
index_a = COMPOSE_INDEX(a);
|
index_a = COMPOSE_INDEX(a);
|
||||||
|
|
||||||
if (index_a >= COMPOSE_FIRST_SINGLE_START && index_a < COMPOSE_SECOND_START)
|
if (index_a >= COMPOSE_FIRST_SINGLE_START && index_a < COMPOSE_SECOND_START)
|
||||||
{
|
{
|
||||||
if (b == compose_first_single[index_a - COMPOSE_FIRST_SINGLE_START][0])
|
if (b == compose_first_single[index_a - COMPOSE_FIRST_SINGLE_START][0])
|
||||||
@ -206,6 +210,7 @@ combine (gunichar a,
|
|||||||
}
|
}
|
||||||
|
|
||||||
index_b = COMPOSE_INDEX(b);
|
index_b = COMPOSE_INDEX(b);
|
||||||
|
|
||||||
if (index_b >= COMPOSE_SECOND_SINGLE_START)
|
if (index_b >= COMPOSE_SECOND_SINGLE_START)
|
||||||
{
|
{
|
||||||
if (a == compose_second_single[index_b - COMPOSE_SECOND_SINGLE_START][0])
|
if (a == compose_second_single[index_b - COMPOSE_SECOND_SINGLE_START][0])
|
||||||
@ -252,17 +257,10 @@ _g_utf8_normalize_wc (const gchar *str,
|
|||||||
{
|
{
|
||||||
gunichar wc = g_utf8_get_char (p);
|
gunichar wc = g_utf8_get_char (p);
|
||||||
|
|
||||||
const guchar *decomp = find_decomposition (wc, do_compat);
|
const gchar *decomp = find_decomposition (wc, do_compat);
|
||||||
|
|
||||||
if (decomp)
|
if (decomp)
|
||||||
{
|
n_wc += g_utf8_strlen (decomp, -1);
|
||||||
int len;
|
|
||||||
/* We store as a double-nul terminated string. */
|
|
||||||
for (len = 0; (decomp[len] || decomp[len + 1]);
|
|
||||||
len += 2)
|
|
||||||
;
|
|
||||||
n_wc += len / 2;
|
|
||||||
}
|
|
||||||
else
|
else
|
||||||
n_wc++;
|
n_wc++;
|
||||||
|
|
||||||
@ -277,7 +275,7 @@ _g_utf8_normalize_wc (const gchar *str,
|
|||||||
while ((max_len < 0 || p < str + max_len) && *p)
|
while ((max_len < 0 || p < str + max_len) && *p)
|
||||||
{
|
{
|
||||||
gunichar wc = g_utf8_get_char (p);
|
gunichar wc = g_utf8_get_char (p);
|
||||||
const guchar *decomp;
|
const gchar *decomp;
|
||||||
int cc;
|
int cc;
|
||||||
gsize old_n_wc = n_wc;
|
gsize old_n_wc = n_wc;
|
||||||
|
|
||||||
@ -285,11 +283,9 @@ _g_utf8_normalize_wc (const gchar *str,
|
|||||||
|
|
||||||
if (decomp)
|
if (decomp)
|
||||||
{
|
{
|
||||||
int len;
|
const char *pd;
|
||||||
/* We store as a double-nul terminated string. */
|
for (pd = decomp; *pd != '\0'; pd = g_utf8_next_char (pd))
|
||||||
for (len = 0; (decomp[len] || decomp[len + 1]);
|
wc_buffer[n_wc++] = g_utf8_get_char (pd);
|
||||||
len += 2)
|
|
||||||
wc_buffer[n_wc++] = (decomp[len] << 8 | decomp[len + 1]);
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
wc_buffer[n_wc++] = wc;
|
wc_buffer[n_wc++] = wc;
|
||||||
@ -318,7 +314,6 @@ _g_utf8_normalize_wc (const gchar *str,
|
|||||||
|
|
||||||
/* All decomposed and reordered */
|
/* All decomposed and reordered */
|
||||||
|
|
||||||
|
|
||||||
if (do_compose && n_wc > 0)
|
if (do_compose && n_wc > 0)
|
||||||
{
|
{
|
||||||
gsize i, j;
|
gsize i, j;
|
||||||
|
16241
glib/gunidecomp.h
16241
glib/gunidecomp.h
File diff suppressed because it is too large
Load Diff
@ -28,17 +28,30 @@
|
|||||||
#include "glib.h"
|
#include "glib.h"
|
||||||
#include "gunichartables.h"
|
#include "gunichartables.h"
|
||||||
|
|
||||||
|
#define ATTR_TABLE(Page) (((Page) <= G_UNICODE_LAST_PAGE_PART1) \
|
||||||
|
? attr_table_part1[Page] \
|
||||||
|
: attr_table_part2[(Page) - 0xe00])
|
||||||
|
|
||||||
#define ATTTABLE(Page, Char) \
|
#define ATTTABLE(Page, Char) \
|
||||||
((attr_table[Page] == G_UNICODE_MAX_TABLE_INDEX) ? 0 : (attr_data[attr_table[Page]][Char]))
|
((ATTR_TABLE(Page) == G_UNICODE_MAX_TABLE_INDEX) ? 0 : (attr_data[ATTR_TABLE(Page)][Char]))
|
||||||
|
|
||||||
#define TTYPE(Page, Char) \
|
#define TTYPE_PART1(Page, Char) \
|
||||||
((type_table[Page] >= G_UNICODE_MAX_TABLE_INDEX) \
|
((type_table_part1[Page] >= G_UNICODE_MAX_TABLE_INDEX) \
|
||||||
? (type_table[Page] - G_UNICODE_MAX_TABLE_INDEX) \
|
? (type_table_part1[Page] - G_UNICODE_MAX_TABLE_INDEX) \
|
||||||
: (type_data[type_table[Page]][Char]))
|
: (type_data[type_table_part1[Page]][Char]))
|
||||||
|
|
||||||
|
#define TTYPE_PART2(Page, Char) \
|
||||||
|
((type_table_part2[Page] >= G_UNICODE_MAX_TABLE_INDEX) \
|
||||||
|
? (type_table_part2[Page] - G_UNICODE_MAX_TABLE_INDEX) \
|
||||||
|
: (type_data[type_table_part2[Page]][Char]))
|
||||||
|
|
||||||
|
#define TYPE(Char) \
|
||||||
|
(((Char) <= G_UNICODE_LAST_CHAR_PART1) \
|
||||||
|
? TTYPE_PART1 ((Char) >> 8, (Char) & 0xff) \
|
||||||
|
: (((Char) >= 0xe0000 && (Char) <= G_UNICODE_LAST_CHAR) \
|
||||||
|
? TTYPE_PART2 (((Char) - 0xe0000) >> 8, (Char) & 0xff) \
|
||||||
|
: G_UNICODE_UNASSIGNED))
|
||||||
|
|
||||||
#define TYPE(Char) (((Char) > (G_UNICODE_LAST_CHAR)) ? G_UNICODE_UNASSIGNED : TTYPE ((Char) >> 8, (Char) & 0xff))
|
|
||||||
|
|
||||||
#define ISDIGIT(Type) ((Type) == G_UNICODE_DECIMAL_NUMBER \
|
#define ISDIGIT(Type) ((Type) == G_UNICODE_DECIMAL_NUMBER \
|
||||||
|| (Type) == G_UNICODE_LETTER_NUMBER \
|
|| (Type) == G_UNICODE_LETTER_NUMBER \
|
||||||
@ -361,10 +374,10 @@ g_unichar_toupper (gunichar c)
|
|||||||
if (t == G_UNICODE_LOWERCASE_LETTER)
|
if (t == G_UNICODE_LOWERCASE_LETTER)
|
||||||
{
|
{
|
||||||
gunichar val = ATTTABLE (c >> 8, c & 0xff);
|
gunichar val = ATTTABLE (c >> 8, c & 0xff);
|
||||||
if (val >= 0xd800 && val < 0xdc00)
|
if (val >= 0x1000000)
|
||||||
{
|
{
|
||||||
const guchar *p = special_case_table[val - 0xd800];
|
const guchar *p = special_case_table + val - 0x1000000;
|
||||||
return p[0] * 256 + p[1];
|
return g_utf8_get_char (p);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
return val ? val : c;
|
return val ? val : c;
|
||||||
@ -398,10 +411,10 @@ g_unichar_tolower (gunichar c)
|
|||||||
if (t == G_UNICODE_UPPERCASE_LETTER)
|
if (t == G_UNICODE_UPPERCASE_LETTER)
|
||||||
{
|
{
|
||||||
gunichar val = ATTTABLE (c >> 8, c & 0xff);
|
gunichar val = ATTTABLE (c >> 8, c & 0xff);
|
||||||
if (val >= 0xd800 && val < 0xdc00)
|
if (val >= 0x1000000)
|
||||||
{
|
{
|
||||||
const guchar *p = special_case_table[val - 0xd800];
|
const guchar *p = special_case_table + val - 0x1000000;
|
||||||
return p[0] * 256 + p[1];
|
return g_utf8_get_char (p);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
return val ? val : c;
|
return val ? val : c;
|
||||||
@ -561,31 +574,22 @@ output_marks (const char **p_inout,
|
|||||||
static gsize
|
static gsize
|
||||||
output_special_case (gchar *out_buffer,
|
output_special_case (gchar *out_buffer,
|
||||||
gsize len,
|
gsize len,
|
||||||
int index,
|
int offset,
|
||||||
int type,
|
int type,
|
||||||
int which)
|
int which)
|
||||||
{
|
{
|
||||||
const guchar *p = special_case_table[index];
|
const guchar *p = special_case_table + offset;
|
||||||
|
gint len;
|
||||||
|
|
||||||
if (type != G_UNICODE_TITLECASE_LETTER)
|
if (type != G_UNICODE_TITLECASE_LETTER)
|
||||||
p += 2; /* +2 to skip over "best single match" */
|
p = g_utf8_next_char (p);
|
||||||
|
|
||||||
if (which == 1)
|
if (which == 1)
|
||||||
{
|
p += strlen (p) + 1;
|
||||||
while (p[0] || p[1])
|
|
||||||
p += 2;
|
|
||||||
p += 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
while (TRUE)
|
len = strlen (p);
|
||||||
{
|
if (out_buffer)
|
||||||
gunichar ch = p[0] * 256 + p[1];
|
memcpy (out_buffer, p, len);
|
||||||
if (!ch)
|
|
||||||
break;
|
|
||||||
|
|
||||||
len += g_unichar_to_utf8 (ch, out_buffer ? out_buffer + len : NULL);
|
|
||||||
p += 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
return len;
|
return len;
|
||||||
}
|
}
|
||||||
@ -662,9 +666,9 @@ real_toupper (const gchar *str,
|
|||||||
{
|
{
|
||||||
val = ATTTABLE (c >> 8, c & 0xff);
|
val = ATTTABLE (c >> 8, c & 0xff);
|
||||||
|
|
||||||
if (val >= 0xd800 && val < 0xdc00)
|
if (val >= 0x1000000)
|
||||||
{
|
{
|
||||||
len += output_special_case (out_buffer, len, val - 0xd800, t,
|
len += output_special_case (out_buffer, len, val - 0x1000000, t,
|
||||||
t == G_UNICODE_LOWERCASE_LETTER ? 0 : 1);
|
t == G_UNICODE_LOWERCASE_LETTER ? 0 : 1);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -785,9 +789,9 @@ real_tolower (const gchar *str,
|
|||||||
{
|
{
|
||||||
val = ATTTABLE (c >> 8, c & 0xff);
|
val = ATTTABLE (c >> 8, c & 0xff);
|
||||||
|
|
||||||
if (val >= 0xd800 && val < 0xdc00)
|
if (val >= 0x1000000)
|
||||||
{
|
{
|
||||||
len += output_special_case (out_buffer, len, val - 0xd800, t, 0);
|
len += output_special_case (out_buffer, len, val - 0x1000000, t, 0);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
# Test cases generated from Unicode 3.1 data
|
# Test cases generated from Unicode 4.0 data
|
||||||
# by gen-casefold-test.pl. Do not edit.
|
# by gen-casefold-test.pl. Do not edit.
|
||||||
#
|
#
|
||||||
# Some special hand crafted tests
|
# Some special hand crafted tests
|
||||||
@ -89,8 +89,7 @@ Z z
|
|||||||
Ī ī
|
Ī ī
|
||||||
Ĭ ĭ
|
Ĭ ĭ
|
||||||
Į į
|
Į į
|
||||||
İ i
|
İ i̇
|
||||||
ı i
|
|
||||||
IJ ij
|
IJ ij
|
||||||
Ĵ ĵ
|
Ĵ ĵ
|
||||||
Ķ ķ
|
Ķ ķ
|
||||||
@ -216,6 +215,7 @@ Z z
|
|||||||
Ț ț
|
Ț ț
|
||||||
Ȝ ȝ
|
Ȝ ȝ
|
||||||
Ȟ ȟ
|
Ȟ ȟ
|
||||||
|
Ƞ ƞ
|
||||||
Ȣ ȣ
|
Ȣ ȣ
|
||||||
Ȥ ȥ
|
Ȥ ȥ
|
||||||
Ȧ ȧ
|
Ȧ ȧ
|
||||||
@ -266,6 +266,7 @@ Z z
|
|||||||
ϑ θ
|
ϑ θ
|
||||||
ϕ φ
|
ϕ φ
|
||||||
ϖ π
|
ϖ π
|
||||||
|
Ϙ ϙ
|
||||||
Ϛ ϛ
|
Ϛ ϛ
|
||||||
Ϝ ϝ
|
Ϝ ϝ
|
||||||
Ϟ ϟ
|
Ϟ ϟ
|
||||||
@ -279,9 +280,11 @@ Z z
|
|||||||
Ϯ ϯ
|
Ϯ ϯ
|
||||||
ϰ κ
|
ϰ κ
|
||||||
ϱ ρ
|
ϱ ρ
|
||||||
ϲ σ
|
|
||||||
ϴ θ
|
ϴ θ
|
||||||
ϵ ε
|
ϵ ε
|
||||||
|
Ϸ ϸ
|
||||||
|
Ϲ ϲ
|
||||||
|
Ϻ ϻ
|
||||||
Ѐ ѐ
|
Ѐ ѐ
|
||||||
Ё ё
|
Ё ё
|
||||||
Ђ ђ
|
Ђ ђ
|
||||||
@ -347,6 +350,7 @@ Z z
|
|||||||
Ѽ ѽ
|
Ѽ ѽ
|
||||||
Ѿ ѿ
|
Ѿ ѿ
|
||||||
Ҁ ҁ
|
Ҁ ҁ
|
||||||
|
Ҋ ҋ
|
||||||
Ҍ ҍ
|
Ҍ ҍ
|
||||||
Ҏ ҏ
|
Ҏ ҏ
|
||||||
Ґ ґ
|
Ґ ґ
|
||||||
@ -375,8 +379,11 @@ Z z
|
|||||||
Ҿ ҿ
|
Ҿ ҿ
|
||||||
Ӂ ӂ
|
Ӂ ӂ
|
||||||
Ӄ ӄ
|
Ӄ ӄ
|
||||||
|
Ӆ ӆ
|
||||||
Ӈ ӈ
|
Ӈ ӈ
|
||||||
|
Ӊ ӊ
|
||||||
Ӌ ӌ
|
Ӌ ӌ
|
||||||
|
Ӎ ӎ
|
||||||
Ӑ ӑ
|
Ӑ ӑ
|
||||||
Ӓ ӓ
|
Ӓ ӓ
|
||||||
Ӕ ӕ
|
Ӕ ӕ
|
||||||
@ -397,6 +404,14 @@ Z z
|
|||||||
Ӳ ӳ
|
Ӳ ӳ
|
||||||
Ӵ ӵ
|
Ӵ ӵ
|
||||||
Ӹ ӹ
|
Ӹ ӹ
|
||||||
|
Ԁ ԁ
|
||||||
|
Ԃ ԃ
|
||||||
|
Ԅ ԅ
|
||||||
|
Ԇ ԇ
|
||||||
|
Ԉ ԉ
|
||||||
|
Ԋ ԋ
|
||||||
|
Ԍ ԍ
|
||||||
|
Ԏ ԏ
|
||||||
Ա ա
|
Ա ա
|
||||||
Բ բ
|
Բ բ
|
||||||
Գ գ
|
Գ գ
|
||||||
@ -794,3 +809,43 @@ Z z
|
|||||||
X x
|
X x
|
||||||
Y y
|
Y y
|
||||||
Z z
|
Z z
|
||||||
|
𐐀 𐐨
|
||||||
|
𐐁 𐐩
|
||||||
|
𐐂 𐐪
|
||||||
|
𐐃 𐐫
|
||||||
|
𐐄 𐐬
|
||||||
|
𐐅 𐐭
|
||||||
|
𐐆 𐐮
|
||||||
|
𐐇 𐐯
|
||||||
|
𐐈 𐐰
|
||||||
|
𐐉 𐐱
|
||||||
|
𐐊 𐐲
|
||||||
|
𐐋 𐐳
|
||||||
|
𐐌 𐐴
|
||||||
|
𐐍 𐐵
|
||||||
|
𐐎 𐐶
|
||||||
|
𐐏 𐐷
|
||||||
|
𐐐 𐐸
|
||||||
|
𐐑 𐐹
|
||||||
|
𐐒 𐐺
|
||||||
|
𐐓 𐐻
|
||||||
|
𐐔 𐐼
|
||||||
|
𐐕 𐐽
|
||||||
|
𐐖 𐐾
|
||||||
|
𐐗 𐐿
|
||||||
|
𐐘 𐑀
|
||||||
|
𐐙 𐑁
|
||||||
|
𐐚 𐑂
|
||||||
|
𐐛 𐑃
|
||||||
|
𐐜 𐑄
|
||||||
|
𐐝 𐑅
|
||||||
|
𐐞 𐑆
|
||||||
|
𐐟 𐑇
|
||||||
|
𐐠 𐑈
|
||||||
|
𐐡 𐑉
|
||||||
|
𐐢 𐑊
|
||||||
|
𐐣 𐑋
|
||||||
|
𐐤 𐑌
|
||||||
|
𐐥 𐑍
|
||||||
|
𐐦 𐑎
|
||||||
|
𐐧 𐑏
|
||||||
|
1121
tests/casemap.txt
1121
tests/casemap.txt
File diff suppressed because it is too large
Load Diff
@ -24,6 +24,8 @@
|
|||||||
# I consider the output of this program to be unrestricted. Use it as
|
# I consider the output of this program to be unrestricted. Use it as
|
||||||
# you will.
|
# you will.
|
||||||
|
|
||||||
|
require 5.006;
|
||||||
|
|
||||||
# Names of fields in the CaseFolding table
|
# Names of fields in the CaseFolding table
|
||||||
$FOLDING_CODE = 0;
|
$FOLDING_CODE = 0;
|
||||||
$FOLDING_STATUS = 1;
|
$FOLDING_STATUS = 1;
|
||||||
@ -49,6 +51,7 @@ AaBbCc@@\taabbcc@@
|
|||||||
#
|
#
|
||||||
EOT
|
EOT
|
||||||
|
|
||||||
|
binmode STDOUT, ":utf8";
|
||||||
open (INPUT, "< $ARGV[1]") || exit 1;
|
open (INPUT, "< $ARGV[1]") || exit 1;
|
||||||
|
|
||||||
while (<INPUT>)
|
while (<INPUT>)
|
||||||
@ -65,15 +68,14 @@ while (<INPUT>)
|
|||||||
my $raw_code = $fields[$FOLDING_CODE];
|
my $raw_code = $fields[$FOLDING_CODE];
|
||||||
my $code = hex ($raw_code);
|
my $code = hex ($raw_code);
|
||||||
|
|
||||||
next if $code > 0xffff; # FIXME!
|
|
||||||
|
|
||||||
if ($#fields != 3)
|
if ($#fields != 3)
|
||||||
{
|
{
|
||||||
printf STDERR ("Entry for $raw_code has wrong number of fields (%d)\n", $#fields);
|
printf STDERR ("Entry for $raw_code has wrong number of fields (%d)\n", $#fields);
|
||||||
next;
|
next;
|
||||||
}
|
}
|
||||||
|
|
||||||
next if ($fields[$FOLDING_STATUS] eq 'S');
|
# skip simple and Turkic mappings
|
||||||
|
next if ($fields[$FOLDING_STATUS] =~ /^[ST]$/);
|
||||||
|
|
||||||
@values = map { hex ($_) } split /\s+/, $fields[$FOLDING_MAPPING];
|
@values = map { hex ($_) } split /\s+/, $fields[$FOLDING_MAPPING];
|
||||||
printf ("%s\t%s\n", pack ("U", $code), pack ("U*", @values));
|
printf ("%s\t%s\n", pack ("U", $code), pack ("U*", @values));
|
||||||
|
@ -23,6 +23,7 @@
|
|||||||
# I consider the output of this program to be unrestricted. Use it as
|
# I consider the output of this program to be unrestricted. Use it as
|
||||||
# you will.
|
# you will.
|
||||||
|
|
||||||
|
require 5.006;
|
||||||
use utf8;
|
use utf8;
|
||||||
|
|
||||||
if (@ARGV != 3) {
|
if (@ARGV != 3) {
|
||||||
@ -60,6 +61,7 @@ my @upper;
|
|||||||
my @title;
|
my @title;
|
||||||
my @lower;
|
my @lower;
|
||||||
|
|
||||||
|
binmode STDOUT, ":utf8";
|
||||||
open (INPUT, "< $ARGV[1]") || exit 1;
|
open (INPUT, "< $ARGV[1]") || exit 1;
|
||||||
|
|
||||||
$last_code = -1;
|
$last_code = -1;
|
||||||
@ -74,8 +76,6 @@ while (<INPUT>)
|
|||||||
|
|
||||||
$code = hex ($fields[$CODE]);
|
$code = hex ($fields[$CODE]);
|
||||||
|
|
||||||
last if ($code > 0xFFFF); # ignore characters out of the basic plane
|
|
||||||
|
|
||||||
if ($code > $last_code + 1)
|
if ($code > $last_code + 1)
|
||||||
{
|
{
|
||||||
# Found a gap.
|
# Found a gap.
|
||||||
@ -196,7 +196,7 @@ sub process_one
|
|||||||
|
|
||||||
sub print_tests
|
sub print_tests
|
||||||
{
|
{
|
||||||
for ($i = 0; $i < 0xffff; $i++) {
|
for ($i = 0; $i < 0x10ffff; $i++) {
|
||||||
if ($i == 0x3A3) {
|
if ($i == 0x3A3) {
|
||||||
# Greek sigma needs special tests
|
# Greek sigma needs special tests
|
||||||
next;
|
next;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user