Update Unicode data to 4.0. (#107974)

2003-07-30  Noah Levitt  <nlevitt@columbia.edu>

	* glib/gen-unicode-tables.pl:
	* glib/gunibreak.c:
	* glib/gunibreak.h:
	* glib/gunichartables.h:
	* glib/gunicode.h:
	* glib/gunicomp.h:
	* glib/gunidecomp.c:
	* glib/gunidecomp.h:
	* glib/guniprop.c:
	* tests/casefold.txt:
	* tests/casemap.txt:
	* tests/gen-casefold-txt.pl:
	* tests/gen-casemap-txt.pl: Update Unicode data to 4.0. (#107974)
This commit is contained in:
Noah Levitt
2003-07-31 02:27:56 +00:00
committed by Noah Levitt
parent cdf72b09e6
commit 05f99527eb
19 changed files with 22213 additions and 8644 deletions

View File

@@ -1,4 +1,4 @@
# Test cases generated from Unicode 3.1 data
# Test cases generated from Unicode 4.0 data
# by gen-casefold-test.pl. Do not edit.
#
# Some special hand crafted tests
@@ -89,8 +89,7 @@ Z z
Ī ī
Ĭ ĭ
Į į
İ i
ı i
İ i̇
IJ ij
Ĵ ĵ
Ķ ķ
@@ -216,6 +215,7 @@ Z z
Ț ț
Ȝ ȝ
Ȟ ȟ
Ƞ ƞ
Ȣ ȣ
Ȥ ȥ
Ȧ ȧ
@@ -266,6 +266,7 @@ Z z
ϑ θ
ϕ φ
ϖ π
Ϙ ϙ
Ϛ ϛ
Ϝ ϝ
Ϟ ϟ
@@ -279,9 +280,11 @@ Z z
Ϯ ϯ
ϰ κ
ϱ ρ
ϲ σ
ϴ θ
ϵ ε
Ϸ ϸ
Ϲ ϲ
Ϻ ϻ
Ѐ ѐ
Ё ё
Ђ ђ
@@ -347,6 +350,7 @@ Z z
Ѽ ѽ
Ѿ ѿ
Ҁ ҁ
Ҋ ҋ
Ҍ ҍ
Ҏ ҏ
Ґ ґ
@@ -375,8 +379,11 @@ Z z
Ҿ ҿ
Ӂ ӂ
Ӄ ӄ
Ӆ ӆ
Ӈ ӈ
Ӊ ӊ
Ӌ ӌ
Ӎ ӎ
Ӑ ӑ
Ӓ ӓ
Ӕ ӕ
@@ -397,6 +404,14 @@ Z z
Ӳ ӳ
Ӵ ӵ
Ӹ ӹ
Ԁ ԁ
Ԃ ԃ
Ԅ ԅ
Ԇ ԇ
Ԉ ԉ
Ԋ ԋ
Ԍ ԍ
Ԏ ԏ
Ա ա
Բ բ
Գ գ
@@ -794,3 +809,43 @@ Z z
𐐀 𐐨
𐐁 𐐩
𐐂 𐐪
𐐃 𐐫
𐐄 𐐬
𐐅 𐐭
𐐆 𐐮
𐐇 𐐯
𐐈 𐐰
𐐉 𐐱
𐐊 𐐲
𐐋 𐐳
𐐌 𐐴
𐐍 𐐵
𐐎 𐐶
𐐏 𐐷
𐐐 𐐸
𐐑 𐐹
𐐒 𐐺
𐐓 𐐻
𐐔 𐐼
𐐕 𐐽
𐐖 𐐾
𐐗 𐐿
𐐘 𐑀
𐐙 𐑁
𐐚 𐑂
𐐛 𐑃
𐐜 𐑄
𐐝 𐑅
𐐞 𐑆
𐐟 𐑇
𐐠 𐑈
𐐡 𐑉
𐐢 𐑊
𐐣 𐑋
𐐤 𐑌
𐐥 𐑍
𐐦 𐑎
𐐧 𐑏

File diff suppressed because it is too large Load Diff

View File

@@ -24,6 +24,8 @@
# I consider the output of this program to be unrestricted. Use it as
# you will.
require 5.006;
# Names of fields in the CaseFolding table
$FOLDING_CODE = 0;
$FOLDING_STATUS = 1;
@@ -49,6 +51,7 @@ AaBbCc@@\taabbcc@@
#
EOT
binmode STDOUT, ":utf8";
open (INPUT, "< $ARGV[1]") || exit 1;
while (<INPUT>)
@@ -65,15 +68,14 @@ while (<INPUT>)
my $raw_code = $fields[$FOLDING_CODE];
my $code = hex ($raw_code);
next if $code > 0xffff; # FIXME!
if ($#fields != 3)
{
printf STDERR ("Entry for $raw_code has wrong number of fields (%d)\n", $#fields);
next;
}
next if ($fields[$FOLDING_STATUS] eq 'S');
# skip simple and Turkic mappings
next if ($fields[$FOLDING_STATUS] =~ /^[ST]$/);
@values = map { hex ($_) } split /\s+/, $fields[$FOLDING_MAPPING];
printf ("%s\t%s\n", pack ("U", $code), pack ("U*", @values));

View File

@@ -23,6 +23,7 @@
# I consider the output of this program to be unrestricted. Use it as
# you will.
require 5.006;
use utf8;
if (@ARGV != 3) {
@@ -60,6 +61,7 @@ my @upper;
my @title;
my @lower;
binmode STDOUT, ":utf8";
open (INPUT, "< $ARGV[1]") || exit 1;
$last_code = -1;
@@ -74,8 +76,6 @@ while (<INPUT>)
$code = hex ($fields[$CODE]);
last if ($code > 0xFFFF); # ignore characters out of the basic plane
if ($code > $last_code + 1)
{
# Found a gap.
@@ -196,7 +196,7 @@ sub process_one
sub print_tests
{
for ($i = 0; $i < 0xffff; $i++) {
for ($i = 0; $i < 0x10ffff; $i++) {
if ($i == 0x3A3) {
# Greek sigma needs special tests
next;