Merge branch 'unicode-15' into 'main'

Unicode 15 support

Closes #2735

See merge request GNOME/glib!2877
This commit is contained in:
Marco Trevisan 2022-10-10 14:20:17 +00:00
commit cc02e8720d
12 changed files with 42934 additions and 41886 deletions

View File

@ -795,7 +795,7 @@ sub print_row
my ($column) = 4;
for ($i = $start; $i < $start + 256; ++$i)
{
print OUT ", "
print OUT ","
if $i > $start;
my ($text) = $values[$i - $start];
if (length ($text) + $column + 2 > 78)
@ -803,6 +803,10 @@ sub print_row
print OUT "\n ";
$column = 4;
}
else
{
print OUT " "
}
print OUT $text;
$column += length ($text) + 2;
}

View File

@ -1113,7 +1113,7 @@ static const guchar g_script_easy_table[8192] = {
G_UNICODE_SCRIPT_KANNADA, G_UNICODE_SCRIPT_KANNADA, G_UNICODE_SCRIPT_KANNADA,
G_UNICODE_SCRIPT_KANNADA, G_UNICODE_SCRIPT_KANNADA, G_UNICODE_SCRIPT_KANNADA,
G_UNICODE_SCRIPT_UNKNOWN, G_UNICODE_SCRIPT_KANNADA, G_UNICODE_SCRIPT_KANNADA,
G_UNICODE_SCRIPT_UNKNOWN, G_UNICODE_SCRIPT_UNKNOWN, G_UNICODE_SCRIPT_UNKNOWN,
G_UNICODE_SCRIPT_KANNADA, G_UNICODE_SCRIPT_UNKNOWN, G_UNICODE_SCRIPT_UNKNOWN,
G_UNICODE_SCRIPT_UNKNOWN, G_UNICODE_SCRIPT_UNKNOWN, G_UNICODE_SCRIPT_UNKNOWN,
G_UNICODE_SCRIPT_UNKNOWN, G_UNICODE_SCRIPT_UNKNOWN, G_UNICODE_SCRIPT_UNKNOWN,
G_UNICODE_SCRIPT_UNKNOWN, G_UNICODE_SCRIPT_UNKNOWN, G_UNICODE_SCRIPT_UNKNOWN,
@ -1271,7 +1271,7 @@ static const guchar g_script_easy_table[8192] = {
G_UNICODE_SCRIPT_LAO, G_UNICODE_SCRIPT_UNKNOWN, G_UNICODE_SCRIPT_LAO,
G_UNICODE_SCRIPT_UNKNOWN, G_UNICODE_SCRIPT_LAO, G_UNICODE_SCRIPT_LAO,
G_UNICODE_SCRIPT_LAO, G_UNICODE_SCRIPT_LAO, G_UNICODE_SCRIPT_LAO,
G_UNICODE_SCRIPT_LAO, G_UNICODE_SCRIPT_UNKNOWN, G_UNICODE_SCRIPT_UNKNOWN,
G_UNICODE_SCRIPT_LAO, G_UNICODE_SCRIPT_LAO, G_UNICODE_SCRIPT_UNKNOWN,
G_UNICODE_SCRIPT_LAO, G_UNICODE_SCRIPT_LAO, G_UNICODE_SCRIPT_LAO,
G_UNICODE_SCRIPT_LAO, G_UNICODE_SCRIPT_LAO, G_UNICODE_SCRIPT_LAO,
G_UNICODE_SCRIPT_LAO, G_UNICODE_SCRIPT_LAO, G_UNICODE_SCRIPT_LAO,
@ -3039,6 +3039,7 @@ static const struct {
{ 0x10e80, 42, G_UNICODE_SCRIPT_YEZIDI },
{ 0x10eab, 3, G_UNICODE_SCRIPT_YEZIDI },
{ 0x10eb0, 2, G_UNICODE_SCRIPT_YEZIDI },
{ 0x10efd, 3, G_UNICODE_SCRIPT_ARABIC },
{ 0x10f00, 40, G_UNICODE_SCRIPT_OLD_SOGDIAN },
{ 0x10f30, 42, G_UNICODE_SCRIPT_SOGDIAN },
{ 0x10f70, 26, G_UNICODE_SCRIPT_OLD_UYGHUR },
@ -3057,7 +3058,7 @@ static const struct {
{ 0x11180, 96, G_UNICODE_SCRIPT_SHARADA },
{ 0x111e1, 20, G_UNICODE_SCRIPT_SINHALA },
{ 0x11200, 18, G_UNICODE_SCRIPT_KHOJKI },
{ 0x11213, 44, G_UNICODE_SCRIPT_KHOJKI },
{ 0x11213, 47, G_UNICODE_SCRIPT_KHOJKI },
{ 0x11280, 7, G_UNICODE_SCRIPT_MULTANI },
{ 0x11288, 1, G_UNICODE_SCRIPT_MULTANI },
{ 0x1128a, 4, G_UNICODE_SCRIPT_MULTANI },
@ -3113,6 +3114,7 @@ static const struct {
{ 0x11a50, 83, G_UNICODE_SCRIPT_SOYOMBO },
{ 0x11ab0, 16, G_UNICODE_SCRIPT_CANADIAN_ABORIGINAL },
{ 0x11ac0, 57, G_UNICODE_SCRIPT_PAU_CIN_HAU },
{ 0x11b00, 10, G_UNICODE_SCRIPT_DEVANAGARI },
{ 0x11c00, 9, G_UNICODE_SCRIPT_BHAIKSUKI },
{ 0x11c0a, 45, G_UNICODE_SCRIPT_BHAIKSUKI },
{ 0x11c38, 14, G_UNICODE_SCRIPT_BHAIKSUKI },
@ -3134,6 +3136,9 @@ static const struct {
{ 0x11d93, 6, G_UNICODE_SCRIPT_GUNJALA_GONDI },
{ 0x11da0, 10, G_UNICODE_SCRIPT_GUNJALA_GONDI },
{ 0x11ee0, 25, G_UNICODE_SCRIPT_MAKASAR },
{ 0x11f00, 17, G_UNICODE_SCRIPT_KAWI },
{ 0x11f12, 41, G_UNICODE_SCRIPT_KAWI },
{ 0x11f3e, 28, G_UNICODE_SCRIPT_KAWI },
{ 0x11fb0, 1, G_UNICODE_SCRIPT_LISU },
{ 0x11fc0, 50, G_UNICODE_SCRIPT_TAMIL },
{ 0x11fff, 1, G_UNICODE_SCRIPT_TAMIL },
@ -3142,8 +3147,7 @@ static const struct {
{ 0x12470, 5, G_UNICODE_SCRIPT_CUNEIFORM },
{ 0x12480, 196, G_UNICODE_SCRIPT_CUNEIFORM },
{ 0x12f90, 99, G_UNICODE_SCRIPT_CYPRO_MINOAN },
{ 0x13000, 1071, G_UNICODE_SCRIPT_EGYPTIAN_HIEROGLYPHS },
{ 0x13430, 9, G_UNICODE_SCRIPT_EGYPTIAN_HIEROGLYPHS },
{ 0x13000, 1110, G_UNICODE_SCRIPT_EGYPTIAN_HIEROGLYPHS },
{ 0x14400, 583, G_UNICODE_SCRIPT_ANATOLIAN_HIEROGLYPHS },
{ 0x16800, 569, G_UNICODE_SCRIPT_BAMUM },
{ 0x16a40, 31, G_UNICODE_SCRIPT_MRO },
@ -3177,7 +3181,9 @@ static const struct {
{ 0x1b000, 1, G_UNICODE_SCRIPT_KATAKANA },
{ 0x1b001, 287, G_UNICODE_SCRIPT_HIRAGANA },
{ 0x1b120, 3, G_UNICODE_SCRIPT_KATAKANA },
{ 0x1b132, 1, G_UNICODE_SCRIPT_HIRAGANA },
{ 0x1b150, 3, G_UNICODE_SCRIPT_HIRAGANA },
{ 0x1b155, 1, G_UNICODE_SCRIPT_KATAKANA },
{ 0x1b164, 4, G_UNICODE_SCRIPT_KATAKANA },
{ 0x1b170, 396, G_UNICODE_SCRIPT_NUSHU },
{ 0x1bc00, 107, G_UNICODE_SCRIPT_DUPLOYAN },
@ -3201,6 +3207,7 @@ static const struct {
{ 0x1d1aa, 4, G_UNICODE_SCRIPT_INHERITED },
{ 0x1d1ae, 61, G_UNICODE_SCRIPT_COMMON },
{ 0x1d200, 70, G_UNICODE_SCRIPT_GREEK },
{ 0x1d2c0, 20, G_UNICODE_SCRIPT_COMMON },
{ 0x1d2e0, 20, G_UNICODE_SCRIPT_COMMON },
{ 0x1d300, 87, G_UNICODE_SCRIPT_COMMON },
{ 0x1d360, 25, G_UNICODE_SCRIPT_COMMON },
@ -3229,11 +3236,14 @@ static const struct {
{ 0x1da9b, 5, G_UNICODE_SCRIPT_SIGNWRITING },
{ 0x1daa1, 15, G_UNICODE_SCRIPT_SIGNWRITING },
{ 0x1df00, 31, G_UNICODE_SCRIPT_LATIN },
{ 0x1df25, 6, G_UNICODE_SCRIPT_LATIN },
{ 0x1e000, 7, G_UNICODE_SCRIPT_GLAGOLITIC },
{ 0x1e008, 17, G_UNICODE_SCRIPT_GLAGOLITIC },
{ 0x1e01b, 7, G_UNICODE_SCRIPT_GLAGOLITIC },
{ 0x1e023, 2, G_UNICODE_SCRIPT_GLAGOLITIC },
{ 0x1e026, 5, G_UNICODE_SCRIPT_GLAGOLITIC },
{ 0x1e030, 62, G_UNICODE_SCRIPT_CYRILLIC },
{ 0x1e08f, 1, G_UNICODE_SCRIPT_CYRILLIC },
{ 0x1e100, 45, G_UNICODE_SCRIPT_NYIAKENG_PUACHUE_HMONG },
{ 0x1e130, 14, G_UNICODE_SCRIPT_NYIAKENG_PUACHUE_HMONG },
{ 0x1e140, 10, G_UNICODE_SCRIPT_NYIAKENG_PUACHUE_HMONG },
@ -3241,6 +3251,7 @@ static const struct {
{ 0x1e290, 31, G_UNICODE_SCRIPT_TOTO },
{ 0x1e2c0, 58, G_UNICODE_SCRIPT_WANCHO },
{ 0x1e2ff, 1, G_UNICODE_SCRIPT_WANCHO },
{ 0x1e4d0, 42, G_UNICODE_SCRIPT_NAG_MUNDARI },
{ 0x1e7e0, 7, G_UNICODE_SCRIPT_ETHIOPIC },
{ 0x1e7e8, 4, G_UNICODE_SCRIPT_ETHIOPIC },
{ 0x1e7ed, 2, G_UNICODE_SCRIPT_ETHIOPIC },
@ -3301,10 +3312,10 @@ static const struct {
{ 0x1f250, 2, G_UNICODE_SCRIPT_COMMON },
{ 0x1f260, 6, G_UNICODE_SCRIPT_COMMON },
{ 0x1f300, 984, G_UNICODE_SCRIPT_COMMON },
{ 0x1f6dd, 16, G_UNICODE_SCRIPT_COMMON },
{ 0x1f6dc, 17, G_UNICODE_SCRIPT_COMMON },
{ 0x1f6f0, 13, G_UNICODE_SCRIPT_COMMON },
{ 0x1f700, 116, G_UNICODE_SCRIPT_COMMON },
{ 0x1f780, 89, G_UNICODE_SCRIPT_COMMON },
{ 0x1f700, 119, G_UNICODE_SCRIPT_COMMON },
{ 0x1f77b, 95, G_UNICODE_SCRIPT_COMMON },
{ 0x1f7e0, 12, G_UNICODE_SCRIPT_COMMON },
{ 0x1f7f0, 1, G_UNICODE_SCRIPT_COMMON },
{ 0x1f800, 12, G_UNICODE_SCRIPT_COMMON },
@ -3315,25 +3326,24 @@ static const struct {
{ 0x1f8b0, 2, G_UNICODE_SCRIPT_COMMON },
{ 0x1f900, 340, G_UNICODE_SCRIPT_COMMON },
{ 0x1fa60, 14, G_UNICODE_SCRIPT_COMMON },
{ 0x1fa70, 5, G_UNICODE_SCRIPT_COMMON },
{ 0x1fa78, 5, G_UNICODE_SCRIPT_COMMON },
{ 0x1fa80, 7, G_UNICODE_SCRIPT_COMMON },
{ 0x1fa90, 29, G_UNICODE_SCRIPT_COMMON },
{ 0x1fab0, 11, G_UNICODE_SCRIPT_COMMON },
{ 0x1fac0, 6, G_UNICODE_SCRIPT_COMMON },
{ 0x1fad0, 10, G_UNICODE_SCRIPT_COMMON },
{ 0x1fae0, 8, G_UNICODE_SCRIPT_COMMON },
{ 0x1faf0, 7, G_UNICODE_SCRIPT_COMMON },
{ 0x1fa70, 13, G_UNICODE_SCRIPT_COMMON },
{ 0x1fa80, 9, G_UNICODE_SCRIPT_COMMON },
{ 0x1fa90, 46, G_UNICODE_SCRIPT_COMMON },
{ 0x1fabf, 7, G_UNICODE_SCRIPT_COMMON },
{ 0x1face, 14, G_UNICODE_SCRIPT_COMMON },
{ 0x1fae0, 9, G_UNICODE_SCRIPT_COMMON },
{ 0x1faf0, 9, G_UNICODE_SCRIPT_COMMON },
{ 0x1fb00, 147, G_UNICODE_SCRIPT_COMMON },
{ 0x1fb94, 55, G_UNICODE_SCRIPT_COMMON },
{ 0x1fbf0, 10, G_UNICODE_SCRIPT_COMMON },
{ 0x20000, 42720, G_UNICODE_SCRIPT_HAN },
{ 0x2a700, 4153, G_UNICODE_SCRIPT_HAN },
{ 0x2a700, 4154, G_UNICODE_SCRIPT_HAN },
{ 0x2b740, 222, G_UNICODE_SCRIPT_HAN },
{ 0x2b820, 5762, G_UNICODE_SCRIPT_HAN },
{ 0x2ceb0, 7473, G_UNICODE_SCRIPT_HAN },
{ 0x2f800, 542, G_UNICODE_SCRIPT_HAN },
{ 0x30000, 4939, G_UNICODE_SCRIPT_HAN },
{ 0x31350, 4192, G_UNICODE_SCRIPT_HAN },
{ 0xe0001, 1, G_UNICODE_SCRIPT_COMMON },
{ 0xe0020, 96, G_UNICODE_SCRIPT_COMMON },
{ 0xe0100, 240, G_UNICODE_SCRIPT_INHERITED },

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -438,6 +438,8 @@ typedef enum
* @G_UNICODE_SCRIPT_TOTO: Toto. Since: 2.72
* @G_UNICODE_SCRIPT_VITHKUQI: Vithkuqi. Since: 2.72
* @G_UNICODE_SCRIPT_MATH: Mathematical notation. Since: 2.72
* @G_UNICODE_SCRIPT_KAWI: Kawi. Since: 2.74
* @G_UNICODE_SCRIPT_NAG_MUNDARI: Nag Mundari. Since: 2.74
*
* The #GUnicodeScript enumeration identifies different writing
* systems. The values correspond to the names as defined in the
@ -646,6 +648,10 @@ typedef enum
/* not really a Unicode script, but part of ISO 15924 */
G_UNICODE_SCRIPT_MATH, /* Zmth */
/* Unicode 15.0 additions */
G_UNICODE_SCRIPT_KAWI GLIB_AVAILABLE_ENUMERATOR_IN_2_74, /* Kawi */
G_UNICODE_SCRIPT_NAG_MUNDARI GLIB_AVAILABLE_ENUMERATOR_IN_2_74, /* Nag Mundari */
} GUnicodeScript;
GLIB_AVAILABLE_IN_ALL

File diff suppressed because it is too large Load Diff

View File

@ -477,6 +477,14 @@ g_unichar_iswide_bsearch (gunichar ch)
return FALSE;
}
/* Fallback table consulted by g_unichar_iswide(): a character whose type is
 * G_UNICODE_UNASSIGNED is still reported as wide when bsearch() with
 * interval_compare finds it in one of these ranges.
 *
 * Each entry is a pair of code points, listed in ascending, non-overlapping
 * order as bsearch() requires.
 * NOTE(review): presumably both bounds are inclusive and the ranges mirror
 * the blocks that UAX #11 treats as wide by default for unassigned code
 * points - confirm against struct Interval / interval_compare. */
static const struct Interval default_wide_blocks[] = {
{ 0x3400, 0x4dbf },
{ 0x4e00, 0x9fff },
{ 0xf900, 0xfaff },
{ 0x20000, 0x2fffd },
{ 0x30000, 0x3fffd }
};
/**
* g_unichar_iswide:
* @c: a Unicode character
@ -491,8 +499,17 @@ g_unichar_iswide (gunichar c)
{
if (c < g_unicode_width_table_wide[0].start)
return FALSE;
else
return g_unichar_iswide_bsearch (c);
else if (g_unichar_iswide_bsearch (c))
return TRUE;
else if (g_unichar_type (c) == G_UNICODE_UNASSIGNED &&
bsearch (GUINT_TO_POINTER (c),
default_wide_blocks,
G_N_ELEMENTS (default_wide_blocks),
sizeof default_wide_blocks[0],
interval_compare))
return TRUE;
return FALSE;
}
@ -1506,6 +1523,10 @@ static const guint32 iso15924_tags[] =
/* not really a Unicode script, but part of ISO 15924 */
PACK ('Z', 'm', 't', 'h'), /* G_UNICODE_SCRIPT_MATH */
/* Unicode 15.0 additions */
PACK ('K', 'a', 'w', 'i'), /* G_UNICODE_SCRIPT_KAWI */
PACK ('N', 'a', 'g', 'm'), /* G_UNICODE_SCRIPT_NAG_MUNDARI */
#undef PACK
};

View File

@ -1,4 +1,4 @@
# Test cases generated from Unicode 13.0.0 data
# Test cases generated from Unicode 15.0.0 data
# by gen-casefold-txt.py. Do not edit.
#
# Some special hand crafted tests
@ -967,6 +967,7 @@ Z z
Ⱜ ⱜ
Ⱝ ⱝ
Ⱞ ⱞ
Ⱟ ⱟ
Ⱡ ⱡ
Ɫ ɫ
Ᵽ ᵽ
@ -1147,12 +1148,16 @@ Z z
Ꞻ ꞻ
Ꞽ ꞽ
Ꞿ ꞿ
Ꟁ ꟁ
Ꟃ ꟃ
Ꞔ ꞔ
Ʂ ʂ
Ᶎ ᶎ
Ꟈ ꟈ
Ꟊ ꟊ
Ꟑ ꟑ
Ꟗ ꟗ
Ꟙ ꟙ
Ꟶ ꟶ
@ -1348,6 +1353,41 @@ Z z
𐓑 𐓹
𐓒 𐓺
𐓓 𐓻
𐕰 𐖗
𐕱 𐖘
𐕲 𐖙
𐕳 𐖚
𐕴 𐖛
𐕵 𐖜
𐕶 𐖝
𐕷 𐖞
𐕸 𐖟
𐕹 𐖠
𐕺 𐖡
𐕼 𐖣
𐕽 𐖤
𐕾 𐖥
𐕿 𐖦
𐖀 𐖧
𐖁 𐖨
𐖂 𐖩
𐖃 𐖪
𐖄 𐖫
𐖅 𐖬
𐖆 𐖭
𐖇 𐖮
𐖈 𐖯
𐖉 𐖰
𐖊 𐖱
𐖌 𐖳
𐖍 𐖴
𐖎 𐖵
𐖏 𐖶
𐖐 𐖷
𐖑 𐖸
𐖒 𐖹
𐖔 𐖻
𐖕 𐖼
𐲀 𐳀
𐲁 𐳁
𐲂 𐳂

View File

@ -1,4 +1,4 @@
# Test cases generated from Unicode 13.0.0 data
# Test cases generated from Unicode 15.0.0 data
# by gen-casemap-txt.py. Do not edit.
#
# Some special hand crafted tests
@ -1976,6 +1976,7 @@ lt_LT.UTF-8 Į́ į̇́ Į́ Į́ # LATIN CAPITAL LETTER I WITH OGONEK (with ac
Ⱜ ⱜ Ⱜ # 2C2C
Ⱝ ⱝ Ⱝ # 2C2D
Ⱞ ⱞ Ⱞ # 2C2E
Ⱟ ⱟ Ⱟ # 2C2F
ⰰ ⰰ Ⰰ Ⰰ # 2C30
ⰱ ⰱ Ⰱ Ⰱ # 2C31
ⰲ ⰲ Ⰲ Ⰲ # 2C32
@ -2023,6 +2024,7 @@ lt_LT.UTF-8 Į́ į̇́ Į́ Į́ # LATIN CAPITAL LETTER I WITH OGONEK (with ac
ⱜ ⱜ Ⱜ Ⱜ # 2C5C
ⱝ ⱝ Ⱝ Ⱝ # 2C5D
ⱞ ⱞ Ⱞ Ⱞ # 2C5E
ⱟ ⱟ Ⱟ Ⱟ # 2C5F
Ⱡ ⱡ Ⱡ # 2C60
ⱡ ⱡ Ⱡ Ⱡ # 2C61
Ɫ ɫ Ɫ # 2C62
@ -2427,6 +2429,8 @@ lt_LT.UTF-8 Į́ į̇́ Į́ Į́ # LATIN CAPITAL LETTER I WITH OGONEK (with ac
ꞽ ꞽ Ꞽ Ꞽ # A7BD
Ꞿ ꞿ Ꞿ # A7BE
ꞿ ꞿ Ꞿ Ꞿ # A7BF
Ꟁ ꟁ Ꟁ # A7C0
ꟁ ꟁ Ꟁ Ꟁ # A7C1
Ꟃ ꟃ Ꟃ # A7C2
ꟃ ꟃ Ꟃ Ꟃ # A7C3
Ꞔ ꞔ Ꞔ # A7C4
@ -2436,6 +2440,14 @@ lt_LT.UTF-8 Į́ į̇́ Į́ Į́ # LATIN CAPITAL LETTER I WITH OGONEK (with ac
ꟈ ꟈ Ꟈ Ꟈ # A7C8
Ꟊ ꟊ Ꟊ # A7C9
ꟊ ꟊ Ꟊ Ꟊ # A7CA
Ꟑ ꟑ Ꟑ # A7D0
ꟑ ꟑ Ꟑ Ꟑ # A7D1
ꟓ ꟓ # A7D3
ꟕ ꟕ # A7D5
Ꟗ ꟗ Ꟗ # A7D6
ꟗ ꟗ Ꟗ Ꟗ # A7D7
Ꟙ ꟙ Ꟙ # A7D8
ꟙ ꟙ Ꟙ Ꟙ # A7D9
Ꟶ ꟶ Ꟶ # A7F5
ꟶ ꟶ Ꟶ Ꟶ # A7F6
ꟺ ꟺ # A7FA
@ -2787,6 +2799,76 @@ lt_LT.UTF-8 Į́ į̇́ Į́ Į́ # LATIN CAPITAL LETTER I WITH OGONEK (with ac
𐓹 𐓹 𐓑 𐓑 # 104F9
𐓺 𐓺 𐓒 𐓒 # 104FA
𐓻 𐓻 𐓓 𐓓 # 104FB
𐕰 𐖗 𐕰 # 10570
𐕱 𐖘 𐕱 # 10571
𐕲 𐖙 𐕲 # 10572
𐕳 𐖚 𐕳 # 10573
𐕴 𐖛 𐕴 # 10574
𐕵 𐖜 𐕵 # 10575
𐕶 𐖝 𐕶 # 10576
𐕷 𐖞 𐕷 # 10577
𐕸 𐖟 𐕸 # 10578
𐕹 𐖠 𐕹 # 10579
𐕺 𐖡 𐕺 # 1057A
𐕼 𐖣 𐕼 # 1057C
𐕽 𐖤 𐕽 # 1057D
𐕾 𐖥 𐕾 # 1057E
𐕿 𐖦 𐕿 # 1057F
𐖀 𐖧 𐖀 # 10580
𐖁 𐖨 𐖁 # 10581
𐖂 𐖩 𐖂 # 10582
𐖃 𐖪 𐖃 # 10583
𐖄 𐖫 𐖄 # 10584
𐖅 𐖬 𐖅 # 10585
𐖆 𐖭 𐖆 # 10586
𐖇 𐖮 𐖇 # 10587
𐖈 𐖯 𐖈 # 10588
𐖉 𐖰 𐖉 # 10589
𐖊 𐖱 𐖊 # 1058A
𐖌 𐖳 𐖌 # 1058C
𐖍 𐖴 𐖍 # 1058D
𐖎 𐖵 𐖎 # 1058E
𐖏 𐖶 𐖏 # 1058F
𐖐 𐖷 𐖐 # 10590
𐖑 𐖸 𐖑 # 10591
𐖒 𐖹 𐖒 # 10592
𐖔 𐖻 𐖔 # 10594
𐖕 𐖼 𐖕 # 10595
𐖗 𐖗 𐕰 𐕰 # 10597
𐖘 𐖘 𐕱 𐕱 # 10598
𐖙 𐖙 𐕲 𐕲 # 10599
𐖚 𐖚 𐕳 𐕳 # 1059A
𐖛 𐖛 𐕴 𐕴 # 1059B
𐖜 𐖜 𐕵 𐕵 # 1059C
𐖝 𐖝 𐕶 𐕶 # 1059D
𐖞 𐖞 𐕷 𐕷 # 1059E
𐖟 𐖟 𐕸 𐕸 # 1059F
𐖠 𐖠 𐕹 𐕹 # 105A0
𐖡 𐖡 𐕺 𐕺 # 105A1
𐖣 𐖣 𐕼 𐕼 # 105A3
𐖤 𐖤 𐕽 𐕽 # 105A4
𐖥 𐖥 𐕾 𐕾 # 105A5
𐖦 𐖦 𐕿 𐕿 # 105A6
𐖧 𐖧 𐖀 𐖀 # 105A7
𐖨 𐖨 𐖁 𐖁 # 105A8
𐖩 𐖩 𐖂 𐖂 # 105A9
𐖪 𐖪 𐖃 𐖃 # 105AA
𐖫 𐖫 𐖄 𐖄 # 105AB
𐖬 𐖬 𐖅 𐖅 # 105AC
𐖭 𐖭 𐖆 𐖆 # 105AD
𐖮 𐖮 𐖇 𐖇 # 105AE
𐖯 𐖯 𐖈 𐖈 # 105AF
𐖰 𐖰 𐖉 𐖉 # 105B0
𐖱 𐖱 𐖊 𐖊 # 105B1
𐖳 𐖳 𐖌 𐖌 # 105B3
𐖴 𐖴 𐖍 𐖍 # 105B4
𐖵 𐖵 𐖎 𐖎 # 105B5
𐖶 𐖶 𐖏 𐖏 # 105B6
𐖷 𐖷 𐖐 𐖐 # 105B7
𐖸 𐖸 𐖑 𐖑 # 105B8
𐖹 𐖹 𐖒 𐖒 # 105B9
𐖻 𐖻 𐖔 𐖔 # 105BB
𐖼 𐖼 𐖕 𐖕 # 105BC
𐲀 𐳀 𐲀 # 10C80
𐲁 𐳁 𐲁 # 10C81
𐲂 𐳂 𐲂 # 10C82
@ -3953,6 +4035,42 @@ lt_LT.UTF-8 Į́ į̇́ Į́ Į́ # LATIN CAPITAL LETTER I WITH OGONEK (with ac
𝟉 𝟉 # 1D7C9
𝟊 𝟊 # 1D7CA
𝟋 𝟋 # 1D7CB
𝼀 𝼀 # 1DF00
𝼁 𝼁 # 1DF01
𝼂 𝼂 # 1DF02
𝼃 𝼃 # 1DF03
𝼄 𝼄 # 1DF04
𝼅 𝼅 # 1DF05
𝼆 𝼆 # 1DF06
𝼇 𝼇 # 1DF07
𝼈 𝼈 # 1DF08
𝼉 𝼉 # 1DF09
𝼋 𝼋 # 1DF0B
𝼌 𝼌 # 1DF0C
𝼍 𝼍 # 1DF0D
𝼎 𝼎 # 1DF0E
𝼏 𝼏 # 1DF0F
𝼐 𝼐 # 1DF10
𝼑 𝼑 # 1DF11
𝼒 𝼒 # 1DF12
𝼓 𝼓 # 1DF13
𝼔 𝼔 # 1DF14
𝼕 𝼕 # 1DF15
𝼖 𝼖 # 1DF16
𝼗 𝼗 # 1DF17
𝼘 𝼘 # 1DF18
𝼙 𝼙 # 1DF19
𝼚 𝼚 # 1DF1A
𝼛 𝼛 # 1DF1B
𝼜 𝼜 # 1DF1C
𝼝 𝼝 # 1DF1D
𝼞 𝼞 # 1DF1E
𝼥 𝼥 # 1DF25
𝼦 𝼦 # 1DF26
𝼧 𝼧 # 1DF27
𝼨 𝼨 # 1DF28
𝼩 𝼩 # 1DF29
𝼪 𝼪 # 1DF2A
𞤀 𞤢 𞤀 # 1E900
𞤁 𞤣 𞤁 # 1E901
𞤂 𞤤 𞤂 # 1E902

View File

@ -354,7 +354,9 @@ test_unichar_script (void)
{ G_UNICODE_SCRIPT_OLD_UYGHUR, 0x10F70 },
{ G_UNICODE_SCRIPT_TANGSA, 0x16A70 },
{ G_UNICODE_SCRIPT_TOTO, 0x1E290 },
{ G_UNICODE_SCRIPT_VITHKUQI, 0x10570 }
{ G_UNICODE_SCRIPT_VITHKUQI, 0x10570 },
{ G_UNICODE_SCRIPT_KAWI, 0x11F00 },
{ G_UNICODE_SCRIPT_NAG_MUNDARI, 0x1E4D0 },
};
for (i = 0; i < G_N_ELEMENTS (examples); i++)
g_assert_cmpint (g_unichar_get_script (examples[i].c), ==, examples[i].script);
@ -1848,7 +1850,11 @@ test_iso15924 (void)
{ G_UNICODE_SCRIPT_OLD_UYGHUR, "Ougr" },
{ G_UNICODE_SCRIPT_TANGSA, "Tnsa" },
{ G_UNICODE_SCRIPT_TOTO, "Toto" },
{ G_UNICODE_SCRIPT_VITHKUQI, "Vith" }
{ G_UNICODE_SCRIPT_VITHKUQI, "Vith" },
/* Unicode 15.0 additions */
{ G_UNICODE_SCRIPT_KAWI, "Kawi" },
{ G_UNICODE_SCRIPT_NAG_MUNDARI, "Nagm" },
};
guint i;
@ -1873,6 +1879,7 @@ test_iso15924 (void)
data[i].four_letter_code[2],
data[i].four_letter_code[3]);
g_test_message ("Testing script %s (code %u)", data[i].four_letter_code, code);
g_assert_cmphex (g_unicode_script_to_iso15924 (data[i].script), ==, code);
g_assert_cmpint (g_unicode_script_from_iso15924 (code), ==, data[i].script);
}

38
tools/update-unicode-data.sh Executable file
View File

@ -0,0 +1,38 @@
#!/usr/bin/env bash
#
# Regenerate GLib's Unicode tables and the casefold/casemap test data from a
# local copy of the Unicode Character Database (UCD).
#
# Usage: update-unicode-data.sh UCD-directory [version]
#
#   UCD-directory  directory holding the UCD text files; this script itself
#                  looks for UnicodeData*.txt, CaseFolding*.txt,
#                  SpecialCasing*.txt and (when no version is given)
#                  ReadMe*.txt in it
#   version        Unicode version passed to the generators; when omitted it
#                  is parsed out of the UCD ReadMe file
#
# Exits with status 1 on any usage or validation error, and aborts on the
# first failing generator command.

# Succeeds only when the (already expanded) glob passed as arguments matched
# exactly one regular file.  An unmatched glob stays literal and fails the -f
# test; several matches fail the argument-count test.
matches_single_file() {
    [ "$#" -eq 1 ] && [ -f "$1" ]
}

if [ ! -d "$1" ]; then
    echo "Usage $(basename "$0") UCD-directory [version]" >&2
    exit 1
fi

# Make the path absolute: the script changes directory before using it.
ucd=$(realpath "$1")
version=$2

# Locate the top of the checkout from the script's own location so the script
# can be started from any working directory.
if ! glib_dir=$(git -C "$(dirname "$0")" rev-parse --show-toplevel) ||
   [ -z "$glib_dir" ]; then
    echo "Could not determine the GLib source directory" >&2
    exit 1
fi

# Use the same glob patterns here as in the generator invocations below, and
# stop right away: running the generators on a bogus directory would
# overwrite the checked-in test data with garbage.
if ! matches_single_file "$ucd"/UnicodeData*.txt ||
   ! matches_single_file "$ucd"/CaseFolding*.txt; then
    echo "'$ucd' does not look like an Unicode Database directory" >&2
    exit 1
fi

if [ -z "$version" ]; then
    readme=("$ucd"/ReadMe*.txt)
    version=$(sed -n "s,.*Version \([0-9.]\+\) of the Unicode Standard.*,\1,p" \
        "${readme[@]}")

    if [ -z "$version" ]; then
        echo "Invalid version found" >&2
        exit 1
    fi
fi

cd "$glib_dir" || exit 1

echo "Updating generated code to Unicode version $version"

# From here on trace every command and abort on the first failure.
set -xe

(cd glib && ./gen-unicode-tables.pl -both "$version" "$ucd")
glib/tests/gen-casefold-txt.py "$version" \
    "$ucd"/CaseFolding*.txt > glib/tests/casefold.txt
glib/tests/gen-casemap-txt.py "$version" \
    "$ucd"/UnicodeData*.txt \
    "$ucd"/SpecialCasing*.txt > glib/tests/casemap.txt