mirror of
https://gitlab.gnome.org/GNOME/glib.git
synced 2025-01-23 20:46:14 +01:00
Merge branch 'unicode-15' into 'main'
Unicode 15 support Closes #2735 See merge request GNOME/glib!2877
This commit is contained in:
commit
cc02e8720d
@ -795,7 +795,7 @@ sub print_row
|
||||
my ($column) = 4;
|
||||
for ($i = $start; $i < $start + 256; ++$i)
|
||||
{
|
||||
print OUT ", "
|
||||
print OUT ","
|
||||
if $i > $start;
|
||||
my ($text) = $values[$i - $start];
|
||||
if (length ($text) + $column + 2 > 78)
|
||||
@ -803,6 +803,10 @@ sub print_row
|
||||
print OUT "\n ";
|
||||
$column = 4;
|
||||
}
|
||||
else
|
||||
{
|
||||
print OUT " "
|
||||
}
|
||||
print OUT $text;
|
||||
$column += length ($text) + 2;
|
||||
}
|
||||
|
@ -1113,7 +1113,7 @@ static const guchar g_script_easy_table[8192] = {
|
||||
G_UNICODE_SCRIPT_KANNADA, G_UNICODE_SCRIPT_KANNADA, G_UNICODE_SCRIPT_KANNADA,
|
||||
G_UNICODE_SCRIPT_KANNADA, G_UNICODE_SCRIPT_KANNADA, G_UNICODE_SCRIPT_KANNADA,
|
||||
G_UNICODE_SCRIPT_UNKNOWN, G_UNICODE_SCRIPT_KANNADA, G_UNICODE_SCRIPT_KANNADA,
|
||||
G_UNICODE_SCRIPT_UNKNOWN, G_UNICODE_SCRIPT_UNKNOWN, G_UNICODE_SCRIPT_UNKNOWN,
|
||||
G_UNICODE_SCRIPT_KANNADA, G_UNICODE_SCRIPT_UNKNOWN, G_UNICODE_SCRIPT_UNKNOWN,
|
||||
G_UNICODE_SCRIPT_UNKNOWN, G_UNICODE_SCRIPT_UNKNOWN, G_UNICODE_SCRIPT_UNKNOWN,
|
||||
G_UNICODE_SCRIPT_UNKNOWN, G_UNICODE_SCRIPT_UNKNOWN, G_UNICODE_SCRIPT_UNKNOWN,
|
||||
G_UNICODE_SCRIPT_UNKNOWN, G_UNICODE_SCRIPT_UNKNOWN, G_UNICODE_SCRIPT_UNKNOWN,
|
||||
@ -1271,7 +1271,7 @@ static const guchar g_script_easy_table[8192] = {
|
||||
G_UNICODE_SCRIPT_LAO, G_UNICODE_SCRIPT_UNKNOWN, G_UNICODE_SCRIPT_LAO,
|
||||
G_UNICODE_SCRIPT_UNKNOWN, G_UNICODE_SCRIPT_LAO, G_UNICODE_SCRIPT_LAO,
|
||||
G_UNICODE_SCRIPT_LAO, G_UNICODE_SCRIPT_LAO, G_UNICODE_SCRIPT_LAO,
|
||||
G_UNICODE_SCRIPT_LAO, G_UNICODE_SCRIPT_UNKNOWN, G_UNICODE_SCRIPT_UNKNOWN,
|
||||
G_UNICODE_SCRIPT_LAO, G_UNICODE_SCRIPT_LAO, G_UNICODE_SCRIPT_UNKNOWN,
|
||||
G_UNICODE_SCRIPT_LAO, G_UNICODE_SCRIPT_LAO, G_UNICODE_SCRIPT_LAO,
|
||||
G_UNICODE_SCRIPT_LAO, G_UNICODE_SCRIPT_LAO, G_UNICODE_SCRIPT_LAO,
|
||||
G_UNICODE_SCRIPT_LAO, G_UNICODE_SCRIPT_LAO, G_UNICODE_SCRIPT_LAO,
|
||||
@ -3039,6 +3039,7 @@ static const struct {
|
||||
{ 0x10e80, 42, G_UNICODE_SCRIPT_YEZIDI },
|
||||
{ 0x10eab, 3, G_UNICODE_SCRIPT_YEZIDI },
|
||||
{ 0x10eb0, 2, G_UNICODE_SCRIPT_YEZIDI },
|
||||
{ 0x10efd, 3, G_UNICODE_SCRIPT_ARABIC },
|
||||
{ 0x10f00, 40, G_UNICODE_SCRIPT_OLD_SOGDIAN },
|
||||
{ 0x10f30, 42, G_UNICODE_SCRIPT_SOGDIAN },
|
||||
{ 0x10f70, 26, G_UNICODE_SCRIPT_OLD_UYGHUR },
|
||||
@ -3057,7 +3058,7 @@ static const struct {
|
||||
{ 0x11180, 96, G_UNICODE_SCRIPT_SHARADA },
|
||||
{ 0x111e1, 20, G_UNICODE_SCRIPT_SINHALA },
|
||||
{ 0x11200, 18, G_UNICODE_SCRIPT_KHOJKI },
|
||||
{ 0x11213, 44, G_UNICODE_SCRIPT_KHOJKI },
|
||||
{ 0x11213, 47, G_UNICODE_SCRIPT_KHOJKI },
|
||||
{ 0x11280, 7, G_UNICODE_SCRIPT_MULTANI },
|
||||
{ 0x11288, 1, G_UNICODE_SCRIPT_MULTANI },
|
||||
{ 0x1128a, 4, G_UNICODE_SCRIPT_MULTANI },
|
||||
@ -3113,6 +3114,7 @@ static const struct {
|
||||
{ 0x11a50, 83, G_UNICODE_SCRIPT_SOYOMBO },
|
||||
{ 0x11ab0, 16, G_UNICODE_SCRIPT_CANADIAN_ABORIGINAL },
|
||||
{ 0x11ac0, 57, G_UNICODE_SCRIPT_PAU_CIN_HAU },
|
||||
{ 0x11b00, 10, G_UNICODE_SCRIPT_DEVANAGARI },
|
||||
{ 0x11c00, 9, G_UNICODE_SCRIPT_BHAIKSUKI },
|
||||
{ 0x11c0a, 45, G_UNICODE_SCRIPT_BHAIKSUKI },
|
||||
{ 0x11c38, 14, G_UNICODE_SCRIPT_BHAIKSUKI },
|
||||
@ -3134,6 +3136,9 @@ static const struct {
|
||||
{ 0x11d93, 6, G_UNICODE_SCRIPT_GUNJALA_GONDI },
|
||||
{ 0x11da0, 10, G_UNICODE_SCRIPT_GUNJALA_GONDI },
|
||||
{ 0x11ee0, 25, G_UNICODE_SCRIPT_MAKASAR },
|
||||
{ 0x11f00, 17, G_UNICODE_SCRIPT_KAWI },
|
||||
{ 0x11f12, 41, G_UNICODE_SCRIPT_KAWI },
|
||||
{ 0x11f3e, 28, G_UNICODE_SCRIPT_KAWI },
|
||||
{ 0x11fb0, 1, G_UNICODE_SCRIPT_LISU },
|
||||
{ 0x11fc0, 50, G_UNICODE_SCRIPT_TAMIL },
|
||||
{ 0x11fff, 1, G_UNICODE_SCRIPT_TAMIL },
|
||||
@ -3142,8 +3147,7 @@ static const struct {
|
||||
{ 0x12470, 5, G_UNICODE_SCRIPT_CUNEIFORM },
|
||||
{ 0x12480, 196, G_UNICODE_SCRIPT_CUNEIFORM },
|
||||
{ 0x12f90, 99, G_UNICODE_SCRIPT_CYPRO_MINOAN },
|
||||
{ 0x13000, 1071, G_UNICODE_SCRIPT_EGYPTIAN_HIEROGLYPHS },
|
||||
{ 0x13430, 9, G_UNICODE_SCRIPT_EGYPTIAN_HIEROGLYPHS },
|
||||
{ 0x13000, 1110, G_UNICODE_SCRIPT_EGYPTIAN_HIEROGLYPHS },
|
||||
{ 0x14400, 583, G_UNICODE_SCRIPT_ANATOLIAN_HIEROGLYPHS },
|
||||
{ 0x16800, 569, G_UNICODE_SCRIPT_BAMUM },
|
||||
{ 0x16a40, 31, G_UNICODE_SCRIPT_MRO },
|
||||
@ -3177,7 +3181,9 @@ static const struct {
|
||||
{ 0x1b000, 1, G_UNICODE_SCRIPT_KATAKANA },
|
||||
{ 0x1b001, 287, G_UNICODE_SCRIPT_HIRAGANA },
|
||||
{ 0x1b120, 3, G_UNICODE_SCRIPT_KATAKANA },
|
||||
{ 0x1b132, 1, G_UNICODE_SCRIPT_HIRAGANA },
|
||||
{ 0x1b150, 3, G_UNICODE_SCRIPT_HIRAGANA },
|
||||
{ 0x1b155, 1, G_UNICODE_SCRIPT_KATAKANA },
|
||||
{ 0x1b164, 4, G_UNICODE_SCRIPT_KATAKANA },
|
||||
{ 0x1b170, 396, G_UNICODE_SCRIPT_NUSHU },
|
||||
{ 0x1bc00, 107, G_UNICODE_SCRIPT_DUPLOYAN },
|
||||
@ -3201,6 +3207,7 @@ static const struct {
|
||||
{ 0x1d1aa, 4, G_UNICODE_SCRIPT_INHERITED },
|
||||
{ 0x1d1ae, 61, G_UNICODE_SCRIPT_COMMON },
|
||||
{ 0x1d200, 70, G_UNICODE_SCRIPT_GREEK },
|
||||
{ 0x1d2c0, 20, G_UNICODE_SCRIPT_COMMON },
|
||||
{ 0x1d2e0, 20, G_UNICODE_SCRIPT_COMMON },
|
||||
{ 0x1d300, 87, G_UNICODE_SCRIPT_COMMON },
|
||||
{ 0x1d360, 25, G_UNICODE_SCRIPT_COMMON },
|
||||
@ -3229,11 +3236,14 @@ static const struct {
|
||||
{ 0x1da9b, 5, G_UNICODE_SCRIPT_SIGNWRITING },
|
||||
{ 0x1daa1, 15, G_UNICODE_SCRIPT_SIGNWRITING },
|
||||
{ 0x1df00, 31, G_UNICODE_SCRIPT_LATIN },
|
||||
{ 0x1df25, 6, G_UNICODE_SCRIPT_LATIN },
|
||||
{ 0x1e000, 7, G_UNICODE_SCRIPT_GLAGOLITIC },
|
||||
{ 0x1e008, 17, G_UNICODE_SCRIPT_GLAGOLITIC },
|
||||
{ 0x1e01b, 7, G_UNICODE_SCRIPT_GLAGOLITIC },
|
||||
{ 0x1e023, 2, G_UNICODE_SCRIPT_GLAGOLITIC },
|
||||
{ 0x1e026, 5, G_UNICODE_SCRIPT_GLAGOLITIC },
|
||||
{ 0x1e030, 62, G_UNICODE_SCRIPT_CYRILLIC },
|
||||
{ 0x1e08f, 1, G_UNICODE_SCRIPT_CYRILLIC },
|
||||
{ 0x1e100, 45, G_UNICODE_SCRIPT_NYIAKENG_PUACHUE_HMONG },
|
||||
{ 0x1e130, 14, G_UNICODE_SCRIPT_NYIAKENG_PUACHUE_HMONG },
|
||||
{ 0x1e140, 10, G_UNICODE_SCRIPT_NYIAKENG_PUACHUE_HMONG },
|
||||
@ -3241,6 +3251,7 @@ static const struct {
|
||||
{ 0x1e290, 31, G_UNICODE_SCRIPT_TOTO },
|
||||
{ 0x1e2c0, 58, G_UNICODE_SCRIPT_WANCHO },
|
||||
{ 0x1e2ff, 1, G_UNICODE_SCRIPT_WANCHO },
|
||||
{ 0x1e4d0, 42, G_UNICODE_SCRIPT_NAG_MUNDARI },
|
||||
{ 0x1e7e0, 7, G_UNICODE_SCRIPT_ETHIOPIC },
|
||||
{ 0x1e7e8, 4, G_UNICODE_SCRIPT_ETHIOPIC },
|
||||
{ 0x1e7ed, 2, G_UNICODE_SCRIPT_ETHIOPIC },
|
||||
@ -3301,10 +3312,10 @@ static const struct {
|
||||
{ 0x1f250, 2, G_UNICODE_SCRIPT_COMMON },
|
||||
{ 0x1f260, 6, G_UNICODE_SCRIPT_COMMON },
|
||||
{ 0x1f300, 984, G_UNICODE_SCRIPT_COMMON },
|
||||
{ 0x1f6dd, 16, G_UNICODE_SCRIPT_COMMON },
|
||||
{ 0x1f6dc, 17, G_UNICODE_SCRIPT_COMMON },
|
||||
{ 0x1f6f0, 13, G_UNICODE_SCRIPT_COMMON },
|
||||
{ 0x1f700, 116, G_UNICODE_SCRIPT_COMMON },
|
||||
{ 0x1f780, 89, G_UNICODE_SCRIPT_COMMON },
|
||||
{ 0x1f700, 119, G_UNICODE_SCRIPT_COMMON },
|
||||
{ 0x1f77b, 95, G_UNICODE_SCRIPT_COMMON },
|
||||
{ 0x1f7e0, 12, G_UNICODE_SCRIPT_COMMON },
|
||||
{ 0x1f7f0, 1, G_UNICODE_SCRIPT_COMMON },
|
||||
{ 0x1f800, 12, G_UNICODE_SCRIPT_COMMON },
|
||||
@ -3315,25 +3326,24 @@ static const struct {
|
||||
{ 0x1f8b0, 2, G_UNICODE_SCRIPT_COMMON },
|
||||
{ 0x1f900, 340, G_UNICODE_SCRIPT_COMMON },
|
||||
{ 0x1fa60, 14, G_UNICODE_SCRIPT_COMMON },
|
||||
{ 0x1fa70, 5, G_UNICODE_SCRIPT_COMMON },
|
||||
{ 0x1fa78, 5, G_UNICODE_SCRIPT_COMMON },
|
||||
{ 0x1fa80, 7, G_UNICODE_SCRIPT_COMMON },
|
||||
{ 0x1fa90, 29, G_UNICODE_SCRIPT_COMMON },
|
||||
{ 0x1fab0, 11, G_UNICODE_SCRIPT_COMMON },
|
||||
{ 0x1fac0, 6, G_UNICODE_SCRIPT_COMMON },
|
||||
{ 0x1fad0, 10, G_UNICODE_SCRIPT_COMMON },
|
||||
{ 0x1fae0, 8, G_UNICODE_SCRIPT_COMMON },
|
||||
{ 0x1faf0, 7, G_UNICODE_SCRIPT_COMMON },
|
||||
{ 0x1fa70, 13, G_UNICODE_SCRIPT_COMMON },
|
||||
{ 0x1fa80, 9, G_UNICODE_SCRIPT_COMMON },
|
||||
{ 0x1fa90, 46, G_UNICODE_SCRIPT_COMMON },
|
||||
{ 0x1fabf, 7, G_UNICODE_SCRIPT_COMMON },
|
||||
{ 0x1face, 14, G_UNICODE_SCRIPT_COMMON },
|
||||
{ 0x1fae0, 9, G_UNICODE_SCRIPT_COMMON },
|
||||
{ 0x1faf0, 9, G_UNICODE_SCRIPT_COMMON },
|
||||
{ 0x1fb00, 147, G_UNICODE_SCRIPT_COMMON },
|
||||
{ 0x1fb94, 55, G_UNICODE_SCRIPT_COMMON },
|
||||
{ 0x1fbf0, 10, G_UNICODE_SCRIPT_COMMON },
|
||||
{ 0x20000, 42720, G_UNICODE_SCRIPT_HAN },
|
||||
{ 0x2a700, 4153, G_UNICODE_SCRIPT_HAN },
|
||||
{ 0x2a700, 4154, G_UNICODE_SCRIPT_HAN },
|
||||
{ 0x2b740, 222, G_UNICODE_SCRIPT_HAN },
|
||||
{ 0x2b820, 5762, G_UNICODE_SCRIPT_HAN },
|
||||
{ 0x2ceb0, 7473, G_UNICODE_SCRIPT_HAN },
|
||||
{ 0x2f800, 542, G_UNICODE_SCRIPT_HAN },
|
||||
{ 0x30000, 4939, G_UNICODE_SCRIPT_HAN },
|
||||
{ 0x31350, 4192, G_UNICODE_SCRIPT_HAN },
|
||||
{ 0xe0001, 1, G_UNICODE_SCRIPT_COMMON },
|
||||
{ 0xe0020, 96, G_UNICODE_SCRIPT_COMMON },
|
||||
{ 0xe0100, 240, G_UNICODE_SCRIPT_INHERITED },
|
||||
|
762
glib/gunibreak.h
762
glib/gunibreak.h
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -438,6 +438,8 @@ typedef enum
|
||||
* @G_UNICODE_SCRIPT_TOTO: Toto. Since: 2.72
|
||||
* @G_UNICODE_SCRIPT_VITHKUQI: Vithkuqi. Since: 2.72
|
||||
* @G_UNICODE_SCRIPT_MATH: Mathematical notation. Since: 2.72
|
||||
* @G_UNICODE_SCRIPT_KAWI: Kawi. Since: 2.74
|
||||
* @G_UNICODE_SCRIPT_NAG_MUNDARI: Nag Mundari. Since: 2.74
|
||||
*
|
||||
* The #GUnicodeScript enumeration identifies different writing
|
||||
* systems. The values correspond to the names as defined in the
|
||||
@ -646,6 +648,10 @@ typedef enum
|
||||
|
||||
/* not really a Unicode script, but part of ISO 15924 */
|
||||
G_UNICODE_SCRIPT_MATH, /* Zmth */
|
||||
|
||||
/* Unicode 15.0 additions */
|
||||
G_UNICODE_SCRIPT_KAWI GLIB_AVAILABLE_ENUMERATOR_IN_2_74, /* Kawi */
|
||||
G_UNICODE_SCRIPT_NAG_MUNDARI GLIB_AVAILABLE_ENUMERATOR_IN_2_74, /* Nag Mundari */
|
||||
} GUnicodeScript;
|
||||
|
||||
GLIB_AVAILABLE_IN_ALL
|
||||
|
2475
glib/gunidecomp.h
2475
glib/gunidecomp.h
File diff suppressed because it is too large
Load Diff
@ -477,6 +477,14 @@ g_unichar_iswide_bsearch (gunichar ch)
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* Code point ranges consulted by g_unichar_iswide() through bsearch() with
 * interval_compare: a character whose type is G_UNICODE_UNASSIGNED and that
 * falls inside one of these ranges is reported as wide.
 *
 * The entries are listed in ascending, non-overlapping order; bsearch()
 * requires sorted input, so keep them that way when editing.
 *
 * NOTE(review): these look like the blocks that UAX #11 treats as wide by
 * default even for unassigned code points -- confirm against the spec.
 */
static const struct Interval default_wide_blocks[] = {
|
||||
{ 0x3400, 0x4dbf },
|
||||
{ 0x4e00, 0x9fff },
|
||||
{ 0xf900, 0xfaff },
|
||||
{ 0x20000, 0x2fffd },
|
||||
{ 0x30000, 0x3fffd }
|
||||
};
|
||||
|
||||
/**
|
||||
* g_unichar_iswide:
|
||||
* @c: a Unicode character
|
||||
@ -491,8 +499,17 @@ g_unichar_iswide (gunichar c)
|
||||
{
|
||||
if (c < g_unicode_width_table_wide[0].start)
|
||||
return FALSE;
|
||||
else
|
||||
return g_unichar_iswide_bsearch (c);
|
||||
else if (g_unichar_iswide_bsearch (c))
|
||||
return TRUE;
|
||||
else if (g_unichar_type (c) == G_UNICODE_UNASSIGNED &&
|
||||
bsearch (GUINT_TO_POINTER (c),
|
||||
default_wide_blocks,
|
||||
G_N_ELEMENTS (default_wide_blocks),
|
||||
sizeof default_wide_blocks[0],
|
||||
interval_compare))
|
||||
return TRUE;
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
|
||||
@ -1506,6 +1523,10 @@ static const guint32 iso15924_tags[] =
|
||||
/* not really a Unicode script, but part of ISO 15924 */
|
||||
PACK ('Z', 'm', 't', 'h'), /* G_UNICODE_SCRIPT_MATH */
|
||||
|
||||
/* Unicode 15.0 additions */
|
||||
PACK ('K', 'a', 'w', 'i'), /* G_UNICODE_SCRIPT_KAWI */
|
||||
PACK ('N', 'a', 'g', 'm'), /* G_UNICODE_SCRIPT_NAG_MUNDARI */
|
||||
|
||||
#undef PACK
|
||||
};
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
# Test cases generated from Unicode 13.0.0 data
|
||||
# Test cases generated from Unicode 15.0.0 data
|
||||
# by gen-casefold-txt.py. Do not edit.
|
||||
#
|
||||
# Some special hand crafted tests
|
||||
@ -967,6 +967,7 @@ Z z
|
||||
Ⱜ ⱜ
|
||||
Ⱝ ⱝ
|
||||
Ⱞ ⱞ
|
||||
Ⱟ ⱟ
|
||||
Ⱡ ⱡ
|
||||
Ɫ ɫ
|
||||
Ᵽ ᵽ
|
||||
@ -1147,12 +1148,16 @@ Z z
|
||||
Ꞻ ꞻ
|
||||
Ꞽ ꞽ
|
||||
Ꞿ ꞿ
|
||||
Ꟁ ꟁ
|
||||
Ꟃ ꟃ
|
||||
Ꞔ ꞔ
|
||||
Ʂ ʂ
|
||||
Ᶎ ᶎ
|
||||
Ꟈ ꟈ
|
||||
Ꟊ ꟊ
|
||||
Ꟑ ꟑ
|
||||
Ꟗ ꟗ
|
||||
Ꟙ ꟙ
|
||||
Ꟶ ꟶ
|
||||
ꭰ Ꭰ
|
||||
ꭱ Ꭱ
|
||||
@ -1348,6 +1353,41 @@ Z z
|
||||
𐓑 𐓹
|
||||
𐓒 𐓺
|
||||
𐓓 𐓻
|
||||
𐕰 𐖗
|
||||
𐕱 𐖘
|
||||
𐕲 𐖙
|
||||
𐕳 𐖚
|
||||
𐕴 𐖛
|
||||
𐕵 𐖜
|
||||
𐕶 𐖝
|
||||
𐕷 𐖞
|
||||
𐕸 𐖟
|
||||
𐕹 𐖠
|
||||
𐕺 𐖡
|
||||
𐕼 𐖣
|
||||
𐕽 𐖤
|
||||
𐕾 𐖥
|
||||
𐕿 𐖦
|
||||
𐖀 𐖧
|
||||
𐖁 𐖨
|
||||
𐖂 𐖩
|
||||
𐖃 𐖪
|
||||
𐖄 𐖫
|
||||
𐖅 𐖬
|
||||
𐖆 𐖭
|
||||
𐖇 𐖮
|
||||
𐖈 𐖯
|
||||
𐖉 𐖰
|
||||
𐖊 𐖱
|
||||
𐖌 𐖳
|
||||
𐖍 𐖴
|
||||
𐖎 𐖵
|
||||
𐖏 𐖶
|
||||
𐖐 𐖷
|
||||
𐖑 𐖸
|
||||
𐖒 𐖹
|
||||
𐖔 𐖻
|
||||
𐖕 𐖼
|
||||
𐲀 𐳀
|
||||
𐲁 𐳁
|
||||
𐲂 𐳂
|
||||
|
@ -1,4 +1,4 @@
|
||||
# Test cases generated from Unicode 13.0.0 data
|
||||
# Test cases generated from Unicode 15.0.0 data
|
||||
# by gen-casemap-txt.py. Do not edit.
|
||||
#
|
||||
# Some special hand crafted tests
|
||||
@ -1976,6 +1976,7 @@ lt_LT.UTF-8 Į́ į̇́ Į́ Į́ # LATIN CAPITAL LETTER I WITH OGONEK (with ac
|
||||
Ⱜ ⱜ Ⱜ # 2C2C
|
||||
Ⱝ ⱝ Ⱝ # 2C2D
|
||||
Ⱞ ⱞ Ⱞ # 2C2E
|
||||
Ⱟ ⱟ Ⱟ # 2C2F
|
||||
ⰰ ⰰ Ⰰ Ⰰ # 2C30
|
||||
ⰱ ⰱ Ⰱ Ⰱ # 2C31
|
||||
ⰲ ⰲ Ⰲ Ⰲ # 2C32
|
||||
@ -2023,6 +2024,7 @@ lt_LT.UTF-8 Į́ į̇́ Į́ Į́ # LATIN CAPITAL LETTER I WITH OGONEK (with ac
|
||||
ⱜ ⱜ Ⱜ Ⱜ # 2C5C
|
||||
ⱝ ⱝ Ⱝ Ⱝ # 2C5D
|
||||
ⱞ ⱞ Ⱞ Ⱞ # 2C5E
|
||||
ⱟ ⱟ Ⱟ Ⱟ # 2C5F
|
||||
Ⱡ ⱡ Ⱡ # 2C60
|
||||
ⱡ ⱡ Ⱡ Ⱡ # 2C61
|
||||
Ɫ ɫ Ɫ # 2C62
|
||||
@ -2427,6 +2429,8 @@ lt_LT.UTF-8 Į́ į̇́ Į́ Į́ # LATIN CAPITAL LETTER I WITH OGONEK (with ac
|
||||
ꞽ ꞽ Ꞽ Ꞽ # A7BD
|
||||
Ꞿ ꞿ Ꞿ # A7BE
|
||||
ꞿ ꞿ Ꞿ Ꞿ # A7BF
|
||||
Ꟁ ꟁ Ꟁ # A7C0
|
||||
ꟁ ꟁ Ꟁ Ꟁ # A7C1
|
||||
Ꟃ ꟃ Ꟃ # A7C2
|
||||
ꟃ ꟃ Ꟃ Ꟃ # A7C3
|
||||
Ꞔ ꞔ Ꞔ # A7C4
|
||||
@ -2436,6 +2440,14 @@ lt_LT.UTF-8 Į́ į̇́ Į́ Į́ # LATIN CAPITAL LETTER I WITH OGONEK (with ac
|
||||
ꟈ ꟈ Ꟈ Ꟈ # A7C8
|
||||
Ꟊ ꟊ Ꟊ # A7C9
|
||||
ꟊ ꟊ Ꟊ Ꟊ # A7CA
|
||||
Ꟑ ꟑ Ꟑ # A7D0
|
||||
ꟑ ꟑ Ꟑ Ꟑ # A7D1
|
||||
ꟓ ꟓ # A7D3
|
||||
ꟕ ꟕ # A7D5
|
||||
Ꟗ ꟗ Ꟗ # A7D6
|
||||
ꟗ ꟗ Ꟗ Ꟗ # A7D7
|
||||
Ꟙ ꟙ Ꟙ # A7D8
|
||||
ꟙ ꟙ Ꟙ Ꟙ # A7D9
|
||||
Ꟶ ꟶ Ꟶ # A7F5
|
||||
ꟶ ꟶ Ꟶ Ꟶ # A7F6
|
||||
ꟺ ꟺ # A7FA
|
||||
@ -2787,6 +2799,76 @@ lt_LT.UTF-8 Į́ į̇́ Į́ Į́ # LATIN CAPITAL LETTER I WITH OGONEK (with ac
|
||||
𐓹 𐓹 𐓑 𐓑 # 104F9
|
||||
𐓺 𐓺 𐓒 𐓒 # 104FA
|
||||
𐓻 𐓻 𐓓 𐓓 # 104FB
|
||||
𐕰 𐖗 𐕰 # 10570
|
||||
𐕱 𐖘 𐕱 # 10571
|
||||
𐕲 𐖙 𐕲 # 10572
|
||||
𐕳 𐖚 𐕳 # 10573
|
||||
𐕴 𐖛 𐕴 # 10574
|
||||
𐕵 𐖜 𐕵 # 10575
|
||||
𐕶 𐖝 𐕶 # 10576
|
||||
𐕷 𐖞 𐕷 # 10577
|
||||
𐕸 𐖟 𐕸 # 10578
|
||||
𐕹 𐖠 𐕹 # 10579
|
||||
𐕺 𐖡 𐕺 # 1057A
|
||||
𐕼 𐖣 𐕼 # 1057C
|
||||
𐕽 𐖤 𐕽 # 1057D
|
||||
𐕾 𐖥 𐕾 # 1057E
|
||||
𐕿 𐖦 𐕿 # 1057F
|
||||
𐖀 𐖧 𐖀 # 10580
|
||||
𐖁 𐖨 𐖁 # 10581
|
||||
𐖂 𐖩 𐖂 # 10582
|
||||
𐖃 𐖪 𐖃 # 10583
|
||||
𐖄 𐖫 𐖄 # 10584
|
||||
𐖅 𐖬 𐖅 # 10585
|
||||
𐖆 𐖭 𐖆 # 10586
|
||||
𐖇 𐖮 𐖇 # 10587
|
||||
𐖈 𐖯 𐖈 # 10588
|
||||
𐖉 𐖰 𐖉 # 10589
|
||||
𐖊 𐖱 𐖊 # 1058A
|
||||
𐖌 𐖳 𐖌 # 1058C
|
||||
𐖍 𐖴 𐖍 # 1058D
|
||||
𐖎 𐖵 𐖎 # 1058E
|
||||
𐖏 𐖶 𐖏 # 1058F
|
||||
𐖐 𐖷 𐖐 # 10590
|
||||
𐖑 𐖸 𐖑 # 10591
|
||||
𐖒 𐖹 𐖒 # 10592
|
||||
𐖔 𐖻 𐖔 # 10594
|
||||
𐖕 𐖼 𐖕 # 10595
|
||||
𐖗 𐖗 𐕰 𐕰 # 10597
|
||||
𐖘 𐖘 𐕱 𐕱 # 10598
|
||||
𐖙 𐖙 𐕲 𐕲 # 10599
|
||||
𐖚 𐖚 𐕳 𐕳 # 1059A
|
||||
𐖛 𐖛 𐕴 𐕴 # 1059B
|
||||
𐖜 𐖜 𐕵 𐕵 # 1059C
|
||||
𐖝 𐖝 𐕶 𐕶 # 1059D
|
||||
𐖞 𐖞 𐕷 𐕷 # 1059E
|
||||
𐖟 𐖟 𐕸 𐕸 # 1059F
|
||||
𐖠 𐖠 𐕹 𐕹 # 105A0
|
||||
𐖡 𐖡 𐕺 𐕺 # 105A1
|
||||
𐖣 𐖣 𐕼 𐕼 # 105A3
|
||||
𐖤 𐖤 𐕽 𐕽 # 105A4
|
||||
𐖥 𐖥 𐕾 𐕾 # 105A5
|
||||
𐖦 𐖦 𐕿 𐕿 # 105A6
|
||||
𐖧 𐖧 𐖀 𐖀 # 105A7
|
||||
𐖨 𐖨 𐖁 𐖁 # 105A8
|
||||
𐖩 𐖩 𐖂 𐖂 # 105A9
|
||||
𐖪 𐖪 𐖃 𐖃 # 105AA
|
||||
𐖫 𐖫 𐖄 𐖄 # 105AB
|
||||
𐖬 𐖬 𐖅 𐖅 # 105AC
|
||||
𐖭 𐖭 𐖆 𐖆 # 105AD
|
||||
𐖮 𐖮 𐖇 𐖇 # 105AE
|
||||
𐖯 𐖯 𐖈 𐖈 # 105AF
|
||||
𐖰 𐖰 𐖉 𐖉 # 105B0
|
||||
𐖱 𐖱 𐖊 𐖊 # 105B1
|
||||
𐖳 𐖳 𐖌 𐖌 # 105B3
|
||||
𐖴 𐖴 𐖍 𐖍 # 105B4
|
||||
𐖵 𐖵 𐖎 𐖎 # 105B5
|
||||
𐖶 𐖶 𐖏 𐖏 # 105B6
|
||||
𐖷 𐖷 𐖐 𐖐 # 105B7
|
||||
𐖸 𐖸 𐖑 𐖑 # 105B8
|
||||
𐖹 𐖹 𐖒 𐖒 # 105B9
|
||||
𐖻 𐖻 𐖔 𐖔 # 105BB
|
||||
𐖼 𐖼 𐖕 𐖕 # 105BC
|
||||
𐲀 𐳀 𐲀 # 10C80
|
||||
𐲁 𐳁 𐲁 # 10C81
|
||||
𐲂 𐳂 𐲂 # 10C82
|
||||
@ -3953,6 +4035,42 @@ lt_LT.UTF-8 Į́ į̇́ Į́ Į́ # LATIN CAPITAL LETTER I WITH OGONEK (with ac
|
||||
𝟉 𝟉 # 1D7C9
|
||||
𝟊 𝟊 # 1D7CA
|
||||
𝟋 𝟋 # 1D7CB
|
||||
𝼀 𝼀 # 1DF00
|
||||
𝼁 𝼁 # 1DF01
|
||||
𝼂 𝼂 # 1DF02
|
||||
𝼃 𝼃 # 1DF03
|
||||
𝼄 𝼄 # 1DF04
|
||||
𝼅 𝼅 # 1DF05
|
||||
𝼆 𝼆 # 1DF06
|
||||
𝼇 𝼇 # 1DF07
|
||||
𝼈 𝼈 # 1DF08
|
||||
𝼉 𝼉 # 1DF09
|
||||
𝼋 𝼋 # 1DF0B
|
||||
𝼌 𝼌 # 1DF0C
|
||||
𝼍 𝼍 # 1DF0D
|
||||
𝼎 𝼎 # 1DF0E
|
||||
𝼏 𝼏 # 1DF0F
|
||||
𝼐 𝼐 # 1DF10
|
||||
𝼑 𝼑 # 1DF11
|
||||
𝼒 𝼒 # 1DF12
|
||||
𝼓 𝼓 # 1DF13
|
||||
𝼔 𝼔 # 1DF14
|
||||
𝼕 𝼕 # 1DF15
|
||||
𝼖 𝼖 # 1DF16
|
||||
𝼗 𝼗 # 1DF17
|
||||
𝼘 𝼘 # 1DF18
|
||||
𝼙 𝼙 # 1DF19
|
||||
𝼚 𝼚 # 1DF1A
|
||||
𝼛 𝼛 # 1DF1B
|
||||
𝼜 𝼜 # 1DF1C
|
||||
𝼝 𝼝 # 1DF1D
|
||||
𝼞 𝼞 # 1DF1E
|
||||
𝼥 𝼥 # 1DF25
|
||||
𝼦 𝼦 # 1DF26
|
||||
𝼧 𝼧 # 1DF27
|
||||
𝼨 𝼨 # 1DF28
|
||||
𝼩 𝼩 # 1DF29
|
||||
𝼪 𝼪 # 1DF2A
|
||||
𞤀 𞤢 𞤀 # 1E900
|
||||
𞤁 𞤣 𞤁 # 1E901
|
||||
𞤂 𞤤 𞤂 # 1E902
|
||||
|
@ -354,7 +354,9 @@ test_unichar_script (void)
|
||||
{ G_UNICODE_SCRIPT_OLD_UYGHUR, 0x10F70 },
|
||||
{ G_UNICODE_SCRIPT_TANGSA, 0x16A70 },
|
||||
{ G_UNICODE_SCRIPT_TOTO, 0x1E290 },
|
||||
{ G_UNICODE_SCRIPT_VITHKUQI, 0x10570 }
|
||||
{ G_UNICODE_SCRIPT_VITHKUQI, 0x10570 },
|
||||
{ G_UNICODE_SCRIPT_KAWI, 0x11F00 },
|
||||
{ G_UNICODE_SCRIPT_NAG_MUNDARI, 0x1E4D0 },
|
||||
};
|
||||
for (i = 0; i < G_N_ELEMENTS (examples); i++)
|
||||
g_assert_cmpint (g_unichar_get_script (examples[i].c), ==, examples[i].script);
|
||||
@ -1848,7 +1850,11 @@ test_iso15924 (void)
|
||||
{ G_UNICODE_SCRIPT_OLD_UYGHUR, "Ougr" },
|
||||
{ G_UNICODE_SCRIPT_TANGSA, "Tnsa" },
|
||||
{ G_UNICODE_SCRIPT_TOTO, "Toto" },
|
||||
{ G_UNICODE_SCRIPT_VITHKUQI, "Vith" }
|
||||
{ G_UNICODE_SCRIPT_VITHKUQI, "Vith" },
|
||||
|
||||
/* Unicode 15.0 additions */
|
||||
{ G_UNICODE_SCRIPT_KAWI, "Kawi" },
|
||||
{ G_UNICODE_SCRIPT_NAG_MUNDARI, "Nagm" },
|
||||
};
|
||||
guint i;
|
||||
|
||||
@ -1873,6 +1879,7 @@ test_iso15924 (void)
|
||||
data[i].four_letter_code[2],
|
||||
data[i].four_letter_code[3]);
|
||||
|
||||
g_test_message ("Testing script %s (code %u)", data[i].four_letter_code, code);
|
||||
g_assert_cmphex (g_unicode_script_to_iso15924 (data[i].script), ==, code);
|
||||
g_assert_cmpint (g_unicode_script_from_iso15924 (code), ==, data[i].script);
|
||||
}
|
||||
|
38
tools/update-unicode-data.sh
Executable file
38
tools/update-unicode-data.sh
Executable file
@ -0,0 +1,38 @@
|
||||
#!/usr/bin/env bash
#
# Regenerate the Unicode-derived sources and test data of this checkout
# from a local copy of the Unicode Character Database (UCD).
#
# Usage: update-unicode-data.sh UCD-directory [version]
#
#   UCD-directory  directory containing the UCD text files
#                  (UnicodeData*.txt, CaseFolding*.txt, SpecialCasing*.txt, ...)
#   version        Unicode version to record in the generated files; when
#                  omitted it is extracted from ReadMe*.txt in UCD-directory
#
# The script locates the top of the git checkout it lives in, changes into
# it, and then runs the three generators. It exits with status 1 on bad
# arguments, on a directory that does not look like a UCD copy, or when no
# version can be determined.

if [ ! -d "$1" ]; then
    echo "Usage $(basename "$0") UCD-directory [version]"
    exit 1
fi

ucd=$(realpath "$1")
version=$2
glib_dir=$(git -C "$(dirname "$0")" rev-parse --show-toplevel)

# Sanity-check the directory before touching any generated file. The globs
# are the same ones passed to the generators below, so whatever passes this
# check is exactly what will be consumed later.
# shellcheck disable=SC2144 # we only want to match a file like this
if ! [ -f "$ucd"/UnicodeData*.txt ] || ! [ -f "$ucd"/CaseFolding*.txt ]; then
    echo "'$ucd' does not look like an Unicode Database directory";
    # Stop here: continuing would run the generators on missing input and
    # could overwrite the checked-in files with garbage.
    exit 1
fi

if [ -z "$version" ]; then
    # No version given on the command line: pull it out of the UCD ReadMe,
    # which contains a "Version X.Y.Z of the Unicode Standard" sentence.
    readme=("$ucd"/ReadMe*.txt)
    version=$(sed -n "s,.*Version \([0-9.]\+\) of the Unicode Standard.*,\1,p" \
        "${readme[@]}")

    if [ -z "$version" ]; then
        echo "Invalid version found"
        exit 1
    fi
fi

cd "$glib_dir" || exit 1

echo "Updating generated code to Unicode version $version"
# From here on, trace every command and abort on the first failure.
set -xe

(cd glib && ./gen-unicode-tables.pl -both "$version" "$ucd")
glib/tests/gen-casefold-txt.py "$version" \
    "$ucd"/CaseFolding*.txt > glib/tests/casefold.txt
glib/tests/gen-casemap-txt.py "$version" \
    "$ucd"/UnicodeData*.txt \
    "$ucd"/SpecialCasing*.txt > glib/tests/casemap.txt
|
Loading…
Reference in New Issue
Block a user