mirror of
https://gitlab.gnome.org/GNOME/glib.git
synced 2025-01-24 21:16:15 +01:00
Add some more Unicode tests
This commit is contained in:
parent
45411ccbe3
commit
839078ea63
@@ -96,6 +96,265 @@ test_unichar_validate (void)
|
||||
g_assert (!g_unichar_validate (0x110000));
|
||||
}
|
||||
|
||||
static void
|
||||
test_unichar_character_type (void)
|
||||
{
|
||||
gint i;
|
||||
struct {
|
||||
GUnicodeType type;
|
||||
gunichar c;
|
||||
} examples[] = {
|
||||
{ G_UNICODE_CONTROL, 0x000D },
|
||||
{ G_UNICODE_FORMAT, 0x200E },
|
||||
/* G_UNICODE_UNASSIGNED */
|
||||
{ G_UNICODE_PRIVATE_USE, 0xE000 },
|
||||
{ G_UNICODE_SURROGATE, 0xD800 },
|
||||
{ G_UNICODE_LOWERCASE_LETTER, 0x0061 },
|
||||
{ G_UNICODE_MODIFIER_LETTER, 0x02B0 },
|
||||
{ G_UNICODE_OTHER_LETTER, 0x3400 },
|
||||
{ G_UNICODE_TITLECASE_LETTER, 0x01C5 },
|
||||
{ G_UNICODE_UPPERCASE_LETTER, 0xFF21 },
|
||||
{ G_UNICODE_COMBINING_MARK, 0x0903 },
|
||||
{ G_UNICODE_ENCLOSING_MARK, 0x20DD },
|
||||
{ G_UNICODE_NON_SPACING_MARK, 0xA806 },
|
||||
{ G_UNICODE_DECIMAL_NUMBER, 0xFF10 },
|
||||
{ G_UNICODE_LETTER_NUMBER, 0x16EE },
|
||||
{ G_UNICODE_OTHER_NUMBER, 0x17F0 },
|
||||
{ G_UNICODE_CONNECT_PUNCTUATION, 0x005F },
|
||||
{ G_UNICODE_DASH_PUNCTUATION, 0x058A },
|
||||
{ G_UNICODE_CLOSE_PUNCTUATION, 0x0F3B },
|
||||
{ G_UNICODE_FINAL_PUNCTUATION, 0x2019 },
|
||||
{ G_UNICODE_INITIAL_PUNCTUATION, 0x2018 },
|
||||
{ G_UNICODE_OTHER_PUNCTUATION, 0x2016 },
|
||||
{ G_UNICODE_OPEN_PUNCTUATION, 0x0F3A },
|
||||
{ G_UNICODE_CURRENCY_SYMBOL, 0x20A0 },
|
||||
{ G_UNICODE_MODIFIER_SYMBOL, 0x309B },
|
||||
{ G_UNICODE_MATH_SYMBOL, 0xFB29 },
|
||||
{ G_UNICODE_OTHER_SYMBOL, 0x00A6 },
|
||||
{ G_UNICODE_LINE_SEPARATOR, 0x2028 },
|
||||
{ G_UNICODE_PARAGRAPH_SEPARATOR, 0x2029 },
|
||||
{ G_UNICODE_SPACE_SEPARATOR, 0x202F },
|
||||
};
|
||||
|
||||
for (i = 0; i < G_N_ELEMENTS (examples); i++)
|
||||
{
|
||||
g_assert (g_unichar_type (examples[i].c) == examples[i].type);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
test_unichar_break_type (void)
|
||||
{
|
||||
gint i;
|
||||
struct {
|
||||
GUnicodeBreakType type;
|
||||
gunichar c;
|
||||
} examples[] = {
|
||||
{ G_UNICODE_BREAK_MANDATORY, 0x2028 },
|
||||
{ G_UNICODE_BREAK_CARRIAGE_RETURN, 0x000D },
|
||||
{ G_UNICODE_BREAK_LINE_FEED, 0x000A },
|
||||
{ G_UNICODE_BREAK_COMBINING_MARK, 0x0300 },
|
||||
{ G_UNICODE_BREAK_SURROGATE, 0xD800 },
|
||||
{ G_UNICODE_BREAK_ZERO_WIDTH_SPACE, 0x200B },
|
||||
{ G_UNICODE_BREAK_INSEPARABLE, 0x2024 },
|
||||
{ G_UNICODE_BREAK_NON_BREAKING_GLUE, 0x00A0 },
|
||||
{ G_UNICODE_BREAK_CONTINGENT, 0xFFFC },
|
||||
{ G_UNICODE_BREAK_SPACE, 0x0020 },
|
||||
{ G_UNICODE_BREAK_AFTER, 0x05BE },
|
||||
{ G_UNICODE_BREAK_BEFORE, 0x02C8 },
|
||||
{ G_UNICODE_BREAK_BEFORE_AND_AFTER, 0x2014 },
|
||||
{ G_UNICODE_BREAK_HYPHEN, 0x002D },
|
||||
{ G_UNICODE_BREAK_NON_STARTER, 0x17D6 },
|
||||
{ G_UNICODE_BREAK_OPEN_PUNCTUATION, 0x0028 },
|
||||
{ G_UNICODE_BREAK_CLOSE_PUNCTUATION, 0x0029 },
|
||||
{ G_UNICODE_BREAK_QUOTATION, 0x0022 },
|
||||
{ G_UNICODE_BREAK_EXCLAMATION, 0x0021 },
|
||||
{ G_UNICODE_BREAK_IDEOGRAPHIC, 0x2E80 },
|
||||
{ G_UNICODE_BREAK_NUMERIC, 0x0030 },
|
||||
{ G_UNICODE_BREAK_INFIX_SEPARATOR, 0x002C },
|
||||
{ G_UNICODE_BREAK_SYMBOL, 0x002F },
|
||||
{ G_UNICODE_BREAK_ALPHABETIC, 0x0023 },
|
||||
{ G_UNICODE_BREAK_PREFIX, 0x0024 },
|
||||
{ G_UNICODE_BREAK_POSTFIX, 0x0025 },
|
||||
{ G_UNICODE_BREAK_COMPLEX_CONTEXT, 0x0E01 },
|
||||
{ G_UNICODE_BREAK_AMBIGUOUS, 0x00F7 },
|
||||
{ G_UNICODE_BREAK_UNKNOWN, 0xE000 },
|
||||
{ G_UNICODE_BREAK_NEXT_LINE, 0x0085 },
|
||||
{ G_UNICODE_BREAK_WORD_JOINER, 0x2060 },
|
||||
{ G_UNICODE_BREAK_HANGUL_L_JAMO, 0x1100 },
|
||||
{ G_UNICODE_BREAK_HANGUL_V_JAMO, 0x1160 },
|
||||
{ G_UNICODE_BREAK_HANGUL_T_JAMO, 0x11A8 },
|
||||
{ G_UNICODE_BREAK_HANGUL_LV_SYLLABLE, 0xAC00 },
|
||||
{ G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE, 0xAC01 }
|
||||
};
|
||||
|
||||
for (i = 0; i < G_N_ELEMENTS (examples); i++)
|
||||
{
|
||||
g_assert (g_unichar_break_type (examples[i].c) == examples[i].type);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
test_unichar_script (void)
|
||||
{
|
||||
gint i;
|
||||
struct {
|
||||
GUnicodeScript script;
|
||||
gunichar c;
|
||||
} examples[] = {
|
||||
{ G_UNICODE_SCRIPT_COMMON, 0x002A },
|
||||
/* { G_UNICODE_SCRIPT_INHERITED, 0x1CED }, 5.2 addition */
|
||||
{ G_UNICODE_SCRIPT_INHERITED, 0x0670 },
|
||||
{ G_UNICODE_SCRIPT_ARABIC, 0x060D },
|
||||
{ G_UNICODE_SCRIPT_ARMENIAN, 0x0559 },
|
||||
{ G_UNICODE_SCRIPT_BENGALI, 0x09CD },
|
||||
{ G_UNICODE_SCRIPT_BOPOMOFO, 0x31B6 },
|
||||
{ G_UNICODE_SCRIPT_CHEROKEE, 0x13A2 },
|
||||
{ G_UNICODE_SCRIPT_COPTIC, 0x2CFD },
|
||||
{ G_UNICODE_SCRIPT_CYRILLIC, 0x0482 },
|
||||
{ G_UNICODE_SCRIPT_DESERET, 0x10401 },
|
||||
{ G_UNICODE_SCRIPT_DEVANAGARI, 0x094D },
|
||||
{ G_UNICODE_SCRIPT_ETHIOPIC, 0x1258 },
|
||||
{ G_UNICODE_SCRIPT_GEORGIAN, 0x10FC },
|
||||
{ G_UNICODE_SCRIPT_GOTHIC, 0x10341 },
|
||||
{ G_UNICODE_SCRIPT_GREEK, 0x0375 },
|
||||
{ G_UNICODE_SCRIPT_GUJARATI, 0x0A83 },
|
||||
{ G_UNICODE_SCRIPT_GURMUKHI, 0x0A3C },
|
||||
{ G_UNICODE_SCRIPT_HAN, 0x3005 },
|
||||
{ G_UNICODE_SCRIPT_HANGUL, 0x1100 },
|
||||
{ G_UNICODE_SCRIPT_HEBREW, 0x05BF },
|
||||
{ G_UNICODE_SCRIPT_HIRAGANA, 0x309F },
|
||||
{ G_UNICODE_SCRIPT_KANNADA, 0x0CBC },
|
||||
{ G_UNICODE_SCRIPT_KATAKANA, 0x30FF },
|
||||
{ G_UNICODE_SCRIPT_KHMER, 0x17DD },
|
||||
{ G_UNICODE_SCRIPT_LAO, 0x0EDD },
|
||||
{ G_UNICODE_SCRIPT_LATIN, 0x0061 },
|
||||
{ G_UNICODE_SCRIPT_MALAYALAM, 0x0D3D },
|
||||
{ G_UNICODE_SCRIPT_MONGOLIAN, 0x1843 },
|
||||
{ G_UNICODE_SCRIPT_MYANMAR, 0x1031 },
|
||||
{ G_UNICODE_SCRIPT_OGHAM, 0x169C },
|
||||
{ G_UNICODE_SCRIPT_OLD_ITALIC, 0x10322 },
|
||||
{ G_UNICODE_SCRIPT_ORIYA, 0x0B3C },
|
||||
{ G_UNICODE_SCRIPT_RUNIC, 0x16EF },
|
||||
{ G_UNICODE_SCRIPT_SINHALA, 0x0DBD },
|
||||
{ G_UNICODE_SCRIPT_SYRIAC, 0x0711 },
|
||||
{ G_UNICODE_SCRIPT_TAMIL, 0x0B82 },
|
||||
{ G_UNICODE_SCRIPT_TELUGU, 0x0C03 },
|
||||
{ G_UNICODE_SCRIPT_THAANA, 0x07B1 },
|
||||
{ G_UNICODE_SCRIPT_THAI, 0x0E31 },
|
||||
{ G_UNICODE_SCRIPT_TIBETAN, 0x0FD4 },
|
||||
/* { G_UNICODE_SCRIPT_CANADIAN_ABORIGINAL, 0x1400 }, 5.2 addition */
|
||||
{ G_UNICODE_SCRIPT_CANADIAN_ABORIGINAL, 0x1401 },
|
||||
{ G_UNICODE_SCRIPT_YI, 0xA015 },
|
||||
{ G_UNICODE_SCRIPT_TAGALOG, 0x1700 },
|
||||
{ G_UNICODE_SCRIPT_HANUNOO, 0x1720 },
|
||||
{ G_UNICODE_SCRIPT_BUHID, 0x1740 },
|
||||
{ G_UNICODE_SCRIPT_TAGBANWA, 0x1760 },
|
||||
{ G_UNICODE_SCRIPT_BRAILLE, 0x2800 },
|
||||
{ G_UNICODE_SCRIPT_CYPRIOT, 0x10808 },
|
||||
{ G_UNICODE_SCRIPT_LIMBU, 0x1932 },
|
||||
{ G_UNICODE_SCRIPT_OSMANYA, 0x10480 },
|
||||
{ G_UNICODE_SCRIPT_SHAVIAN, 0x10450 },
|
||||
{ G_UNICODE_SCRIPT_LINEAR_B, 0x10000 },
|
||||
{ G_UNICODE_SCRIPT_TAI_LE, 0x1950 },
|
||||
{ G_UNICODE_SCRIPT_UGARITIC, 0x1039F },
|
||||
{ G_UNICODE_SCRIPT_NEW_TAI_LUE, 0x1980 },
|
||||
{ G_UNICODE_SCRIPT_BUGINESE, 0x1A1F },
|
||||
{ G_UNICODE_SCRIPT_GLAGOLITIC, 0x2C00 },
|
||||
{ G_UNICODE_SCRIPT_TIFINAGH, 0x2D6F },
|
||||
{ G_UNICODE_SCRIPT_SYLOTI_NAGRI, 0xA800 },
|
||||
{ G_UNICODE_SCRIPT_OLD_PERSIAN, 0x103D0 },
|
||||
{ G_UNICODE_SCRIPT_KHAROSHTHI, 0x10A3F },
|
||||
/* G_UNICODE_SCRIPT_UNKNOWN */
|
||||
{ G_UNICODE_SCRIPT_BALINESE, 0x1B04 },
|
||||
{ G_UNICODE_SCRIPT_CUNEIFORM, 0x12000 },
|
||||
{ G_UNICODE_SCRIPT_PHOENICIAN, 0x10900 },
|
||||
{ G_UNICODE_SCRIPT_PHAGS_PA, 0xA840 },
|
||||
{ G_UNICODE_SCRIPT_NKO, 0x07C0 },
|
||||
{ G_UNICODE_SCRIPT_KAYAH_LI, 0xA900 },
|
||||
{ G_UNICODE_SCRIPT_LEPCHA, 0x1C00 },
|
||||
{ G_UNICODE_SCRIPT_REJANG, 0xA930 },
|
||||
{ G_UNICODE_SCRIPT_SUNDANESE, 0x1B80 },
|
||||
{ G_UNICODE_SCRIPT_SAURASHTRA, 0xA880 },
|
||||
{ G_UNICODE_SCRIPT_CHAM, 0xAA00 },
|
||||
{ G_UNICODE_SCRIPT_OL_CHIKI, 0x1C50 },
|
||||
{ G_UNICODE_SCRIPT_VAI, 0xA500 },
|
||||
{ G_UNICODE_SCRIPT_CARIAN, 0x102A0 },
|
||||
{ G_UNICODE_SCRIPT_LYCIAN, 0x10280 },
|
||||
{ G_UNICODE_SCRIPT_LYDIAN, 0x1093F },
|
||||
/* 5.2 additions
|
||||
{ G_UNICODE_SCRIPT_AVESTAN, 0x10B00 },
|
||||
{ G_UNICODE_SCRIPT_BAMUM, 0xA6A0 },
|
||||
{ G_UNICODE_SCRIPT_EGYPTIAN_HIEROGLYPHS, 0x13000 },
|
||||
{ G_UNICODE_SCRIPT_IMPERIAL_ARAMAIC, 0x10840 },
|
||||
{ G_UNICODE_SCRIPT_INSCRIPTIONAL_PAHLAVI, 0x10B60 },
|
||||
{ G_UNICODE_SCRIPT_INSCRIPTIONAL_PARTHIAN, 0x10B40 },
|
||||
{ G_UNICODE_SCRIPT_JAVANESE, 0xA980 },
|
||||
{ G_UNICODE_SCRIPT_KAITHI, 0x11082 },
|
||||
{ G_UNICODE_SCRIPT_LISU, 0xA4D0 },
|
||||
{ G_UNICODE_SCRIPT_MEETEI_MAYEK, 0xABE5 },
|
||||
{ G_UNICODE_SCRIPT_OLD_SOUTH_ARABIAN, 0x10A60 },
|
||||
{ G_UNICODE_SCRIPT_OLD_TURKISH, 0x10C00 },
|
||||
{ G_UNICODE_SCRIPT_SAMARITAN, 0x0800 },
|
||||
{ G_UNICODE_SCRIPT_TAI_THAM, 0x1A20 },
|
||||
{ G_UNICODE_SCRIPT_TAI_VIET, 0xAA80 }
|
||||
*/
|
||||
};
|
||||
for (i = 0; i < G_N_ELEMENTS (examples); i++)
|
||||
{
|
||||
g_assert (g_unichar_get_script (examples[i].c) == examples[i].script);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
test_combining_class (void)
|
||||
{
|
||||
gint i;
|
||||
struct {
|
||||
gint class;
|
||||
gunichar c;
|
||||
} examples[] = {
|
||||
{ 0, 0x0020 },
|
||||
{ 1, 0x0334 },
|
||||
{ 7, 0x093C },
|
||||
{ 8, 0x3099 },
|
||||
{ 9, 0x094D },
|
||||
{ 10, 0x05B0 },
|
||||
{ 11, 0x05B1 },
|
||||
{ 12, 0x05B2 },
|
||||
{ 13, 0x05B3 },
|
||||
{ 14, 0x05B4 },
|
||||
{ 15, 0x05B5 },
|
||||
{ 16, 0x05B6 },
|
||||
{ 17, 0x05B7 },
|
||||
{ 18, 0x05B8 },
|
||||
{ 19, 0x05B9 },
|
||||
{ 20, 0x05BB },
|
||||
{ 21, 0x05BC },
|
||||
{ 22, 0x05BD },
|
||||
{ 23, 0x05BF },
|
||||
{ 24, 0x05C1 },
|
||||
{ 25, 0x05C2 },
|
||||
{ 26, 0xFB1E },
|
||||
{ 27, 0x064B },
|
||||
{ 28, 0x064C },
|
||||
{ 29, 0x064D },
|
||||
/* ... */
|
||||
{ 228, 0x05AE },
|
||||
{ 230, 0x0300 },
|
||||
{ 232, 0x302C },
|
||||
{ 233, 0x0362 },
|
||||
{ 234, 0x0360 },
|
||||
/* { 234, 0x1DCD }, 5.1 addition */
|
||||
{ 240, 0x0345 }
|
||||
};
|
||||
for (i = 0; i < G_N_ELEMENTS (examples); i++)
|
||||
{
|
||||
g_print ("%d: %#x\n", i, examples[i].c);
|
||||
g_assert (g_unichar_combining_class (examples[i].c) == examples[i].class);
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
main (int argc,
|
||||
char *argv[])
|
||||
@@ -106,6 +365,10 @@ main (int argc,
|
||||
g_test_add_func ("/utf8/strncpy", test_utf8_strncpy);
|
||||
g_test_add_func ("/utf8/strrchr", test_utf8_strrchr);
|
||||
g_test_add_func ("/unicode/validate", test_unichar_validate);
|
||||
g_test_add_func ("/unicode/character-type", test_unichar_character_type);
|
||||
g_test_add_func ("/unicode/break-type", test_unichar_break_type);
|
||||
g_test_add_func ("/unicode/script", test_unichar_script);
|
||||
g_test_add_func ("/unicode/combining-class", test_combining_class);
|
||||
|
||||
return g_test_run();
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user