mirror of
https://gitlab.gnome.org/GNOME/glib.git
synced 2025-01-28 06:56:16 +01:00
Add a test for Unicode normalization
This test verifies the NFD and NFC examples from Unicode Annex #15, the annex that defines normalization.
This commit is contained in:
parent
770059b588
commit
9599a9451c
@ -1426,6 +1426,54 @@ test_fully_decompose_len (void)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Check various examples from Unicode Annex #15 for NFD and NFC
|
||||||
|
* normalization.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
test_normalization (void)
|
||||||
|
{
|
||||||
|
const struct {
|
||||||
|
const char *source;
|
||||||
|
const char *nfd;
|
||||||
|
const char *nfc;
|
||||||
|
} tests[] = {
|
||||||
|
// Singletons
|
||||||
|
{ "\xe2\x84\xab", "A\xcc\x8a", "Å" }, // U+212B ANGSTROM SIGN
|
||||||
|
{ "\xe2\x84\xa6", "Ω", "Ω" }, // U+2126 OHM SIGN
|
||||||
|
// Canonical Composites
|
||||||
|
{ "Å", "A\xcc\x8a", "Å" }, // U+00C5 LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||||
|
{ "ô", "o\xcc\x82", "ô" }, // U+00F4 LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||||||
|
// Multiple Combining Marks
|
||||||
|
{ "\xe1\xb9\xa9", "s\xcc\xa3\xcc\x87", "ṩ" }, // U+1E69 LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE
|
||||||
|
{ "\xe1\xb8\x8b\xcc\xa3", "d\xcc\xa3\xcc\x87", "ḍ̇" },
|
||||||
|
{ "q\xcc\x87\xcc\xa3", "q\xcc\xa3\xcc\x87", "q̣̇" },
|
||||||
|
// Compatibility Composites
|
||||||
|
{ "fi", "fi", "fi" }, // U+FB01 LATIN SMALL LIGATURE FI
|
||||||
|
{ "2\xe2\x81\xb5", "2\xe2\x81\xb5", "2⁵" },
|
||||||
|
{ "\xe1\xba\x9b\xcc\xa3", "\xc5\xbf\xcc\xa3\xcc\x87", "ẛ̣" },
|
||||||
|
|
||||||
|
// Tests for behavior with reordered marks
|
||||||
|
{ "s\xcc\x87\xcc\xa3", "s\xcc\xa3\xcc\x87", "ṩ" },
|
||||||
|
{ "α\xcc\x94\xcd\x82", "α\xcc\x94\xcd\x82", "ἇ" },
|
||||||
|
{ "α\xcd\x82\xcc\x94", "α\xcd\x82\xcc\x94", "ᾶ\xcc\x94" },
|
||||||
|
};
|
||||||
|
gsize i;
|
||||||
|
|
||||||
|
for (i = 0; i < G_N_ELEMENTS (tests); i++)
|
||||||
|
{
|
||||||
|
char *nfd, *nfc;
|
||||||
|
|
||||||
|
nfd = g_utf8_normalize (tests[i].source, -1, G_NORMALIZE_NFD);
|
||||||
|
g_assert_cmpstr (nfd, ==, tests[i].nfd);
|
||||||
|
|
||||||
|
nfc = g_utf8_normalize (tests[i].nfd, -1, G_NORMALIZE_NFC);
|
||||||
|
g_assert_cmpstr (nfc, ==, tests[i].nfc);
|
||||||
|
|
||||||
|
g_free (nfd);
|
||||||
|
g_free (nfc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
test_iso15924 (void)
|
test_iso15924 (void)
|
||||||
{
|
{
|
||||||
@ -1672,6 +1720,7 @@ main (int argc,
|
|||||||
g_test_add_func ("/unicode/digit-value", test_digit_value);
|
g_test_add_func ("/unicode/digit-value", test_digit_value);
|
||||||
g_test_add_func ("/unicode/fully-decompose-canonical", test_fully_decompose_canonical);
|
g_test_add_func ("/unicode/fully-decompose-canonical", test_fully_decompose_canonical);
|
||||||
g_test_add_func ("/unicode/fully-decompose-len", test_fully_decompose_len);
|
g_test_add_func ("/unicode/fully-decompose-len", test_fully_decompose_len);
|
||||||
|
g_test_add_func ("/unicode/normalization", test_normalization);
|
||||||
g_test_add_func ("/unicode/graph", test_graph);
|
g_test_add_func ("/unicode/graph", test_graph);
|
||||||
g_test_add_func ("/unicode/iso15924", test_iso15924);
|
g_test_add_func ("/unicode/iso15924", test_iso15924);
|
||||||
g_test_add_func ("/unicode/lower", test_lower);
|
g_test_add_func ("/unicode/lower", test_lower);
|
||||||
|
Loading…
Reference in New Issue
Block a user