glib/glib/tests/unicode.c
Matthias Clasen 9599a9451c Add a test for Unicode normalization
This test verifies the examples from the Unicode
Annex that defines normalization.
2021-07-30 16:54:59 +01:00

1746 lines
73 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/* Unit tests for utilities
* Copyright (C) 2010 Red Hat, Inc.
* Copyright (C) 2011 Google, Inc.
*
* This work is provided "as is"; redistribution and modification
* in whole or in part, in any medium, physical or electronic is
* permitted without restriction.
*
* This work is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*
* In no event shall the authors or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even
* if advised of the possibility of such damage.
*
* Author: Matthias Clasen, Behdad Esfahbod
*/
/* We are testing some deprecated APIs here */
#ifndef GLIB_DISABLE_DEPRECATION_WARNINGS
#define GLIB_DISABLE_DEPRECATION_WARNINGS
#endif
#include <locale.h>
#include "glib.h"
#include "glib/gunidecomp.h"
/* Test that g_unichar_validate() returns the correct value for various
* ASCII and Unicode alphabetic, numeric, and other, codepoints. */
static void
test_unichar_validate (void)
{
g_assert_true (g_unichar_validate ('j'));
g_assert_true (g_unichar_validate (8356));
g_assert_true (g_unichar_validate (8356));
g_assert_true (g_unichar_validate (0xFDD1));
g_assert_true (g_unichar_validate (917760));
g_assert_false (g_unichar_validate (0x110000));
}
/* Test that g_unichar_type() returns the correct value for various
* ASCII and Unicode alphabetic, numeric, and other, codepoints. */
static void
test_unichar_character_type (void)
{
guint i;
struct {
GUnicodeType type;
gunichar c;
} examples[] = {
{ G_UNICODE_CONTROL, 0x000D },
{ G_UNICODE_FORMAT, 0x200E },
/* G_UNICODE_UNASSIGNED */
{ G_UNICODE_PRIVATE_USE, 0xE000 },
{ G_UNICODE_SURROGATE, 0xD800 },
{ G_UNICODE_LOWERCASE_LETTER, 0x0061 },
{ G_UNICODE_MODIFIER_LETTER, 0x02B0 },
{ G_UNICODE_OTHER_LETTER, 0x3400 },
{ G_UNICODE_TITLECASE_LETTER, 0x01C5 },
{ G_UNICODE_UPPERCASE_LETTER, 0xFF21 },
{ G_UNICODE_SPACING_MARK, 0x0903 },
{ G_UNICODE_ENCLOSING_MARK, 0x20DD },
{ G_UNICODE_NON_SPACING_MARK, 0xA806 },
{ G_UNICODE_DECIMAL_NUMBER, 0xFF10 },
{ G_UNICODE_LETTER_NUMBER, 0x16EE },
{ G_UNICODE_OTHER_NUMBER, 0x17F0 },
{ G_UNICODE_CONNECT_PUNCTUATION, 0x005F },
{ G_UNICODE_DASH_PUNCTUATION, 0x058A },
{ G_UNICODE_CLOSE_PUNCTUATION, 0x0F3B },
{ G_UNICODE_FINAL_PUNCTUATION, 0x2019 },
{ G_UNICODE_INITIAL_PUNCTUATION, 0x2018 },
{ G_UNICODE_OTHER_PUNCTUATION, 0x2016 },
{ G_UNICODE_OPEN_PUNCTUATION, 0x0F3A },
{ G_UNICODE_CURRENCY_SYMBOL, 0x20A0 },
{ G_UNICODE_MODIFIER_SYMBOL, 0x309B },
{ G_UNICODE_MATH_SYMBOL, 0xFB29 },
{ G_UNICODE_OTHER_SYMBOL, 0x00A6 },
{ G_UNICODE_LINE_SEPARATOR, 0x2028 },
{ G_UNICODE_PARAGRAPH_SEPARATOR, 0x2029 },
{ G_UNICODE_SPACE_SEPARATOR, 0x202F },
};
for (i = 0; i < G_N_ELEMENTS (examples); i++)
{
g_assert_cmpint (g_unichar_type (examples[i].c), ==, examples[i].type);
}
/*** Testing TYPE() border cases ***/
g_assert_cmpint (g_unichar_type (0x3FF5), ==, 0x07);
/* U+FFEFF Plane 15 Private Use */
g_assert_cmpint (g_unichar_type (0xFFEFF), ==, 0x03);
/* U+E0001 Language Tag */
g_assert_cmpint (g_unichar_type (0xE0001), ==, 0x01);
g_assert_cmpint (g_unichar_type (G_UNICODE_LAST_CHAR), ==, 0x02);
g_assert_cmpint (g_unichar_type (G_UNICODE_LAST_CHAR + 1), ==, 0x02);
g_assert_cmpint (g_unichar_type (G_UNICODE_LAST_CHAR_PART1), ==, 0x02);
g_assert_cmpint (g_unichar_type (G_UNICODE_LAST_CHAR_PART1 + 1), ==, 0x02);
}
/* Test that g_unichar_break_type() returns the correct value for various
* ASCII and Unicode alphabetic, numeric, and other, codepoints. */
static void
test_unichar_break_type (void)
{
guint i;
struct {
GUnicodeBreakType type;
gunichar c;
} examples[] = {
{ G_UNICODE_BREAK_MANDATORY, 0x2028 },
{ G_UNICODE_BREAK_CARRIAGE_RETURN, 0x000D },
{ G_UNICODE_BREAK_LINE_FEED, 0x000A },
{ G_UNICODE_BREAK_COMBINING_MARK, 0x0300 },
{ G_UNICODE_BREAK_SURROGATE, 0xD800 },
{ G_UNICODE_BREAK_ZERO_WIDTH_SPACE, 0x200B },
{ G_UNICODE_BREAK_INSEPARABLE, 0x2024 },
{ G_UNICODE_BREAK_NON_BREAKING_GLUE, 0x00A0 },
{ G_UNICODE_BREAK_CONTINGENT, 0xFFFC },
{ G_UNICODE_BREAK_SPACE, 0x0020 },
{ G_UNICODE_BREAK_AFTER, 0x05BE },
{ G_UNICODE_BREAK_BEFORE, 0x02C8 },
{ G_UNICODE_BREAK_BEFORE_AND_AFTER, 0x2014 },
{ G_UNICODE_BREAK_HYPHEN, 0x002D },
{ G_UNICODE_BREAK_NON_STARTER, 0x17D6 },
{ G_UNICODE_BREAK_OPEN_PUNCTUATION, 0x0028 },
{ G_UNICODE_BREAK_CLOSE_PARANTHESIS, 0x0029 },
{ G_UNICODE_BREAK_CLOSE_PUNCTUATION, 0x007D },
{ G_UNICODE_BREAK_QUOTATION, 0x0022 },
{ G_UNICODE_BREAK_EXCLAMATION, 0x0021 },
{ G_UNICODE_BREAK_IDEOGRAPHIC, 0x2E80 },
{ G_UNICODE_BREAK_NUMERIC, 0x0030 },
{ G_UNICODE_BREAK_INFIX_SEPARATOR, 0x002C },
{ G_UNICODE_BREAK_SYMBOL, 0x002F },
{ G_UNICODE_BREAK_ALPHABETIC, 0x0023 },
{ G_UNICODE_BREAK_PREFIX, 0x0024 },
{ G_UNICODE_BREAK_POSTFIX, 0x0025 },
{ G_UNICODE_BREAK_COMPLEX_CONTEXT, 0x0E01 },
{ G_UNICODE_BREAK_AMBIGUOUS, 0x00F7 },
{ G_UNICODE_BREAK_UNKNOWN, 0xE000 },
{ G_UNICODE_BREAK_NEXT_LINE, 0x0085 },
{ G_UNICODE_BREAK_WORD_JOINER, 0x2060 },
{ G_UNICODE_BREAK_HANGUL_L_JAMO, 0x1100 },
{ G_UNICODE_BREAK_HANGUL_V_JAMO, 0x1160 },
{ G_UNICODE_BREAK_HANGUL_T_JAMO, 0x11A8 },
{ G_UNICODE_BREAK_HANGUL_LV_SYLLABLE, 0xAC00 },
{ G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE, 0xAC01 },
{ G_UNICODE_BREAK_CONDITIONAL_JAPANESE_STARTER, 0x3041 },
{ G_UNICODE_BREAK_HEBREW_LETTER, 0x05D0 },
{ G_UNICODE_BREAK_REGIONAL_INDICATOR, 0x1F1F6 },
{ G_UNICODE_BREAK_EMOJI_BASE, 0x1F466 },
{ G_UNICODE_BREAK_EMOJI_MODIFIER, 0x1F3FB },
{ G_UNICODE_BREAK_ZERO_WIDTH_JOINER, 0x200D },
};
for (i = 0; i < G_N_ELEMENTS (examples); i++)
{
g_assert_cmpint (g_unichar_break_type (examples[i].c), ==, examples[i].type);
}
}
/* Test that g_unichar_get_script() returns the correct value for various
* ASCII and Unicode alphabetic, numeric, and other, codepoints. */
static void
test_unichar_script (void)
{
guint i;
struct {
GUnicodeScript script;
gunichar c;
} examples[] = {
{ G_UNICODE_SCRIPT_COMMON, 0x002A },
{ G_UNICODE_SCRIPT_INHERITED, 0x1CED },
{ G_UNICODE_SCRIPT_INHERITED, 0x0670 },
{ G_UNICODE_SCRIPT_ARABIC, 0x060D },
{ G_UNICODE_SCRIPT_ARMENIAN, 0x0559 },
{ G_UNICODE_SCRIPT_BENGALI, 0x09CD },
{ G_UNICODE_SCRIPT_BOPOMOFO, 0x31B6 },
{ G_UNICODE_SCRIPT_CHEROKEE, 0x13A2 },
{ G_UNICODE_SCRIPT_COPTIC, 0x2CFD },
{ G_UNICODE_SCRIPT_CYRILLIC, 0x0482 },
{ G_UNICODE_SCRIPT_DESERET, 0x10401 },
{ G_UNICODE_SCRIPT_DEVANAGARI, 0x094D },
{ G_UNICODE_SCRIPT_ETHIOPIC, 0x1258 },
{ G_UNICODE_SCRIPT_GEORGIAN, 0x10FC },
{ G_UNICODE_SCRIPT_GOTHIC, 0x10341 },
{ G_UNICODE_SCRIPT_GREEK, 0x0375 },
{ G_UNICODE_SCRIPT_GUJARATI, 0x0A83 },
{ G_UNICODE_SCRIPT_GURMUKHI, 0x0A3C },
{ G_UNICODE_SCRIPT_HAN, 0x3005 },
{ G_UNICODE_SCRIPT_HANGUL, 0x1100 },
{ G_UNICODE_SCRIPT_HEBREW, 0x05BF },
{ G_UNICODE_SCRIPT_HIRAGANA, 0x309F },
{ G_UNICODE_SCRIPT_KANNADA, 0x0CBC },
{ G_UNICODE_SCRIPT_KATAKANA, 0x30FF },
{ G_UNICODE_SCRIPT_KHMER, 0x17DD },
{ G_UNICODE_SCRIPT_LAO, 0x0EDD },
{ G_UNICODE_SCRIPT_LATIN, 0x0061 },
{ G_UNICODE_SCRIPT_MALAYALAM, 0x0D3D },
{ G_UNICODE_SCRIPT_MONGOLIAN, 0x1843 },
{ G_UNICODE_SCRIPT_MYANMAR, 0x1031 },
{ G_UNICODE_SCRIPT_OGHAM, 0x169C },
{ G_UNICODE_SCRIPT_OLD_ITALIC, 0x10322 },
{ G_UNICODE_SCRIPT_ORIYA, 0x0B3C },
{ G_UNICODE_SCRIPT_RUNIC, 0x16EF },
{ G_UNICODE_SCRIPT_SINHALA, 0x0DBD },
{ G_UNICODE_SCRIPT_SYRIAC, 0x0711 },
{ G_UNICODE_SCRIPT_TAMIL, 0x0B82 },
{ G_UNICODE_SCRIPT_TELUGU, 0x0C03 },
{ G_UNICODE_SCRIPT_THAANA, 0x07B1 },
{ G_UNICODE_SCRIPT_THAI, 0x0E31 },
{ G_UNICODE_SCRIPT_TIBETAN, 0x0FD4 },
{ G_UNICODE_SCRIPT_CANADIAN_ABORIGINAL, 0x1400 },
{ G_UNICODE_SCRIPT_CANADIAN_ABORIGINAL, 0x1401 },
{ G_UNICODE_SCRIPT_YI, 0xA015 },
{ G_UNICODE_SCRIPT_TAGALOG, 0x1700 },
{ G_UNICODE_SCRIPT_HANUNOO, 0x1720 },
{ G_UNICODE_SCRIPT_BUHID, 0x1740 },
{ G_UNICODE_SCRIPT_TAGBANWA, 0x1760 },
{ G_UNICODE_SCRIPT_BRAILLE, 0x2800 },
{ G_UNICODE_SCRIPT_CYPRIOT, 0x10808 },
{ G_UNICODE_SCRIPT_LIMBU, 0x1932 },
{ G_UNICODE_SCRIPT_OSMANYA, 0x10480 },
{ G_UNICODE_SCRIPT_SHAVIAN, 0x10450 },
{ G_UNICODE_SCRIPT_LINEAR_B, 0x10000 },
{ G_UNICODE_SCRIPT_TAI_LE, 0x1950 },
{ G_UNICODE_SCRIPT_UGARITIC, 0x1039F },
{ G_UNICODE_SCRIPT_NEW_TAI_LUE, 0x1980 },
{ G_UNICODE_SCRIPT_BUGINESE, 0x1A1F },
{ G_UNICODE_SCRIPT_GLAGOLITIC, 0x2C00 },
{ G_UNICODE_SCRIPT_TIFINAGH, 0x2D6F },
{ G_UNICODE_SCRIPT_SYLOTI_NAGRI, 0xA800 },
{ G_UNICODE_SCRIPT_OLD_PERSIAN, 0x103D0 },
{ G_UNICODE_SCRIPT_KHAROSHTHI, 0x10A3F },
{ G_UNICODE_SCRIPT_UNKNOWN, 0x1111111 },
{ G_UNICODE_SCRIPT_BALINESE, 0x1B04 },
{ G_UNICODE_SCRIPT_CUNEIFORM, 0x12000 },
{ G_UNICODE_SCRIPT_PHOENICIAN, 0x10900 },
{ G_UNICODE_SCRIPT_PHAGS_PA, 0xA840 },
{ G_UNICODE_SCRIPT_NKO, 0x07C0 },
{ G_UNICODE_SCRIPT_KAYAH_LI, 0xA900 },
{ G_UNICODE_SCRIPT_LEPCHA, 0x1C00 },
{ G_UNICODE_SCRIPT_REJANG, 0xA930 },
{ G_UNICODE_SCRIPT_SUNDANESE, 0x1B80 },
{ G_UNICODE_SCRIPT_SAURASHTRA, 0xA880 },
{ G_UNICODE_SCRIPT_CHAM, 0xAA00 },
{ G_UNICODE_SCRIPT_OL_CHIKI, 0x1C50 },
{ G_UNICODE_SCRIPT_VAI, 0xA500 },
{ G_UNICODE_SCRIPT_CARIAN, 0x102A0 },
{ G_UNICODE_SCRIPT_LYCIAN, 0x10280 },
{ G_UNICODE_SCRIPT_LYDIAN, 0x1093F },
{ G_UNICODE_SCRIPT_AVESTAN, 0x10B00 },
{ G_UNICODE_SCRIPT_BAMUM, 0xA6A0 },
{ G_UNICODE_SCRIPT_EGYPTIAN_HIEROGLYPHS, 0x13000 },
{ G_UNICODE_SCRIPT_IMPERIAL_ARAMAIC, 0x10840 },
{ G_UNICODE_SCRIPT_INSCRIPTIONAL_PAHLAVI, 0x10B60 },
{ G_UNICODE_SCRIPT_INSCRIPTIONAL_PARTHIAN, 0x10B40 },
{ G_UNICODE_SCRIPT_JAVANESE, 0xA980 },
{ G_UNICODE_SCRIPT_KAITHI, 0x11082 },
{ G_UNICODE_SCRIPT_LISU, 0xA4D0 },
{ G_UNICODE_SCRIPT_MEETEI_MAYEK, 0xABE5 },
{ G_UNICODE_SCRIPT_OLD_SOUTH_ARABIAN, 0x10A60 },
{ G_UNICODE_SCRIPT_OLD_TURKIC, 0x10C00 },
{ G_UNICODE_SCRIPT_SAMARITAN, 0x0800 },
{ G_UNICODE_SCRIPT_TAI_THAM, 0x1A20 },
{ G_UNICODE_SCRIPT_TAI_VIET, 0xAA80 },
{ G_UNICODE_SCRIPT_BATAK, 0x1BC0 },
{ G_UNICODE_SCRIPT_BRAHMI, 0x11000 },
{ G_UNICODE_SCRIPT_MANDAIC, 0x0840 },
{ G_UNICODE_SCRIPT_CHAKMA, 0x11100 },
{ G_UNICODE_SCRIPT_MEROITIC_CURSIVE, 0x109A0 },
{ G_UNICODE_SCRIPT_MEROITIC_HIEROGLYPHS, 0x10980 },
{ G_UNICODE_SCRIPT_MIAO, 0x16F00 },
{ G_UNICODE_SCRIPT_SHARADA, 0x11180 },
{ G_UNICODE_SCRIPT_SORA_SOMPENG, 0x110D0 },
{ G_UNICODE_SCRIPT_TAKRI, 0x11680 },
{ G_UNICODE_SCRIPT_BASSA_VAH, 0x16AD0 },
{ G_UNICODE_SCRIPT_CAUCASIAN_ALBANIAN, 0x10530 },
{ G_UNICODE_SCRIPT_DUPLOYAN, 0x1BC00 },
{ G_UNICODE_SCRIPT_ELBASAN, 0x10500 },
{ G_UNICODE_SCRIPT_GRANTHA, 0x11301 },
{ G_UNICODE_SCRIPT_KHOJKI, 0x11200 },
{ G_UNICODE_SCRIPT_KHUDAWADI, 0x112B0 },
{ G_UNICODE_SCRIPT_LINEAR_A, 0x10600 },
{ G_UNICODE_SCRIPT_MAHAJANI, 0x11150 },
{ G_UNICODE_SCRIPT_MANICHAEAN, 0x10AC0 },
{ G_UNICODE_SCRIPT_MENDE_KIKAKUI, 0x1E800 },
{ G_UNICODE_SCRIPT_MODI, 0x11600 },
{ G_UNICODE_SCRIPT_MRO, 0x16A40 },
{ G_UNICODE_SCRIPT_NABATAEAN, 0x10880 },
{ G_UNICODE_SCRIPT_OLD_NORTH_ARABIAN, 0x10A80 },
{ G_UNICODE_SCRIPT_OLD_PERMIC, 0x10350 },
{ G_UNICODE_SCRIPT_PAHAWH_HMONG, 0x16B00 },
{ G_UNICODE_SCRIPT_PALMYRENE, 0x10860 },
{ G_UNICODE_SCRIPT_PAU_CIN_HAU, 0x11AC0 },
{ G_UNICODE_SCRIPT_PSALTER_PAHLAVI, 0x10B80 },
{ G_UNICODE_SCRIPT_SIDDHAM, 0x11580 },
{ G_UNICODE_SCRIPT_TIRHUTA, 0x11480 },
{ G_UNICODE_SCRIPT_WARANG_CITI, 0x118A0 },
{ G_UNICODE_SCRIPT_CHEROKEE, 0x0AB71 },
{ G_UNICODE_SCRIPT_HATRAN, 0x108E0 },
{ G_UNICODE_SCRIPT_OLD_HUNGARIAN, 0x10C80 },
{ G_UNICODE_SCRIPT_MULTANI, 0x11280 },
{ G_UNICODE_SCRIPT_AHOM, 0x11700 },
{ G_UNICODE_SCRIPT_CUNEIFORM, 0x12480 },
{ G_UNICODE_SCRIPT_ANATOLIAN_HIEROGLYPHS, 0x14400 },
{ G_UNICODE_SCRIPT_SIGNWRITING, 0x1D800 },
{ G_UNICODE_SCRIPT_ADLAM, 0x1E900 },
{ G_UNICODE_SCRIPT_BHAIKSUKI, 0x11C00 },
{ G_UNICODE_SCRIPT_MARCHEN, 0x11C70 },
{ G_UNICODE_SCRIPT_NEWA, 0x11400 },
{ G_UNICODE_SCRIPT_OSAGE, 0x104B0 },
{ G_UNICODE_SCRIPT_TANGUT, 0x16FE0 },
{ G_UNICODE_SCRIPT_MASARAM_GONDI, 0x11D00 },
{ G_UNICODE_SCRIPT_NUSHU, 0x1B170 },
{ G_UNICODE_SCRIPT_SOYOMBO, 0x11A50 },
{ G_UNICODE_SCRIPT_ZANABAZAR_SQUARE, 0x11A00 },
{ G_UNICODE_SCRIPT_DOGRA, 0x11800 },
{ G_UNICODE_SCRIPT_GUNJALA_GONDI, 0x11D60 },
{ G_UNICODE_SCRIPT_HANIFI_ROHINGYA, 0x10D00 },
{ G_UNICODE_SCRIPT_MAKASAR, 0x11EE0 },
{ G_UNICODE_SCRIPT_MEDEFAIDRIN, 0x16E40 },
{ G_UNICODE_SCRIPT_OLD_SOGDIAN, 0x10F00 },
{ G_UNICODE_SCRIPT_SOGDIAN, 0x10F30 },
{ G_UNICODE_SCRIPT_ELYMAIC, 0x10FE0 },
{ G_UNICODE_SCRIPT_NANDINAGARI, 0x119A0 },
{ G_UNICODE_SCRIPT_NYIAKENG_PUACHUE_HMONG, 0x1E100 },
{ G_UNICODE_SCRIPT_WANCHO, 0x1E2C0 },
{ G_UNICODE_SCRIPT_CHORASMIAN, 0x10FB0 },
{ G_UNICODE_SCRIPT_DIVES_AKURU, 0x11900 },
{ G_UNICODE_SCRIPT_KHITAN_SMALL_SCRIPT, 0x18B00 },
{ G_UNICODE_SCRIPT_YEZIDI, 0x10E80 },
};
for (i = 0; i < G_N_ELEMENTS (examples); i++)
g_assert_cmpint (g_unichar_get_script (examples[i].c), ==, examples[i].script);
}
/* Test that g_unichar_combining_class() returns the correct value for
* various ASCII and Unicode alphabetic, numeric, and other, codepoints. */
static void
test_combining_class (void)
{
guint i;
struct {
gint class;
gunichar c;
} examples[] = {
{ 0, 0x0020 },
{ 1, 0x0334 },
{ 7, 0x093C },
{ 8, 0x3099 },
{ 9, 0x094D },
{ 10, 0x05B0 },
{ 11, 0x05B1 },
{ 12, 0x05B2 },
{ 13, 0x05B3 },
{ 14, 0x05B4 },
{ 15, 0x05B5 },
{ 16, 0x05B6 },
{ 17, 0x05B7 },
{ 18, 0x05B8 },
{ 19, 0x05B9 },
{ 20, 0x05BB },
{ 21, 0x05BC },
{ 22, 0x05BD },
{ 23, 0x05BF },
{ 24, 0x05C1 },
{ 25, 0x05C2 },
{ 26, 0xFB1E },
{ 27, 0x064B },
{ 28, 0x064C },
{ 29, 0x064D },
/* ... */
{ 228, 0x05AE },
{ 230, 0x0300 },
{ 232, 0x302C },
{ 233, 0x0362 },
{ 234, 0x0360 },
{ 234, 0x1DCD },
{ 240, 0x0345 }
};
for (i = 0; i < G_N_ELEMENTS (examples); i++)
{
g_assert_cmpint (g_unichar_combining_class (examples[i].c), ==, examples[i].class);
}
}
/* Test that g_unichar_get_mirror() returns the correct value for various
* ASCII and Unicode alphabetic, numeric, and other, codepoints. */
static void
test_mirror (void)
{
gunichar mirror;
g_assert_true (g_unichar_get_mirror_char ('(', &mirror));
g_assert_cmpint (mirror, ==, ')');
g_assert_true (g_unichar_get_mirror_char (')', &mirror));
g_assert_cmpint (mirror, ==, '(');
g_assert_true (g_unichar_get_mirror_char ('{', &mirror));
g_assert_cmpint (mirror, ==, '}');
g_assert_true (g_unichar_get_mirror_char ('}', &mirror));
g_assert_cmpint (mirror, ==, '{');
g_assert_true (g_unichar_get_mirror_char (0x208D, &mirror));
g_assert_cmpint (mirror, ==, 0x208E);
g_assert_true (g_unichar_get_mirror_char (0x208E, &mirror));
g_assert_cmpint (mirror, ==, 0x208D);
g_assert_false (g_unichar_get_mirror_char ('a', &mirror));
}
/* Test that g_utf8_strup() returns the correct value for various
* ASCII and Unicode alphabetic, numeric, and other, codepoints. */
static void
test_strup (void)
{
char *str_up = NULL;
const char *str = "AaZz09x;\x03\x45"
"\xEF\xBD\x81" /* Unichar 'A' (U+FF21) */
"\xEF\xBC\xA1"; /* Unichar 'a' (U+FF41) */
/* Testing degenerated cases */
if (g_test_undefined ())
{
g_test_expect_message (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
"*assertion*!= NULL*");
str_up = g_utf8_strup (NULL, 0);
g_test_assert_expected_messages ();
}
str_up = g_utf8_strup (str, strlen (str));
/* Tricky, comparing two unicode strings with an ASCII function */
g_assert_cmpstr (str_up, ==, "AAZZ09X;\003E\357\274\241\357\274\241");
g_free (str_up);
}
/* Test that g_utf8_strdown() returns the correct value for various
* ASCII and Unicode alphabetic, numeric, and other, codepoints. */
static void
test_strdown (void)
{
char *str_down = NULL;
const char *str = "AaZz09x;\x03\x07"
"\xEF\xBD\x81" /* Unichar 'A' (U+FF21) */
"\xEF\xBC\xA1"; /* Unichar 'a' (U+FF41) */
/* Testing degenerated cases */
if (g_test_undefined ())
{
g_test_expect_message (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
"*assertion*!= NULL*");
str_down = g_utf8_strdown (NULL, 0);
g_test_assert_expected_messages ();
}
str_down = g_utf8_strdown (str, strlen (str));
/* Tricky, comparing two unicode strings with an ASCII function */
g_assert_cmpstr (str_down, ==, "aazz09x;\003\007\357\275\201\357\275\201");
g_free (str_down);
}
/* Test that g_utf8_strup() and g_utf8_strdown() return the correct
* value for Turkish 'i' with and without dot above. */
static void
test_turkish_strupdown (void)
{
char *str_up = NULL;
char *str_down = NULL;
const char *str = "iII"
"\xcc\x87" /* COMBINING DOT ABOVE (U+307) */
"\xc4\xb1" /* LATIN SMALL LETTER DOTLESS I (U+131) */
"\xc4\xb0"; /* LATIN CAPITAL LETTER I WITH DOT ABOVE (U+130) */
char *oldlocale = g_strdup (setlocale (LC_ALL, "tr_TR"));
if (oldlocale == NULL)
{
g_test_skip ("locale tr_TR not available");
return;
}
str_up = g_utf8_strup (str, strlen (str));
str_down = g_utf8_strdown (str, strlen (str));
/* i => LATIN CAPITAL LETTER I WITH DOT ABOVE,
* I => I,
* I + COMBINING DOT ABOVE => I + COMBINING DOT ABOVE,
* LATIN SMALL LETTER DOTLESS I => I,
* LATIN CAPITAL LETTER I WITH DOT ABOVE => LATIN CAPITAL LETTER I WITH DOT ABOVE */
g_assert_cmpstr (str_up, ==, "\xc4\xb0II\xcc\x87I\xc4\xb0");
/* i => i,
* I => LATIN SMALL LETTER DOTLESS I,
* I + COMBINING DOT ABOVE => i,
* LATIN SMALL LETTER DOTLESS I => LATIN SMALL LETTER DOTLESS I,
* LATIN CAPITAL LETTER I WITH DOT ABOVE => i */
g_assert_cmpstr (str_down, ==, "i\xc4\xb1i\xc4\xb1i");
g_free (str_up);
g_free (str_down);
setlocale (LC_ALL, oldlocale);
g_free (oldlocale);
}
/* Test that g_utf8_casefold() returns the correct value for various
* ASCII and Unicode alphabetic, numeric, and other, codepoints. */
static void
test_casefold (void)
{
char *str_casefold = NULL;
const char *str = "AaZz09x;"
"\xEF\xBD\x81" /* Unichar 'A' (U+FF21) */
"\xEF\xBC\xA1"; /* Unichar 'a' (U+FF41) */
/* Testing degenerated cases */
if (g_test_undefined ())
{
g_test_expect_message (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
"*assertion*!= NULL*");
str_casefold = g_utf8_casefold (NULL, 0);
g_test_assert_expected_messages ();
}
str_casefold = g_utf8_casefold (str, strlen (str));
/* Tricky, comparing two unicode strings with an ASCII function */
g_assert_cmpstr (str_casefold, ==, "aazz09x;\357\275\201\357\275\201");
g_free (str_casefold);
}
/* Test that g_unichar_ismark() returns the correct value for various
* ASCII and Unicode alphabetic, numeric, and other, codepoints. */
static void
test_mark (void)
{
g_assert_true (g_unichar_ismark (0x0903));
g_assert_true (g_unichar_ismark (0x20DD));
g_assert_true (g_unichar_ismark (0xA806));
g_assert_false (g_unichar_ismark ('a'));
/*** Testing TYPE() border cases ***/
g_assert_false (g_unichar_ismark (0x3FF5));
/* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
g_assert_false (g_unichar_ismark (0xFFEFF));
/* U+E0001 Language Tag */
g_assert_false (g_unichar_ismark (0xE0001));
g_assert_false (g_unichar_ismark (G_UNICODE_LAST_CHAR));
g_assert_false (g_unichar_ismark (G_UNICODE_LAST_CHAR + 1));
g_assert_false (g_unichar_ismark (G_UNICODE_LAST_CHAR_PART1));
g_assert_false (g_unichar_ismark (G_UNICODE_LAST_CHAR_PART1 + 1));
}
/* Test that g_unichar_isspace() returns the correct value for various
* ASCII and Unicode alphabetic, numeric, and other, codepoints. */
static void
test_space (void)
{
g_assert_false (g_unichar_isspace ('a'));
g_assert_true (g_unichar_isspace (' '));
g_assert_true (g_unichar_isspace ('\t'));
g_assert_true (g_unichar_isspace ('\n'));
g_assert_true (g_unichar_isspace ('\r'));
g_assert_true (g_unichar_isspace ('\f'));
g_assert_false (g_unichar_isspace (0xff41)); /* Unicode fullwidth 'a' */
g_assert_true (g_unichar_isspace (0x202F)); /* Unicode space separator */
g_assert_true (g_unichar_isspace (0x2028)); /* Unicode line separator */
g_assert_true (g_unichar_isspace (0x2029)); /* Unicode paragraph separator */
/*** Testing TYPE() border cases ***/
g_assert_false (g_unichar_isspace (0x3FF5));
/* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
g_assert_false (g_unichar_isspace (0xFFEFF));
/* U+E0001 Language Tag */
g_assert_false (g_unichar_isspace (0xE0001));
g_assert_false (g_unichar_isspace (G_UNICODE_LAST_CHAR));
g_assert_false (g_unichar_isspace (G_UNICODE_LAST_CHAR + 1));
g_assert_false (g_unichar_isspace (G_UNICODE_LAST_CHAR_PART1));
g_assert_false (g_unichar_isspace (G_UNICODE_LAST_CHAR_PART1 + 1));
}
/* Test that g_unichar_isalnum() returns the correct value for various
* ASCII and Unicode alphabetic, numeric, and other, codepoints. */
static void
test_alnum (void)
{
g_assert_false (g_unichar_isalnum (' '));
g_assert_true (g_unichar_isalnum ('a'));
g_assert_true (g_unichar_isalnum ('z'));
g_assert_true (g_unichar_isalnum ('0'));
g_assert_true (g_unichar_isalnum ('9'));
g_assert_true (g_unichar_isalnum ('A'));
g_assert_true (g_unichar_isalnum ('Z'));
g_assert_false (g_unichar_isalnum ('-'));
g_assert_false (g_unichar_isalnum ('*'));
g_assert_true (g_unichar_isalnum (0xFF21)); /* Unichar fullwidth 'A' */
g_assert_true (g_unichar_isalnum (0xFF3A)); /* Unichar fullwidth 'Z' */
g_assert_true (g_unichar_isalnum (0xFF41)); /* Unichar fullwidth 'a' */
g_assert_true (g_unichar_isalnum (0xFF5A)); /* Unichar fullwidth 'z' */
g_assert_true (g_unichar_isalnum (0xFF10)); /* Unichar fullwidth '0' */
g_assert_true (g_unichar_isalnum (0xFF19)); /* Unichar fullwidth '9' */
g_assert_false (g_unichar_isalnum (0xFF0A)); /* Unichar fullwidth '*' */
/*** Testing TYPE() border cases ***/
g_assert_true (g_unichar_isalnum (0x3FF5));
/* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
g_assert_false (g_unichar_isalnum (0xFFEFF));
/* U+E0001 Language Tag */
g_assert_false (g_unichar_isalnum (0xE0001));
g_assert_false (g_unichar_isalnum (G_UNICODE_LAST_CHAR));
g_assert_false (g_unichar_isalnum (G_UNICODE_LAST_CHAR + 1));
g_assert_false (g_unichar_isalnum (G_UNICODE_LAST_CHAR_PART1));
g_assert_false (g_unichar_isalnum (G_UNICODE_LAST_CHAR_PART1 + 1));
}
/* Test that g_unichar_isalpha() returns the correct value for various
* ASCII and Unicode alphabetic, numeric, and other, codepoints. */
static void
test_alpha (void)
{
g_assert_false (g_unichar_isalpha (' '));
g_assert_true (g_unichar_isalpha ('a'));
g_assert_true (g_unichar_isalpha ('z'));
g_assert_false (g_unichar_isalpha ('0'));
g_assert_false (g_unichar_isalpha ('9'));
g_assert_true (g_unichar_isalpha ('A'));
g_assert_true (g_unichar_isalpha ('Z'));
g_assert_false (g_unichar_isalpha ('-'));
g_assert_false (g_unichar_isalpha ('*'));
g_assert_true (g_unichar_isalpha (0xFF21)); /* Unichar fullwidth 'A' */
g_assert_true (g_unichar_isalpha (0xFF3A)); /* Unichar fullwidth 'Z' */
g_assert_true (g_unichar_isalpha (0xFF41)); /* Unichar fullwidth 'a' */
g_assert_true (g_unichar_isalpha (0xFF5A)); /* Unichar fullwidth 'z' */
g_assert_false (g_unichar_isalpha (0xFF10)); /* Unichar fullwidth '0' */
g_assert_false (g_unichar_isalpha (0xFF19)); /* Unichar fullwidth '9' */
g_assert_false (g_unichar_isalpha (0xFF0A)); /* Unichar fullwidth '*' */
/*** Testing TYPE() border cases ***/
g_assert_true (g_unichar_isalpha (0x3FF5));
/* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
g_assert_false (g_unichar_isalpha (0xFFEFF));
/* U+E0001 Language Tag */
g_assert_false (g_unichar_isalpha (0xE0001));
g_assert_false (g_unichar_isalpha (G_UNICODE_LAST_CHAR));
g_assert_false (g_unichar_isalpha (G_UNICODE_LAST_CHAR + 1));
g_assert_false (g_unichar_isalpha (G_UNICODE_LAST_CHAR_PART1));
g_assert_false (g_unichar_isalpha (G_UNICODE_LAST_CHAR_PART1 + 1));
}
/* Test that g_unichar_isdigit() returns the correct value for various
* ASCII and Unicode alphabetic, numeric, and other, codepoints. */
static void
test_digit (void)
{
g_assert_false (g_unichar_isdigit (' '));
g_assert_false (g_unichar_isdigit ('a'));
g_assert_true (g_unichar_isdigit ('0'));
g_assert_true (g_unichar_isdigit ('9'));
g_assert_false (g_unichar_isdigit ('A'));
g_assert_false (g_unichar_isdigit ('-'));
g_assert_false (g_unichar_isdigit ('*'));
g_assert_false (g_unichar_isdigit (0xFF21)); /* Unichar fullwidth 'A' */
g_assert_false (g_unichar_isdigit (0xFF3A)); /* Unichar fullwidth 'Z' */
g_assert_false (g_unichar_isdigit (0xFF41)); /* Unichar fullwidth 'a' */
g_assert_false (g_unichar_isdigit (0xFF5A)); /* Unichar fullwidth 'z' */
g_assert_true (g_unichar_isdigit (0xFF10)); /* Unichar fullwidth '0' */
g_assert_true (g_unichar_isdigit (0xFF19)); /* Unichar fullwidth '9' */
g_assert_false (g_unichar_isdigit (0xFF0A)); /* Unichar fullwidth '*' */
/*** Testing TYPE() border cases ***/
g_assert_false (g_unichar_isdigit (0x3FF5));
/* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
g_assert_false (g_unichar_isdigit (0xFFEFF));
/* U+E0001 Language Tag */
g_assert_false (g_unichar_isdigit (0xE0001));
g_assert_false (g_unichar_isdigit (G_UNICODE_LAST_CHAR));
g_assert_false (g_unichar_isdigit (G_UNICODE_LAST_CHAR + 1));
g_assert_false (g_unichar_isdigit (G_UNICODE_LAST_CHAR_PART1));
g_assert_false (g_unichar_isdigit (G_UNICODE_LAST_CHAR_PART1 + 1));
}
/* Test that g_unichar_digit_value() returns the correct value for various
* ASCII and Unicode alphabetic, numeric, and other, codepoints. */
static void
test_digit_value (void)
{
g_assert_cmpint (g_unichar_digit_value (' '), ==, -1);
g_assert_cmpint (g_unichar_digit_value ('a'), ==, -1);
g_assert_cmpint (g_unichar_digit_value ('0'), ==, 0);
g_assert_cmpint (g_unichar_digit_value ('9'), ==, 9);
g_assert_cmpint (g_unichar_digit_value ('A'), ==, -1);
g_assert_cmpint (g_unichar_digit_value ('-'), ==, -1);
g_assert_cmpint (g_unichar_digit_value (0xFF21), ==, -1); /* Unichar 'A' */
g_assert_cmpint (g_unichar_digit_value (0xFF3A), ==, -1); /* Unichar 'Z' */
g_assert_cmpint (g_unichar_digit_value (0xFF41), ==, -1); /* Unichar 'a' */
g_assert_cmpint (g_unichar_digit_value (0xFF5A), ==, -1); /* Unichar 'z' */
g_assert_cmpint (g_unichar_digit_value (0xFF10), ==, 0); /* Unichar '0' */
g_assert_cmpint (g_unichar_digit_value (0xFF19), ==, 9); /* Unichar '9' */
g_assert_cmpint (g_unichar_digit_value (0xFF0A), ==, -1); /* Unichar '*' */
/*** Testing TYPE() border cases ***/
g_assert_cmpint (g_unichar_digit_value (0x3FF5), ==, -1);
/* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
g_assert_cmpint (g_unichar_digit_value (0xFFEFF), ==, -1);
/* U+E0001 Language Tag */
g_assert_cmpint (g_unichar_digit_value (0xE0001), ==, -1);
g_assert_cmpint (g_unichar_digit_value (G_UNICODE_LAST_CHAR), ==, -1);
g_assert_cmpint (g_unichar_digit_value (G_UNICODE_LAST_CHAR + 1), ==, -1);
g_assert_cmpint (g_unichar_digit_value (G_UNICODE_LAST_CHAR_PART1), ==, -1);
g_assert_cmpint (g_unichar_digit_value (G_UNICODE_LAST_CHAR_PART1 + 1), ==, -1);
}
/* Test that g_unichar_isxdigit() returns the correct value for various
* ASCII and Unicode alphabetic, numeric, and other, codepoints. */
static void
test_xdigit (void)
{
g_assert_false (g_unichar_isxdigit (' '));
g_assert_true (g_unichar_isxdigit ('a'));
g_assert_true (g_unichar_isxdigit ('f'));
g_assert_false (g_unichar_isxdigit ('g'));
g_assert_false (g_unichar_isxdigit ('z'));
g_assert_true (g_unichar_isxdigit ('0'));
g_assert_true (g_unichar_isxdigit ('9'));
g_assert_true (g_unichar_isxdigit ('A'));
g_assert_true (g_unichar_isxdigit ('F'));
g_assert_false (g_unichar_isxdigit ('G'));
g_assert_false (g_unichar_isxdigit ('Z'));
g_assert_false (g_unichar_isxdigit ('-'));
g_assert_false (g_unichar_isxdigit ('*'));
g_assert_true (g_unichar_isxdigit (0xFF21)); /* Unichar fullwidth 'A' */
g_assert_true (g_unichar_isxdigit (0xFF26)); /* Unichar fullwidth 'F' */
g_assert_false (g_unichar_isxdigit (0xFF27)); /* Unichar fullwidth 'G' */
g_assert_false (g_unichar_isxdigit (0xFF3A)); /* Unichar fullwidth 'Z' */
g_assert_true (g_unichar_isxdigit (0xFF41)); /* Unichar fullwidth 'a' */
g_assert_true (g_unichar_isxdigit (0xFF46)); /* Unichar fullwidth 'f' */
g_assert_false (g_unichar_isxdigit (0xFF47)); /* Unichar fullwidth 'g' */
g_assert_false (g_unichar_isxdigit (0xFF5A)); /* Unichar fullwidth 'z' */
g_assert_true (g_unichar_isxdigit (0xFF10)); /* Unichar fullwidth '0' */
g_assert_true (g_unichar_isxdigit (0xFF19)); /* Unichar fullwidth '9' */
g_assert_false (g_unichar_isxdigit (0xFF0A)); /* Unichar fullwidth '*' */
/*** Testing TYPE() border cases ***/
g_assert_false (g_unichar_isxdigit (0x3FF5));
/* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
g_assert_false (g_unichar_isxdigit (0xFFEFF));
/* U+E0001 Language Tag */
g_assert_false (g_unichar_isxdigit (0xE0001));
g_assert_false (g_unichar_isxdigit (G_UNICODE_LAST_CHAR));
g_assert_false (g_unichar_isxdigit (G_UNICODE_LAST_CHAR + 1));
g_assert_false (g_unichar_isxdigit (G_UNICODE_LAST_CHAR_PART1));
g_assert_false (g_unichar_isxdigit (G_UNICODE_LAST_CHAR_PART1 + 1));
}
/* Test that g_unichar_xdigit_value() returns the correct value for various
* ASCII and Unicode alphabetic, numeric, and other, codepoints. */
static void
test_xdigit_value (void)
{
g_assert_cmpint (g_unichar_xdigit_value (' '), ==, -1);
g_assert_cmpint (g_unichar_xdigit_value ('a'), ==, 10);
g_assert_cmpint (g_unichar_xdigit_value ('f'), ==, 15);
g_assert_cmpint (g_unichar_xdigit_value ('g'), ==, -1);
g_assert_cmpint (g_unichar_xdigit_value ('0'), ==, 0);
g_assert_cmpint (g_unichar_xdigit_value ('9'), ==, 9);
g_assert_cmpint (g_unichar_xdigit_value ('A'), ==, 10);
g_assert_cmpint (g_unichar_xdigit_value ('F'), ==, 15);
g_assert_cmpint (g_unichar_xdigit_value ('G'), ==, -1);
g_assert_cmpint (g_unichar_xdigit_value ('-'), ==, -1);
g_assert_cmpint (g_unichar_xdigit_value (0xFF21), ==, 10); /* Unichar 'A' */
g_assert_cmpint (g_unichar_xdigit_value (0xFF26), ==, 15); /* Unichar 'F' */
g_assert_cmpint (g_unichar_xdigit_value (0xFF27), ==, -1); /* Unichar 'G' */
g_assert_cmpint (g_unichar_xdigit_value (0xFF3A), ==, -1); /* Unichar 'Z' */
g_assert_cmpint (g_unichar_xdigit_value (0xFF41), ==, 10); /* Unichar 'a' */
g_assert_cmpint (g_unichar_xdigit_value (0xFF46), ==, 15); /* Unichar 'f' */
g_assert_cmpint (g_unichar_xdigit_value (0xFF47), ==, -1); /* Unichar 'g' */
g_assert_cmpint (g_unichar_xdigit_value (0xFF5A), ==, -1); /* Unichar 'z' */
g_assert_cmpint (g_unichar_xdigit_value (0xFF10), ==, 0); /* Unichar '0' */
g_assert_cmpint (g_unichar_xdigit_value (0xFF19), ==, 9); /* Unichar '9' */
g_assert_cmpint (g_unichar_xdigit_value (0xFF0A), ==, -1); /* Unichar '*' */
/*** Testing TYPE() border cases ***/
g_assert_cmpint (g_unichar_xdigit_value (0x3FF5), ==, -1);
/* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
g_assert_cmpint (g_unichar_xdigit_value (0xFFEFF), ==, -1);
/* U+E0001 Language Tag */
g_assert_cmpint (g_unichar_xdigit_value (0xE0001), ==, -1);
g_assert_cmpint (g_unichar_xdigit_value (G_UNICODE_LAST_CHAR), ==, -1);
g_assert_cmpint (g_unichar_xdigit_value (G_UNICODE_LAST_CHAR + 1), ==, -1);
g_assert_cmpint (g_unichar_xdigit_value (G_UNICODE_LAST_CHAR_PART1), ==, -1);
g_assert_cmpint (g_unichar_xdigit_value (G_UNICODE_LAST_CHAR_PART1 + 1), ==, -1);
}
/* Test that g_unichar_ispunct() returns the correct value for various
* ASCII and Unicode alphabetic, numeric, and other, codepoints. */
static void
test_punctuation (void)
{
g_assert_false (g_unichar_ispunct (' '));
g_assert_false (g_unichar_ispunct ('a'));
g_assert_true (g_unichar_ispunct ('.'));
g_assert_true (g_unichar_ispunct (','));
g_assert_true (g_unichar_ispunct (';'));
g_assert_true (g_unichar_ispunct (':'));
g_assert_true (g_unichar_ispunct ('-'));
g_assert_false (g_unichar_ispunct (0xFF21)); /* Unichar fullwidth 'A' */
g_assert_true (g_unichar_ispunct (0x005F)); /* Unichar fullwidth '.' */
g_assert_true (g_unichar_ispunct (0x058A)); /* Unichar fullwidth '-' */
/*** Testing TYPE() border cases ***/
g_assert_false (g_unichar_ispunct (0x3FF5));
/* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
g_assert_false (g_unichar_ispunct (0xFFEFF));
/* U+E0001 Language Tag */
g_assert_false (g_unichar_ispunct (0xE0001));
g_assert_false (g_unichar_ispunct (G_UNICODE_LAST_CHAR));
g_assert_false (g_unichar_ispunct (G_UNICODE_LAST_CHAR + 1));
g_assert_false (g_unichar_ispunct (G_UNICODE_LAST_CHAR_PART1));
g_assert_false (g_unichar_ispunct (G_UNICODE_LAST_CHAR_PART1 + 1));
}
/* Test that g_unichar_iscntrl() returns the correct value for various
* ASCII and Unicode alphabetic, numeric, and other, codepoints. */
static void
test_cntrl (void)
{
g_assert_true (g_unichar_iscntrl (0x08));
g_assert_false (g_unichar_iscntrl ('a'));
g_assert_true (g_unichar_iscntrl (0x007F)); /* Unichar fullwidth <del> */
g_assert_true (g_unichar_iscntrl (0x009F)); /* Unichar fullwidth control */
/*** Testing TYPE() border cases ***/
g_assert_false (g_unichar_iscntrl (0x3FF5));
/* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
g_assert_false (g_unichar_iscntrl (0xFFEFF));
/* U+E0001 Language Tag */
g_assert_false (g_unichar_iscntrl (0xE0001));
g_assert_false (g_unichar_iscntrl (G_UNICODE_LAST_CHAR));
g_assert_false (g_unichar_iscntrl (G_UNICODE_LAST_CHAR + 1));
g_assert_false (g_unichar_iscntrl (G_UNICODE_LAST_CHAR_PART1));
g_assert_false (g_unichar_iscntrl (G_UNICODE_LAST_CHAR_PART1 + 1));
}
/* Test that g_unichar_isgraph() returns the correct value for various
* ASCII and Unicode alphabetic, numeric, and other, codepoints. */
static void
test_graph (void)
{
g_assert_false (g_unichar_isgraph (0x08));
g_assert_false (g_unichar_isgraph (' '));
g_assert_true (g_unichar_isgraph ('a'));
g_assert_true (g_unichar_isgraph ('0'));
g_assert_true (g_unichar_isgraph ('9'));
g_assert_true (g_unichar_isgraph ('A'));
g_assert_true (g_unichar_isgraph ('-'));
g_assert_true (g_unichar_isgraph ('*'));
g_assert_true (g_unichar_isgraph (0xFF21)); /* Unichar fullwidth 'A' */
g_assert_true (g_unichar_isgraph (0xFF3A)); /* Unichar fullwidth 'Z' */
g_assert_true (g_unichar_isgraph (0xFF41)); /* Unichar fullwidth 'a' */
g_assert_true (g_unichar_isgraph (0xFF5A)); /* Unichar fullwidth 'z' */
g_assert_true (g_unichar_isgraph (0xFF10)); /* Unichar fullwidth '0' */
g_assert_true (g_unichar_isgraph (0xFF19)); /* Unichar fullwidth '9' */
g_assert_true (g_unichar_isgraph (0xFF0A)); /* Unichar fullwidth '*' */
g_assert_false (g_unichar_isgraph (0x007F)); /* Unichar fullwidth <del> */
g_assert_false (g_unichar_isgraph (0x009F)); /* Unichar fullwidth control */
/*** Testing TYPE() border cases ***/
g_assert_true (g_unichar_isgraph (0x3FF5));
/* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
g_assert_true (g_unichar_isgraph (0xFFEFF));
/* U+E0001 Language Tag */
g_assert_false (g_unichar_isgraph (0xE0001));
g_assert_false (g_unichar_isgraph (G_UNICODE_LAST_CHAR));
g_assert_false (g_unichar_isgraph (G_UNICODE_LAST_CHAR + 1));
g_assert_false (g_unichar_isgraph (G_UNICODE_LAST_CHAR_PART1));
g_assert_false (g_unichar_isgraph (G_UNICODE_LAST_CHAR_PART1 + 1));
}
/* Test that g_unichar_iszerowidth() returns the correct value for various
* ASCII and Unicode alphabetic, numeric, and other, codepoints. */
static void
test_zerowidth (void)
{
g_assert_false (g_unichar_iszerowidth (0x00AD));
g_assert_false (g_unichar_iszerowidth (0x115F));
g_assert_true (g_unichar_iszerowidth (0x1160));
g_assert_true (g_unichar_iszerowidth (0x11AA));
g_assert_true (g_unichar_iszerowidth (0x11FF));
g_assert_false (g_unichar_iszerowidth (0x1200));
g_assert_false (g_unichar_iszerowidth (0x200A));
g_assert_true (g_unichar_iszerowidth (0x200B));
g_assert_true (g_unichar_iszerowidth (0x200C));
g_assert_true (g_unichar_iszerowidth (0x591));
/*** Testing TYPE() border cases ***/
g_assert_false (g_unichar_iszerowidth (0x3FF5));
/* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
g_assert_false (g_unichar_iszerowidth (0xFFEFF));
/* U+E0001 Language Tag */
g_assert_true (g_unichar_iszerowidth (0xE0001));
g_assert_false (g_unichar_iszerowidth (G_UNICODE_LAST_CHAR));
g_assert_false (g_unichar_iszerowidth (G_UNICODE_LAST_CHAR + 1));
g_assert_false (g_unichar_iszerowidth (G_UNICODE_LAST_CHAR_PART1));
g_assert_false (g_unichar_iszerowidth (G_UNICODE_LAST_CHAR_PART1 + 1));
}
/* Test that g_unichar_istitle() returns the correct value for various
* ASCII and Unicode alphabetic, numeric, and other, codepoints. */
static void
test_title (void)
{
g_assert_true (g_unichar_istitle (0x01c5));
g_assert_true (g_unichar_istitle (0x1f88));
g_assert_true (g_unichar_istitle (0x1fcc));
g_assert_false (g_unichar_istitle ('a'));
g_assert_false (g_unichar_istitle ('A'));
g_assert_false (g_unichar_istitle (';'));
/*** Testing TYPE() border cases ***/
g_assert_false (g_unichar_istitle (0x3FF5));
/* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
g_assert_false (g_unichar_istitle (0xFFEFF));
/* U+E0001 Language Tag */
g_assert_false (g_unichar_istitle (0xE0001));
g_assert_false (g_unichar_istitle (G_UNICODE_LAST_CHAR));
g_assert_false (g_unichar_istitle (G_UNICODE_LAST_CHAR + 1));
g_assert_false (g_unichar_istitle (G_UNICODE_LAST_CHAR_PART1));
g_assert_false (g_unichar_istitle (G_UNICODE_LAST_CHAR_PART1 + 1));
g_assert_cmphex (g_unichar_totitle (0x0000), ==, 0x0000);
g_assert_cmphex (g_unichar_totitle (0x01c6), ==, 0x01c5);
g_assert_cmphex (g_unichar_totitle (0x01c4), ==, 0x01c5);
g_assert_cmphex (g_unichar_totitle (0x01c5), ==, 0x01c5);
g_assert_cmphex (g_unichar_totitle (0x1f80), ==, 0x1f88);
g_assert_cmphex (g_unichar_totitle (0x1f88), ==, 0x1f88);
g_assert_cmphex (g_unichar_totitle ('a'), ==, 'A');
g_assert_cmphex (g_unichar_totitle ('A'), ==, 'A');
/*** Testing TYPE() border cases ***/
g_assert_cmphex (g_unichar_totitle (0x3FF5), ==, 0x3FF5);
/* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
g_assert_cmphex (g_unichar_totitle (0xFFEFF), ==, 0xFFEFF);
g_assert_cmphex (g_unichar_totitle (0xDFFFF), ==, 0xDFFFF);
/* U+E0001 Language Tag */
g_assert_cmphex (g_unichar_totitle (0xE0001), ==, 0xE0001);
g_assert_cmphex (g_unichar_totitle (G_UNICODE_LAST_CHAR), ==,
G_UNICODE_LAST_CHAR);
g_assert_cmphex (g_unichar_totitle (G_UNICODE_LAST_CHAR + 1), ==,
(G_UNICODE_LAST_CHAR + 1));
g_assert_cmphex (g_unichar_totitle (G_UNICODE_LAST_CHAR_PART1), ==,
(G_UNICODE_LAST_CHAR_PART1));
g_assert_cmphex (g_unichar_totitle (G_UNICODE_LAST_CHAR_PART1 + 1), ==,
(G_UNICODE_LAST_CHAR_PART1 + 1));
}
/* Test that g_unichar_isupper() returns the correct value for various
* ASCII and Unicode alphabetic, numeric, and other, codepoints. */
static void
test_upper (void)
{
g_assert_false (g_unichar_isupper (' '));
g_assert_false (g_unichar_isupper ('0'));
g_assert_false (g_unichar_isupper ('a'));
g_assert_true (g_unichar_isupper ('A'));
g_assert_false (g_unichar_isupper (0xff41)); /* Unicode fullwidth 'a' */
g_assert_true (g_unichar_isupper (0xff21)); /* Unicode fullwidth 'A' */
/*** Testing TYPE() border cases ***/
g_assert_false (g_unichar_isupper (0x3FF5));
/* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
g_assert_false (g_unichar_isupper (0xFFEFF));
/* U+E0001 Language Tag */
g_assert_false (g_unichar_isupper (0xE0001));
g_assert_false (g_unichar_isupper (G_UNICODE_LAST_CHAR));
g_assert_false (g_unichar_isupper (G_UNICODE_LAST_CHAR + 1));
g_assert_false (g_unichar_isupper (G_UNICODE_LAST_CHAR_PART1));
g_assert_false (g_unichar_isupper (G_UNICODE_LAST_CHAR_PART1 + 1));
}
/* Test that g_unichar_islower() returns the correct value for various
* ASCII and Unicode alphabetic, numeric, and other, codepoints. */
static void
test_lower (void)
{
g_assert_false (g_unichar_islower (' '));
g_assert_false (g_unichar_islower ('0'));
g_assert_true (g_unichar_islower ('a'));
g_assert_false (g_unichar_islower ('A'));
g_assert_true (g_unichar_islower (0xff41)); /* Unicode fullwidth 'a' */
g_assert_false (g_unichar_islower (0xff21)); /* Unicode fullwidth 'A' */
/*** Testing TYPE() border cases ***/
g_assert_false (g_unichar_islower (0x3FF5));
/* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
g_assert_false (g_unichar_islower (0xFFEFF));
/* U+E0001 Language Tag */
g_assert_false (g_unichar_islower (0xE0001));
g_assert_false (g_unichar_islower (G_UNICODE_LAST_CHAR));
g_assert_false (g_unichar_islower (G_UNICODE_LAST_CHAR + 1));
g_assert_false (g_unichar_islower (G_UNICODE_LAST_CHAR_PART1));
g_assert_false (g_unichar_islower (G_UNICODE_LAST_CHAR_PART1 + 1));
}
/* Test that g_unichar_isprint() returns the correct value for various
* ASCII and Unicode alphabetic, numeric, and other, codepoints. */
static void
test_print (void)
{
g_assert_true (g_unichar_isprint (' '));
g_assert_true (g_unichar_isprint ('0'));
g_assert_true (g_unichar_isprint ('a'));
g_assert_true (g_unichar_isprint ('A'));
g_assert_true (g_unichar_isprint (0xff41)); /* Unicode fullwidth 'a' */
g_assert_true (g_unichar_isprint (0xff21)); /* Unicode fullwidth 'A' */
/*** Testing TYPE() border cases ***/
g_assert_true (g_unichar_isprint (0x3FF5));
/* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
g_assert_true (g_unichar_isprint (0xFFEFF));
/* U+E0001 Language Tag */
g_assert_false (g_unichar_isprint (0xE0001));
g_assert_false (g_unichar_isprint (G_UNICODE_LAST_CHAR));
g_assert_false (g_unichar_isprint (G_UNICODE_LAST_CHAR + 1));
g_assert_false (g_unichar_isprint (G_UNICODE_LAST_CHAR_PART1));
g_assert_false (g_unichar_isprint (G_UNICODE_LAST_CHAR_PART1 + 1));
}
/* Test that g_unichar_toupper() and g_unichar_tolower() return the
* correct values for various ASCII and Unicode alphabetic, numeric,
* and other, codepoints. */
static void
test_cases (void)
{
g_assert_cmphex (g_unichar_toupper (0x0), ==, 0x0);
g_assert_cmphex (g_unichar_tolower (0x0), ==, 0x0);
g_assert_cmphex (g_unichar_toupper ('a'), ==, 'A');
g_assert_cmphex (g_unichar_toupper ('A'), ==, 'A');
/* Unicode fullwidth 'a' == 'A' */
g_assert_cmphex (g_unichar_toupper (0xff41), ==, 0xff21);
/* Unicode fullwidth 'A' == 'A' */
g_assert_cmphex (g_unichar_toupper (0xff21), ==, 0xff21);
g_assert_cmphex (g_unichar_toupper (0x01C5), ==, 0x01C4);
g_assert_cmphex (g_unichar_toupper (0x01C6), ==, 0x01C4);
g_assert_cmphex (g_unichar_tolower ('A'), ==, 'a');
g_assert_cmphex (g_unichar_tolower ('a'), ==, 'a');
/* Unicode fullwidth 'A' == 'a' */
g_assert_cmphex (g_unichar_tolower (0xff21), ==, 0xff41);
/* Unicode fullwidth 'a' == 'a' */
g_assert_cmphex (g_unichar_tolower (0xff41), ==, 0xff41);
g_assert_cmphex (g_unichar_tolower (0x01C4), ==, 0x01C6);
g_assert_cmphex (g_unichar_tolower (0x01C5), ==, 0x01C6);
g_assert_cmphex (g_unichar_tolower (0x1F8A), ==, 0x1F82);
g_assert_cmphex (g_unichar_totitle (0x1F8A), ==, 0x1F8A);
g_assert_cmphex (g_unichar_toupper (0x1F8A), ==, 0x1F8A);
g_assert_cmphex (g_unichar_tolower (0x1FB2), ==, 0x1FB2);
g_assert_cmphex (g_unichar_toupper (0x1FB2), ==, 0x1FB2);
/* U+130 is a special case, it's a 'I' with a dot on top */
g_assert_cmphex (g_unichar_tolower (0x130), ==, 0x69);
/* Testing ATTTABLE() border cases */
g_assert_cmphex (g_unichar_toupper (0x1D6FE), ==, 0x1D6FE);
/*** Testing TYPE() border cases ***/
g_assert_cmphex (g_unichar_toupper (0x3FF5), ==, 0x3FF5);
/* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
g_assert_cmphex (g_unichar_toupper (0xFFEFF), ==, 0xFFEFF);
g_assert_cmphex (g_unichar_toupper (0xDFFFF), ==, 0xDFFFF);
/* U+E0001 Language Tag */
g_assert_cmphex (g_unichar_toupper (0xE0001), ==, 0xE0001);
g_assert_cmphex (g_unichar_toupper (G_UNICODE_LAST_CHAR), ==,
G_UNICODE_LAST_CHAR);
g_assert_cmphex (g_unichar_toupper (G_UNICODE_LAST_CHAR + 1), ==,
(G_UNICODE_LAST_CHAR + 1));
g_assert_cmphex (g_unichar_toupper (G_UNICODE_LAST_CHAR_PART1), ==,
(G_UNICODE_LAST_CHAR_PART1));
g_assert_cmphex (g_unichar_toupper (G_UNICODE_LAST_CHAR_PART1 + 1), ==,
(G_UNICODE_LAST_CHAR_PART1 + 1));
/* Testing ATTTABLE() border cases */
g_assert_cmphex (g_unichar_tolower (0x1D6FA), ==, 0x1D6FA);
/*** Testing TYPE() border cases ***/
g_assert_cmphex (g_unichar_tolower (0x3FF5), ==, 0x3FF5);
/* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
g_assert_cmphex (g_unichar_tolower (0xFFEFF), ==, 0xFFEFF);
g_assert_cmphex (g_unichar_tolower (0xDFFFF), ==, 0xDFFFF);
/* U+E0001 Language Tag */
g_assert_cmphex (g_unichar_tolower (0xE0001), ==, 0xE0001);
g_assert_cmphex (g_unichar_tolower (G_UNICODE_LAST_CHAR), ==,
G_UNICODE_LAST_CHAR);
g_assert_cmphex (g_unichar_tolower (G_UNICODE_LAST_CHAR + 1), ==,
(G_UNICODE_LAST_CHAR + 1));
g_assert_cmphex (g_unichar_tolower (G_UNICODE_LAST_CHAR_PART1), ==,
G_UNICODE_LAST_CHAR_PART1);
g_assert_cmphex (g_unichar_tolower (G_UNICODE_LAST_CHAR_PART1 + 1), ==,
(G_UNICODE_LAST_CHAR_PART1 + 1));
}
/* Test that g_unichar_isdefined() returns the correct value for various
* ASCII and Unicode alphabetic, numeric, and other, codepoints. */
static void
test_defined (void)
{
g_assert_true (g_unichar_isdefined (0x0903));
g_assert_true (g_unichar_isdefined (0x20DD));
g_assert_true (g_unichar_isdefined (0x20BA));
g_assert_true (g_unichar_isdefined (0xA806));
g_assert_true (g_unichar_isdefined ('a'));
g_assert_false (g_unichar_isdefined (0x10C49));
g_assert_false (g_unichar_isdefined (0x169D));
/*** Testing TYPE() border cases ***/
g_assert_true (g_unichar_isdefined (0x3FF5));
/* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
g_assert_true (g_unichar_isdefined (0xFFEFF));
g_assert_false (g_unichar_isdefined (0xDFFFF));
/* U+E0001 Language Tag */
g_assert_true (g_unichar_isdefined (0xE0001));
g_assert_false (g_unichar_isdefined (G_UNICODE_LAST_CHAR));
g_assert_false (g_unichar_isdefined (G_UNICODE_LAST_CHAR + 1));
g_assert_false (g_unichar_isdefined (G_UNICODE_LAST_CHAR_PART1));
g_assert_false (g_unichar_isdefined (G_UNICODE_LAST_CHAR_PART1 + 1));
}
/* Test that g_unichar_iswide() returns the correct value for various
* ASCII and Unicode alphabetic, numeric, and other, codepoints. */
static void
test_wide (void)
{
guint i;
struct {
gunichar c;
enum {
NOT_WIDE,
WIDE_CJK,
WIDE
} wide;
} examples[] = {
/* Neutral */
{ 0x0000, NOT_WIDE },
{ 0x0483, NOT_WIDE },
{ 0x0641, NOT_WIDE },
{ 0xFFFC, NOT_WIDE },
{ 0x10000, NOT_WIDE },
{ 0xE0001, NOT_WIDE },
{ 0x2FFFE, NOT_WIDE },
{ 0x3FFFE, NOT_WIDE },
/* Narrow */
{ 0x0020, NOT_WIDE },
{ 0x0041, NOT_WIDE },
{ 0x27E6, NOT_WIDE },
/* Halfwidth */
{ 0x20A9, NOT_WIDE },
{ 0xFF61, NOT_WIDE },
{ 0xFF69, NOT_WIDE },
{ 0xFFEE, NOT_WIDE },
/* Ambiguous */
{ 0x00A1, WIDE_CJK },
{ 0x00BE, WIDE_CJK },
{ 0x02DD, WIDE_CJK },
{ 0x2020, WIDE_CJK },
{ 0xFFFD, WIDE_CJK },
{ 0x00A1, WIDE_CJK },
{ 0x1F100, WIDE_CJK },
{ 0xE0100, WIDE_CJK },
{ 0x100000, WIDE_CJK },
{ 0x10FFFD, WIDE_CJK },
/* Fullwidth */
{ 0x3000, WIDE },
{ 0xFF60, WIDE },
/* Wide */
{ 0x2329, WIDE },
{ 0x3001, WIDE },
{ 0xFE69, WIDE },
{ 0x30000, WIDE },
{ 0x3FFFD, WIDE },
/* Default Wide blocks */
{ 0x4DBF, WIDE },
{ 0x9FFF, WIDE },
{ 0xFAFF, WIDE },
{ 0x2A6DF, WIDE },
{ 0x2B73F, WIDE },
{ 0x2B81F, WIDE },
{ 0x2FA1F, WIDE },
/* Uniode-5.2 character additions */
/* Wide */
{ 0x115F, WIDE },
/* Uniode-6.0 character additions */
/* Wide */
{ 0x2B740, WIDE },
{ 0x1B000, WIDE },
{ 0x111111, NOT_WIDE }
};
for (i = 0; i < G_N_ELEMENTS (examples); i++)
{
g_assert_cmpint (g_unichar_iswide (examples[i].c), ==,
(examples[i].wide == WIDE));
g_assert_cmpint (g_unichar_iswide_cjk (examples[i].c), ==,
(examples[i].wide != NOT_WIDE));
}
};
/* Test that g_unichar_compose() returns the correct value for various
* ASCII and Unicode alphabetic, numeric, and other, codepoints. */
static void
test_compose (void)
{
gunichar ch;
/* Not composable */
g_assert_false (g_unichar_compose (0x0041, 0x0042, &ch) && ch == 0);
g_assert_false (g_unichar_compose (0x0041, 0, &ch) && ch == 0);
g_assert_false (g_unichar_compose (0x0066, 0x0069, &ch) && ch == 0);
/* Tricky non-composable */
g_assert_false (g_unichar_compose (0x0308, 0x0301, &ch) && ch == 0); /* !0x0344 */
g_assert_false (g_unichar_compose (0x0F71, 0x0F72, &ch) && ch == 0); /* !0x0F73 */
/* Singletons should not compose */
g_assert_false (g_unichar_compose (0x212B, 0, &ch) && ch == 0);
g_assert_false (g_unichar_compose (0x00C5, 0, &ch) && ch == 0);
g_assert_false (g_unichar_compose (0x2126, 0, &ch) && ch == 0);
g_assert_false (g_unichar_compose (0x03A9, 0, &ch) && ch == 0);
/* Pairs */
g_assert_true (g_unichar_compose (0x0041, 0x030A, &ch) && ch == 0x00C5);
g_assert_true (g_unichar_compose (0x006F, 0x0302, &ch) && ch == 0x00F4);
g_assert_true (g_unichar_compose (0x1E63, 0x0307, &ch) && ch == 0x1E69);
g_assert_true (g_unichar_compose (0x0073, 0x0323, &ch) && ch == 0x1E63);
g_assert_true (g_unichar_compose (0x0064, 0x0307, &ch) && ch == 0x1E0B);
g_assert_true (g_unichar_compose (0x0064, 0x0323, &ch) && ch == 0x1E0D);
/* Hangul */
g_assert_true (g_unichar_compose (0xD4CC, 0x11B6, &ch) && ch == 0xD4DB);
g_assert_true (g_unichar_compose (0x1111, 0x1171, &ch) && ch == 0xD4CC);
g_assert_true (g_unichar_compose (0xCE20, 0x11B8, &ch) && ch == 0xCE31);
g_assert_true (g_unichar_compose (0x110E, 0x1173, &ch) && ch == 0xCE20);
}
/* Test that g_unichar_decompose() returns the correct value for various
* ASCII and Unicode alphabetic, numeric, and other, codepoints. */
static void
test_decompose (void)
{
gunichar a, b;
/* Not decomposable */
g_assert_false (g_unichar_decompose (0x0041, &a, &b) && a == 0x0041 && b == 0);
g_assert_false (g_unichar_decompose (0xFB01, &a, &b) && a == 0xFB01 && b == 0);
/* Singletons */
g_assert_true (g_unichar_decompose (0x212B, &a, &b) && a == 0x00C5 && b == 0);
g_assert_true (g_unichar_decompose (0x2126, &a, &b) && a == 0x03A9 && b == 0);
/* Tricky pairs */
g_assert_true (g_unichar_decompose (0x0344, &a, &b) && a == 0x0308 && b == 0x0301);
g_assert_true (g_unichar_decompose (0x0F73, &a, &b) && a == 0x0F71 && b == 0x0F72);
/* Pairs */
g_assert_true (g_unichar_decompose (0x00C5, &a, &b) && a == 0x0041 && b == 0x030A);
g_assert_true (g_unichar_decompose (0x00F4, &a, &b) && a == 0x006F && b == 0x0302);
g_assert_true (g_unichar_decompose (0x1E69, &a, &b) && a == 0x1E63 && b == 0x0307);
g_assert_true (g_unichar_decompose (0x1E63, &a, &b) && a == 0x0073 && b == 0x0323);
g_assert_true (g_unichar_decompose (0x1E0B, &a, &b) && a == 0x0064 && b == 0x0307);
g_assert_true (g_unichar_decompose (0x1E0D, &a, &b) && a == 0x0064 && b == 0x0323);
/* Hangul */
g_assert_true (g_unichar_decompose (0xD4DB, &a, &b) && a == 0xD4CC && b == 0x11B6);
g_assert_true (g_unichar_decompose (0xD4CC, &a, &b) && a == 0x1111 && b == 0x1171);
g_assert_true (g_unichar_decompose (0xCE31, &a, &b) && a == 0xCE20 && b == 0x11B8);
g_assert_true (g_unichar_decompose (0xCE20, &a, &b) && a == 0x110E && b == 0x1173);
}
/* Test that g_unichar_fully_decompose() returns the correct value for
* various ASCII and Unicode alphabetic, numeric, and other, codepoints. */
static void
test_fully_decompose_canonical (void)
{
gunichar decomp[5];
gsize len;
#define TEST_DECOMP(ch, expected_len, a, b, c, d) \
len = g_unichar_fully_decompose (ch, FALSE, decomp, G_N_ELEMENTS (decomp)); \
g_assert_cmpint (expected_len, ==, len); \
if (expected_len >= 1) g_assert_cmphex (decomp[0], ==, a); \
if (expected_len >= 2) g_assert_cmphex (decomp[1], ==, b); \
if (expected_len >= 3) g_assert_cmphex (decomp[2], ==, c); \
if (expected_len >= 4) g_assert_cmphex (decomp[3], ==, d); \
#define TEST0(ch) TEST_DECOMP (ch, 1, ch, 0, 0, 0)
#define TEST1(ch, a) TEST_DECOMP (ch, 1, a, 0, 0, 0)
#define TEST2(ch, a, b) TEST_DECOMP (ch, 2, a, b, 0, 0)
#define TEST3(ch, a, b, c) TEST_DECOMP (ch, 3, a, b, c, 0)
#define TEST4(ch, a, b, c, d) TEST_DECOMP (ch, 4, a, b, c, d)
/* Not decomposable */
TEST0 (0x0041);
TEST0 (0xFB01);
/* Singletons */
TEST2 (0x212B, 0x0041, 0x030A);
TEST1 (0x2126, 0x03A9);
/* Tricky pairs */
TEST2 (0x0344, 0x0308, 0x0301);
TEST2 (0x0F73, 0x0F71, 0x0F72);
/* General */
TEST2 (0x00C5, 0x0041, 0x030A);
TEST2 (0x00F4, 0x006F, 0x0302);
TEST3 (0x1E69, 0x0073, 0x0323, 0x0307);
TEST2 (0x1E63, 0x0073, 0x0323);
TEST2 (0x1E0B, 0x0064, 0x0307);
TEST2 (0x1E0D, 0x0064, 0x0323);
/* Hangul */
TEST3 (0xD4DB, 0x1111, 0x1171, 0x11B6);
TEST2 (0xD4CC, 0x1111, 0x1171);
TEST3 (0xCE31, 0x110E, 0x1173, 0x11B8);
TEST2 (0xCE20, 0x110E, 0x1173);
#undef TEST_DECOMP
}
/* Test that g_unicode_canonical_decomposition() returns the correct
* value for various ASCII and Unicode alphabetic, numeric, and other,
* codepoints. */
static void
test_canonical_decomposition (void)
{
gunichar *decomp;
gsize len;
#define TEST_DECOMP(ch, expected_len, a, b, c, d) \
decomp = g_unicode_canonical_decomposition (ch, &len); \
g_assert_cmpint (expected_len, ==, len); \
if (expected_len >= 1) g_assert_cmphex (decomp[0], ==, a); \
if (expected_len >= 2) g_assert_cmphex (decomp[1], ==, b); \
if (expected_len >= 3) g_assert_cmphex (decomp[2], ==, c); \
if (expected_len >= 4) g_assert_cmphex (decomp[3], ==, d); \
g_free (decomp);
#define TEST0(ch) TEST_DECOMP (ch, 1, ch, 0, 0, 0)
#define TEST1(ch, a) TEST_DECOMP (ch, 1, a, 0, 0, 0)
#define TEST2(ch, a, b) TEST_DECOMP (ch, 2, a, b, 0, 0)
#define TEST3(ch, a, b, c) TEST_DECOMP (ch, 3, a, b, c, 0)
#define TEST4(ch, a, b, c, d) TEST_DECOMP (ch, 4, a, b, c, d)
/* Not decomposable */
TEST0 (0x0041);
TEST0 (0xFB01);
/* Singletons */
TEST2 (0x212B, 0x0041, 0x030A);
TEST1 (0x2126, 0x03A9);
/* Tricky pairs */
TEST2 (0x0344, 0x0308, 0x0301);
TEST2 (0x0F73, 0x0F71, 0x0F72);
/* General */
TEST2 (0x00C5, 0x0041, 0x030A);
TEST2 (0x00F4, 0x006F, 0x0302);
TEST3 (0x1E69, 0x0073, 0x0323, 0x0307);
TEST2 (0x1E63, 0x0073, 0x0323);
TEST2 (0x1E0B, 0x0064, 0x0307);
TEST2 (0x1E0D, 0x0064, 0x0323);
/* Hangul */
TEST3 (0xD4DB, 0x1111, 0x1171, 0x11B6);
TEST2 (0xD4CC, 0x1111, 0x1171);
TEST3 (0xCE31, 0x110E, 0x1173, 0x11B8);
TEST2 (0xCE20, 0x110E, 0x1173);
#undef TEST_DECOMP
}
/* Test that g_unichar_decompose() whenever encouttering a char ch
* decomposes into a and b, b itself won't decompose any further. */
static void
test_decompose_tail (void)
{
gunichar ch, a, b, c, d;
/* Test that whenever a char ch decomposes into a and b, b itself
* won't decompose any further. */
for (ch = 0; ch < 0x110000; ch++)
if (g_unichar_decompose (ch, &a, &b))
g_assert_false (g_unichar_decompose (b, &c, &d));
else
{
g_assert_cmpuint (a, ==, ch);
g_assert_cmpuint (b, ==, 0);
}
}
/* Test that all canonical decompositions of g_unichar_fully_decompose()
* are at most 4 in length, and compatibility decompositions are
* at most 18 in length. */
static void
test_fully_decompose_len (void)
{
gunichar ch;
/* Test that all canonical decompositions are at most 4 in length,
* and compatibility decompositions are at most 18 in length.
*/
for (ch = 0; ch < 0x110000; ch++) {
g_assert_cmpint (g_unichar_fully_decompose (ch, FALSE, NULL, 0), <=, 4);
g_assert_cmpint (g_unichar_fully_decompose (ch, TRUE, NULL, 0), <=, 18);
}
}
/* Check various examples from Unicode Annex #15 for NFD and NFC
* normalization.
*/
static void
test_normalization (void)
{
const struct {
const char *source;
const char *nfd;
const char *nfc;
} tests[] = {
// Singletons
{ "\xe2\x84\xab", "A\xcc\x8a", "Å" }, // U+212B ANGSTROM SIGN
{ "\xe2\x84\xa6", "Ω", "Ω" }, // U+2126 OHM SIGN
// Canonical Composites
{ "Å", "A\xcc\x8a", "Å" }, // U+00C5 LATIN CAPITAL LETTER A WITH RING ABOVE
{ "ô", "o\xcc\x82", "ô" }, // U+00F4 LATIN SMALL LETTER O WITH CIRCUMFLEX
// Multiple Combining Marks
{ "\xe1\xb9\xa9", "s\xcc\xa3\xcc\x87", "" }, // U+1E69 LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE
{ "\xe1\xb8\x8b\xcc\xa3", "d\xcc\xa3\xcc\x87", "ḍ̇" },
{ "q\xcc\x87\xcc\xa3", "q\xcc\xa3\xcc\x87", "q̣̇" },
// Compatibility Composites
{ "", "", "" }, // U+FB01 LATIN SMALL LIGATURE FI
{ "2\xe2\x81\xb5", "2\xe2\x81\xb5", "2⁵" },
{ "\xe1\xba\x9b\xcc\xa3", "\xc5\xbf\xcc\xa3\xcc\x87", "ẛ̣" },
// Tests for behavior with reordered marks
{ "s\xcc\x87\xcc\xa3", "s\xcc\xa3\xcc\x87", "" },
{ "α\xcc\x94\xcd\x82", "α\xcc\x94\xcd\x82", "" },
{ "α\xcd\x82\xcc\x94", "α\xcd\x82\xcc\x94", "\xcc\x94" },
};
gsize i;
for (i = 0; i < G_N_ELEMENTS (tests); i++)
{
char *nfd, *nfc;
nfd = g_utf8_normalize (tests[i].source, -1, G_NORMALIZE_NFD);
g_assert_cmpstr (nfd, ==, tests[i].nfd);
nfc = g_utf8_normalize (tests[i].nfd, -1, G_NORMALIZE_NFC);
g_assert_cmpstr (nfc, ==, tests[i].nfc);
g_free (nfd);
g_free (nfc);
}
}
static void
test_iso15924 (void)
{
const struct {
GUnicodeScript script;
char four_letter_code[5];
} data[] = {
{ G_UNICODE_SCRIPT_COMMON, "Zyyy" },
{ G_UNICODE_SCRIPT_INHERITED, "Zinh" },
{ G_UNICODE_SCRIPT_ARABIC, "Arab" },
{ G_UNICODE_SCRIPT_ARMENIAN, "Armn" },
{ G_UNICODE_SCRIPT_BENGALI, "Beng" },
{ G_UNICODE_SCRIPT_BOPOMOFO, "Bopo" },
{ G_UNICODE_SCRIPT_CHEROKEE, "Cher" },
{ G_UNICODE_SCRIPT_COPTIC, "Copt" },
{ G_UNICODE_SCRIPT_CYRILLIC, "Cyrl" },
{ G_UNICODE_SCRIPT_DESERET, "Dsrt" },
{ G_UNICODE_SCRIPT_DEVANAGARI, "Deva" },
{ G_UNICODE_SCRIPT_ETHIOPIC, "Ethi" },
{ G_UNICODE_SCRIPT_GEORGIAN, "Geor" },
{ G_UNICODE_SCRIPT_GOTHIC, "Goth" },
{ G_UNICODE_SCRIPT_GREEK, "Grek" },
{ G_UNICODE_SCRIPT_GUJARATI, "Gujr" },
{ G_UNICODE_SCRIPT_GURMUKHI, "Guru" },
{ G_UNICODE_SCRIPT_HAN, "Hani" },
{ G_UNICODE_SCRIPT_HANGUL, "Hang" },
{ G_UNICODE_SCRIPT_HEBREW, "Hebr" },
{ G_UNICODE_SCRIPT_HIRAGANA, "Hira" },
{ G_UNICODE_SCRIPT_KANNADA, "Knda" },
{ G_UNICODE_SCRIPT_KATAKANA, "Kana" },
{ G_UNICODE_SCRIPT_KHMER, "Khmr" },
{ G_UNICODE_SCRIPT_LAO, "Laoo" },
{ G_UNICODE_SCRIPT_LATIN, "Latn" },
{ G_UNICODE_SCRIPT_MALAYALAM, "Mlym" },
{ G_UNICODE_SCRIPT_MONGOLIAN, "Mong" },
{ G_UNICODE_SCRIPT_MYANMAR, "Mymr" },
{ G_UNICODE_SCRIPT_OGHAM, "Ogam" },
{ G_UNICODE_SCRIPT_OLD_ITALIC, "Ital" },
{ G_UNICODE_SCRIPT_ORIYA, "Orya" },
{ G_UNICODE_SCRIPT_RUNIC, "Runr" },
{ G_UNICODE_SCRIPT_SINHALA, "Sinh" },
{ G_UNICODE_SCRIPT_SYRIAC, "Syrc" },
{ G_UNICODE_SCRIPT_TAMIL, "Taml" },
{ G_UNICODE_SCRIPT_TELUGU, "Telu" },
{ G_UNICODE_SCRIPT_THAANA, "Thaa" },
{ G_UNICODE_SCRIPT_THAI, "Thai" },
{ G_UNICODE_SCRIPT_TIBETAN, "Tibt" },
{ G_UNICODE_SCRIPT_CANADIAN_ABORIGINAL, "Cans" },
{ G_UNICODE_SCRIPT_YI, "Yiii" },
{ G_UNICODE_SCRIPT_TAGALOG, "Tglg" },
{ G_UNICODE_SCRIPT_HANUNOO, "Hano" },
{ G_UNICODE_SCRIPT_BUHID, "Buhd" },
{ G_UNICODE_SCRIPT_TAGBANWA, "Tagb" },
/* Unicode-4.0 additions */
{ G_UNICODE_SCRIPT_BRAILLE, "Brai" },
{ G_UNICODE_SCRIPT_CYPRIOT, "Cprt" },
{ G_UNICODE_SCRIPT_LIMBU, "Limb" },
{ G_UNICODE_SCRIPT_OSMANYA, "Osma" },
{ G_UNICODE_SCRIPT_SHAVIAN, "Shaw" },
{ G_UNICODE_SCRIPT_LINEAR_B, "Linb" },
{ G_UNICODE_SCRIPT_TAI_LE, "Tale" },
{ G_UNICODE_SCRIPT_UGARITIC, "Ugar" },
/* Unicode-4.1 additions */
{ G_UNICODE_SCRIPT_NEW_TAI_LUE, "Talu" },
{ G_UNICODE_SCRIPT_BUGINESE, "Bugi" },
{ G_UNICODE_SCRIPT_GLAGOLITIC, "Glag" },
{ G_UNICODE_SCRIPT_TIFINAGH, "Tfng" },
{ G_UNICODE_SCRIPT_SYLOTI_NAGRI, "Sylo" },
{ G_UNICODE_SCRIPT_OLD_PERSIAN, "Xpeo" },
{ G_UNICODE_SCRIPT_KHAROSHTHI, "Khar" },
/* Unicode-5.0 additions */
{ G_UNICODE_SCRIPT_UNKNOWN, "Zzzz" },
{ G_UNICODE_SCRIPT_BALINESE, "Bali" },
{ G_UNICODE_SCRIPT_CUNEIFORM, "Xsux" },
{ G_UNICODE_SCRIPT_PHOENICIAN, "Phnx" },
{ G_UNICODE_SCRIPT_PHAGS_PA, "Phag" },
{ G_UNICODE_SCRIPT_NKO, "Nkoo" },
/* Unicode-5.1 additions */
{ G_UNICODE_SCRIPT_KAYAH_LI, "Kali" },
{ G_UNICODE_SCRIPT_LEPCHA, "Lepc" },
{ G_UNICODE_SCRIPT_REJANG, "Rjng" },
{ G_UNICODE_SCRIPT_SUNDANESE, "Sund" },
{ G_UNICODE_SCRIPT_SAURASHTRA, "Saur" },
{ G_UNICODE_SCRIPT_CHAM, "Cham" },
{ G_UNICODE_SCRIPT_OL_CHIKI, "Olck" },
{ G_UNICODE_SCRIPT_VAI, "Vaii" },
{ G_UNICODE_SCRIPT_CARIAN, "Cari" },
{ G_UNICODE_SCRIPT_LYCIAN, "Lyci" },
{ G_UNICODE_SCRIPT_LYDIAN, "Lydi" },
/* Unicode-5.2 additions */
{ G_UNICODE_SCRIPT_AVESTAN, "Avst" },
{ G_UNICODE_SCRIPT_BAMUM, "Bamu" },
{ G_UNICODE_SCRIPT_EGYPTIAN_HIEROGLYPHS, "Egyp" },
{ G_UNICODE_SCRIPT_IMPERIAL_ARAMAIC, "Armi" },
{ G_UNICODE_SCRIPT_INSCRIPTIONAL_PAHLAVI, "Phli" },
{ G_UNICODE_SCRIPT_INSCRIPTIONAL_PARTHIAN, "Prti" },
{ G_UNICODE_SCRIPT_JAVANESE, "Java" },
{ G_UNICODE_SCRIPT_KAITHI, "Kthi" },
{ G_UNICODE_SCRIPT_LISU, "Lisu" },
{ G_UNICODE_SCRIPT_MEETEI_MAYEK, "Mtei" },
{ G_UNICODE_SCRIPT_OLD_SOUTH_ARABIAN, "Sarb" },
{ G_UNICODE_SCRIPT_OLD_TURKIC, "Orkh" },
{ G_UNICODE_SCRIPT_SAMARITAN, "Samr" },
{ G_UNICODE_SCRIPT_TAI_THAM, "Lana" },
{ G_UNICODE_SCRIPT_TAI_VIET, "Tavt" },
/* Unicode-6.0 additions */
{ G_UNICODE_SCRIPT_BATAK, "Batk" },
{ G_UNICODE_SCRIPT_BRAHMI, "Brah" },
{ G_UNICODE_SCRIPT_MANDAIC, "Mand" },
/* Unicode-6.1 additions */
{ G_UNICODE_SCRIPT_CHAKMA, "Cakm" },
{ G_UNICODE_SCRIPT_MEROITIC_CURSIVE, "Merc" },
{ G_UNICODE_SCRIPT_MEROITIC_HIEROGLYPHS, "Mero" },
{ G_UNICODE_SCRIPT_MIAO, "Plrd" },
{ G_UNICODE_SCRIPT_SHARADA, "Shrd" },
{ G_UNICODE_SCRIPT_SORA_SOMPENG, "Sora" },
{ G_UNICODE_SCRIPT_TAKRI, "Takr" },
/* Unicode 7.0 additions */
{ G_UNICODE_SCRIPT_BASSA_VAH, "Bass" },
{ G_UNICODE_SCRIPT_CAUCASIAN_ALBANIAN, "Aghb" },
{ G_UNICODE_SCRIPT_DUPLOYAN, "Dupl" },
{ G_UNICODE_SCRIPT_ELBASAN, "Elba" },
{ G_UNICODE_SCRIPT_GRANTHA, "Gran" },
{ G_UNICODE_SCRIPT_KHOJKI, "Khoj" },
{ G_UNICODE_SCRIPT_KHUDAWADI, "Sind" },
{ G_UNICODE_SCRIPT_LINEAR_A, "Lina" },
{ G_UNICODE_SCRIPT_MAHAJANI, "Mahj" },
{ G_UNICODE_SCRIPT_MANICHAEAN, "Mani" },
{ G_UNICODE_SCRIPT_MENDE_KIKAKUI, "Mend" },
{ G_UNICODE_SCRIPT_MODI, "Modi" },
{ G_UNICODE_SCRIPT_MRO, "Mroo" },
{ G_UNICODE_SCRIPT_NABATAEAN, "Nbat" },
{ G_UNICODE_SCRIPT_OLD_NORTH_ARABIAN, "Narb" },
{ G_UNICODE_SCRIPT_OLD_PERMIC, "Perm" },
{ G_UNICODE_SCRIPT_PAHAWH_HMONG, "Hmng" },
{ G_UNICODE_SCRIPT_PALMYRENE, "Palm" },
{ G_UNICODE_SCRIPT_PAU_CIN_HAU, "Pauc" },
{ G_UNICODE_SCRIPT_PSALTER_PAHLAVI, "Phlp" },
{ G_UNICODE_SCRIPT_SIDDHAM, "Sidd" },
{ G_UNICODE_SCRIPT_TIRHUTA, "Tirh" },
{ G_UNICODE_SCRIPT_WARANG_CITI, "Wara" },
/* Unicode 8.0 additions */
{ G_UNICODE_SCRIPT_AHOM, "Ahom" },
{ G_UNICODE_SCRIPT_ANATOLIAN_HIEROGLYPHS, "Hluw" },
{ G_UNICODE_SCRIPT_HATRAN, "Hatr" },
{ G_UNICODE_SCRIPT_MULTANI, "Mult" },
{ G_UNICODE_SCRIPT_OLD_HUNGARIAN, "Hung" },
{ G_UNICODE_SCRIPT_SIGNWRITING, "Sgnw" },
/* Unicode 9.0 additions */
{ G_UNICODE_SCRIPT_ADLAM, "Adlm" },
{ G_UNICODE_SCRIPT_BHAIKSUKI, "Bhks" },
{ G_UNICODE_SCRIPT_MARCHEN, "Marc" },
{ G_UNICODE_SCRIPT_NEWA, "Newa" },
{ G_UNICODE_SCRIPT_OSAGE, "Osge" },
{ G_UNICODE_SCRIPT_TANGUT, "Tang" },
/* Unicode 10.0 additions */
{ G_UNICODE_SCRIPT_MASARAM_GONDI, "Gonm" },
{ G_UNICODE_SCRIPT_NUSHU, "Nshu" },
{ G_UNICODE_SCRIPT_SOYOMBO, "Soyo" },
{ G_UNICODE_SCRIPT_ZANABAZAR_SQUARE, "Zanb" },
/* Unicode 11.0 additions */
{ G_UNICODE_SCRIPT_DOGRA, "Dogr" },
{ G_UNICODE_SCRIPT_GUNJALA_GONDI, "Gong" },
{ G_UNICODE_SCRIPT_HANIFI_ROHINGYA, "Rohg" },
{ G_UNICODE_SCRIPT_MAKASAR, "Maka" },
{ G_UNICODE_SCRIPT_MEDEFAIDRIN, "Medf" },
{ G_UNICODE_SCRIPT_OLD_SOGDIAN, "Sogo" },
{ G_UNICODE_SCRIPT_SOGDIAN, "Sogd" },
/* Unicode 12.0 additions */
{ G_UNICODE_SCRIPT_ELYMAIC, "Elym" },
{ G_UNICODE_SCRIPT_NANDINAGARI, "Nand" },
{ G_UNICODE_SCRIPT_NYIAKENG_PUACHUE_HMONG, "Hmnp" },
{ G_UNICODE_SCRIPT_WANCHO, "Wcho" },
/* Unicode 13.0 additions */
{ G_UNICODE_SCRIPT_CHORASMIAN, "Chrs" },
{ G_UNICODE_SCRIPT_DIVES_AKURU, "Diak" },
{ G_UNICODE_SCRIPT_KHITAN_SMALL_SCRIPT, "Kits" },
{ G_UNICODE_SCRIPT_YEZIDI, "Yezi" },
};
guint i;
g_assert_cmphex (0, ==,
g_unicode_script_to_iso15924 (G_UNICODE_SCRIPT_INVALID_CODE));
g_assert_cmphex (0x5A7A7A7A, ==, g_unicode_script_to_iso15924 (1000));
g_assert_cmphex (0x41726162, ==,
g_unicode_script_to_iso15924 (G_UNICODE_SCRIPT_ARABIC));
g_assert_cmphex (G_UNICODE_SCRIPT_INVALID_CODE, ==,
g_unicode_script_from_iso15924 (0));
g_assert_cmphex (G_UNICODE_SCRIPT_UNKNOWN, ==,
g_unicode_script_from_iso15924 (0x12345678));
#define PACK(a,b,c,d) \
((guint32)((((guint8)(a))<<24)|(((guint8)(b))<<16)|(((guint8)(c))<<8)|((guint8)(d))))
for (i = 0; i < G_N_ELEMENTS (data); i++)
{
guint32 code = PACK (data[i].four_letter_code[0],
data[i].four_letter_code[1],
data[i].four_letter_code[2],
data[i].four_letter_code[3]);
g_assert_cmphex (g_unicode_script_to_iso15924 (data[i].script), ==, code);
g_assert_cmpint (g_unicode_script_from_iso15924 (code), ==, data[i].script);
}
#undef PACK
}
int
main (int argc,
char *argv[])
{
g_test_init (&argc, &argv, NULL);
g_test_add_func ("/unicode/alnum", test_alnum);
g_test_add_func ("/unicode/alpha", test_alpha);
g_test_add_func ("/unicode/break-type", test_unichar_break_type);
g_test_add_func ("/unicode/canonical-decomposition", test_canonical_decomposition);
g_test_add_func ("/unicode/casefold", test_casefold);
g_test_add_func ("/unicode/cases", test_cases);
g_test_add_func ("/unicode/character-type", test_unichar_character_type);
g_test_add_func ("/unicode/cntrl", test_cntrl);
g_test_add_func ("/unicode/combining-class", test_combining_class);
g_test_add_func ("/unicode/compose", test_compose);
g_test_add_func ("/unicode/decompose", test_decompose);
g_test_add_func ("/unicode/decompose-tail", test_decompose_tail);
g_test_add_func ("/unicode/defined", test_defined);
g_test_add_func ("/unicode/digit", test_digit);
g_test_add_func ("/unicode/digit-value", test_digit_value);
g_test_add_func ("/unicode/fully-decompose-canonical", test_fully_decompose_canonical);
g_test_add_func ("/unicode/fully-decompose-len", test_fully_decompose_len);
g_test_add_func ("/unicode/normalization", test_normalization);
g_test_add_func ("/unicode/graph", test_graph);
g_test_add_func ("/unicode/iso15924", test_iso15924);
g_test_add_func ("/unicode/lower", test_lower);
g_test_add_func ("/unicode/mark", test_mark);
g_test_add_func ("/unicode/mirror", test_mirror);
g_test_add_func ("/unicode/print", test_print);
g_test_add_func ("/unicode/punctuation", test_punctuation);
g_test_add_func ("/unicode/script", test_unichar_script);
g_test_add_func ("/unicode/space", test_space);
g_test_add_func ("/unicode/strdown", test_strdown);
g_test_add_func ("/unicode/strup", test_strup);
g_test_add_func ("/unicode/turkish-strupdown", test_turkish_strupdown);
g_test_add_func ("/unicode/title", test_title);
g_test_add_func ("/unicode/upper", test_upper);
g_test_add_func ("/unicode/validate", test_unichar_validate);
g_test_add_func ("/unicode/wide", test_wide);
g_test_add_func ("/unicode/xdigit", test_xdigit);
g_test_add_func ("/unicode/xdigit-value", test_xdigit_value);
g_test_add_func ("/unicode/zero-width", test_zerowidth);
return g_test_run();
}