glib/glib/tests/unicode.c
Philip Withnall f9f74efd76
tests: Improve Unicode composition code coverage
This adds various additional tests to cover branches of `gunidecomp.c`
which are not already covered, bringing our branch coverage of that file
up to 100% (if you ignore `g_utf8_normalize()`, which is tested by
`unicode-normalize.c` and I’m counting it separately).

Signed-off-by: Philip Withnall <pwithnall@gnome.org>

Helps: #3470
2024-10-21 19:32:50 +01:00

2117 lines
84 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/* Unit tests for utilities
* Copyright (C) 2010 Red Hat, Inc.
* Copyright (C) 2011 Google, Inc.
*
* SPDX-License-Identifier: LicenseRef-old-glib-tests
*
* This work is provided "as is"; redistribution and modification
* in whole or in part, in any medium, physical or electronic is
* permitted without restriction.
*
* This work is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*
* In no event shall the authors or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even
* if advised of the possibility of such damage.
*
* Author: Matthias Clasen, Behdad Esfahbod
*/
/* We are testing some deprecated APIs here */
#ifndef GLIB_DISABLE_DEPRECATION_WARNINGS
#define GLIB_DISABLE_DEPRECATION_WARNINGS
#endif
#include <locale.h>
#include <stdio.h>
#include "glib.h"
#include "glib/gunidecomp.h"
#ifdef G_OS_WIN32
#include <windows.h>
#endif
/* Stash a g_strdup()'d copy of the environment variable @name in *@save and
 * then remove the variable from the environment. The copy belongs to the
 * caller, who releases it with g_free(). */
static void
save_and_clear_env (const char  *name,
                    char       **save)
{
  const char *current_value = g_getenv (name);

  *save = g_strdup (current_value);
  g_unsetenv (name);
}
/* Test that g_unichar_validate() accepts code points that lie inside the
 * Unicode code space and rejects surrogates and values beyond U+10FFFF. */
static void
test_unichar_validate (void)
{
  /* Valid code points: ASCII, BMP, a noncharacter and a supplementary-plane
   * value. */
  g_assert_true (g_unichar_validate ('j'));
  g_assert_true (g_unichar_validate (8356));   /* 0x20A4 */
  g_assert_true (g_unichar_validate (0xFDD1));
  g_assert_true (g_unichar_validate (917760)); /* 0xE0100 */

  /* Invalid: a UTF-16 surrogate, and the first value past the last code
   * point. */
  g_assert_false (g_unichar_validate (0xD800));
  g_assert_false (g_unichar_validate (0x110000));
}
/* Test that g_unichar_type() returns the expected GUnicodeType for one
 * representative code point of each general category, and for a handful of
 * code points sitting on the borders of the internal lookup tables. */
static void
test_unichar_character_type (void)
{
  guint i;
  struct {
    GUnicodeType type;
    gunichar     c;
  } examples[] = {
    { G_UNICODE_CONTROL,              0x000D },
    { G_UNICODE_FORMAT,               0x200E },
    /* G_UNICODE_UNASSIGNED is covered by the border cases below */
    { G_UNICODE_PRIVATE_USE,          0xE000 },
    { G_UNICODE_SURROGATE,            0xD800 },
    { G_UNICODE_LOWERCASE_LETTER,     0x0061 },
    { G_UNICODE_MODIFIER_LETTER,      0x02B0 },
    { G_UNICODE_OTHER_LETTER,         0x3400 },
    { G_UNICODE_TITLECASE_LETTER,     0x01C5 },
    { G_UNICODE_UPPERCASE_LETTER,     0xFF21 },
    { G_UNICODE_SPACING_MARK,         0x0903 },
    { G_UNICODE_ENCLOSING_MARK,       0x20DD },
    { G_UNICODE_NON_SPACING_MARK,     0xA806 },
    { G_UNICODE_DECIMAL_NUMBER,       0xFF10 },
    { G_UNICODE_LETTER_NUMBER,        0x16EE },
    { G_UNICODE_OTHER_NUMBER,         0x17F0 },
    { G_UNICODE_CONNECT_PUNCTUATION,  0x005F },
    { G_UNICODE_DASH_PUNCTUATION,     0x058A },
    { G_UNICODE_CLOSE_PUNCTUATION,    0x0F3B },
    { G_UNICODE_FINAL_PUNCTUATION,    0x2019 },
    { G_UNICODE_INITIAL_PUNCTUATION,  0x2018 },
    { G_UNICODE_OTHER_PUNCTUATION,    0x2016 },
    { G_UNICODE_OPEN_PUNCTUATION,     0x0F3A },
    { G_UNICODE_CURRENCY_SYMBOL,      0x20A0 },
    { G_UNICODE_MODIFIER_SYMBOL,      0x309B },
    { G_UNICODE_MATH_SYMBOL,          0xFB29 },
    { G_UNICODE_OTHER_SYMBOL,         0x00A6 },
    { G_UNICODE_LINE_SEPARATOR,       0x2028 },
    { G_UNICODE_PARAGRAPH_SEPARATOR,  0x2029 },
    { G_UNICODE_SPACE_SEPARATOR,      0x202F },
  };

  for (i = 0; i < G_N_ELEMENTS (examples); i++)
    {
      g_assert_cmpint (g_unichar_type (examples[i].c), ==, examples[i].type);
    }

  /*** Testing TYPE() border cases ***/
  /* The expected values are spelled with their GUnicodeType names rather
   * than the raw enum values, so a failure message is self-explanatory. */
  g_assert_cmpint (g_unichar_type (0x3FF5), ==, G_UNICODE_OTHER_LETTER);
  /* U+FFEFF Plane 15 Private Use */
  g_assert_cmpint (g_unichar_type (0xFFEFF), ==, G_UNICODE_PRIVATE_USE);
  /* U+E0001 Language Tag */
  g_assert_cmpint (g_unichar_type (0xE0001), ==, G_UNICODE_FORMAT);
  g_assert_cmpint (g_unichar_type (G_UNICODE_LAST_CHAR), ==, G_UNICODE_UNASSIGNED);
  g_assert_cmpint (g_unichar_type (G_UNICODE_LAST_CHAR + 1), ==, G_UNICODE_UNASSIGNED);
  g_assert_cmpint (g_unichar_type (G_UNICODE_LAST_CHAR_PART1), ==, G_UNICODE_UNASSIGNED);
  g_assert_cmpint (g_unichar_type (G_UNICODE_LAST_CHAR_PART1 + 1), ==, G_UNICODE_UNASSIGNED);
}
/* Test that g_unichar_break_type() returns the correct value for various
* ASCII and Unicode alphabetic, numeric, and other, codepoints. */
static void
test_unichar_break_type (void)
{
guint i;
struct {
GUnicodeBreakType type;
gunichar c;
} examples[] = {
{ G_UNICODE_BREAK_MANDATORY, 0x2028 },
{ G_UNICODE_BREAK_CARRIAGE_RETURN, 0x000D },
{ G_UNICODE_BREAK_LINE_FEED, 0x000A },
{ G_UNICODE_BREAK_COMBINING_MARK, 0x0300 },
{ G_UNICODE_BREAK_SURROGATE, 0xD800 },
{ G_UNICODE_BREAK_ZERO_WIDTH_SPACE, 0x200B },
{ G_UNICODE_BREAK_INSEPARABLE, 0x2024 },
{ G_UNICODE_BREAK_NON_BREAKING_GLUE, 0x00A0 },
{ G_UNICODE_BREAK_CONTINGENT, 0xFFFC },
{ G_UNICODE_BREAK_SPACE, 0x0020 },
{ G_UNICODE_BREAK_AFTER, 0x05BE },
{ G_UNICODE_BREAK_BEFORE, 0x02C8 },
{ G_UNICODE_BREAK_BEFORE_AND_AFTER, 0x2014 },
{ G_UNICODE_BREAK_HYPHEN, 0x002D },
{ G_UNICODE_BREAK_NON_STARTER, 0x17D6 },
{ G_UNICODE_BREAK_OPEN_PUNCTUATION, 0x0028 },
{ G_UNICODE_BREAK_CLOSE_PARENTHESIS, 0x0029 },
{ G_UNICODE_BREAK_CLOSE_PUNCTUATION, 0x007D },
{ G_UNICODE_BREAK_QUOTATION, 0x0022 },
{ G_UNICODE_BREAK_EXCLAMATION, 0x0021 },
{ G_UNICODE_BREAK_IDEOGRAPHIC, 0x2E80 },
{ G_UNICODE_BREAK_NUMERIC, 0x0030 },
{ G_UNICODE_BREAK_INFIX_SEPARATOR, 0x002C },
{ G_UNICODE_BREAK_SYMBOL, 0x002F },
{ G_UNICODE_BREAK_ALPHABETIC, 0x0023 },
{ G_UNICODE_BREAK_PREFIX, 0x0024 },
{ G_UNICODE_BREAK_POSTFIX, 0x0025 },
{ G_UNICODE_BREAK_COMPLEX_CONTEXT, 0x0E01 },
{ G_UNICODE_BREAK_AMBIGUOUS, 0x00F7 },
{ G_UNICODE_BREAK_UNKNOWN, 0xE000 },
{ G_UNICODE_BREAK_NEXT_LINE, 0x0085 },
{ G_UNICODE_BREAK_WORD_JOINER, 0x2060 },
{ G_UNICODE_BREAK_HANGUL_L_JAMO, 0x1100 },
{ G_UNICODE_BREAK_HANGUL_V_JAMO, 0x1160 },
{ G_UNICODE_BREAK_HANGUL_T_JAMO, 0x11A8 },
{ G_UNICODE_BREAK_HANGUL_LV_SYLLABLE, 0xAC00 },
{ G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE, 0xAC01 },
{ G_UNICODE_BREAK_CONDITIONAL_JAPANESE_STARTER, 0x3041 },
{ G_UNICODE_BREAK_HEBREW_LETTER, 0x05D0 },
{ G_UNICODE_BREAK_REGIONAL_INDICATOR, 0x1F1F6 },
{ G_UNICODE_BREAK_EMOJI_BASE, 0x1F466 },
{ G_UNICODE_BREAK_EMOJI_MODIFIER, 0x1F3FB },
{ G_UNICODE_BREAK_ZERO_WIDTH_JOINER, 0x200D },
{ G_UNICODE_BREAK_AKSARA, 0x1B45 },
{ G_UNICODE_BREAK_AKSARA_PRE_BASE, 0x1193F },
{ G_UNICODE_BREAK_AKSARA_START, 0x11F50 },
{ G_UNICODE_BREAK_VIRAMA_FINAL, 0x1BF3 },
{ G_UNICODE_BREAK_VIRAMA, 0xA9C0 },
};
for (i = 0; i < G_N_ELEMENTS (examples); i++)
{
g_assert_cmpint (g_unichar_break_type (examples[i].c), ==, examples[i].type);
}
}
/* Check g_unichar_get_script() against a table of sample code points, each
 * paired with the GUnicodeScript it is expected to be reported as. */
static void
test_unichar_script (void)
{
  const struct
  {
    GUnicodeScript expected;
    gunichar       ch;
  } cases[] = {
    { G_UNICODE_SCRIPT_COMMON,                 0x002A },
    { G_UNICODE_SCRIPT_INHERITED,              0x1CED },
    { G_UNICODE_SCRIPT_INHERITED,              0x0670 },
    { G_UNICODE_SCRIPT_ARABIC,                 0x060D },
    { G_UNICODE_SCRIPT_ARMENIAN,               0x0559 },
    { G_UNICODE_SCRIPT_BENGALI,                0x09CD },
    { G_UNICODE_SCRIPT_BOPOMOFO,               0x31B6 },
    { G_UNICODE_SCRIPT_CHEROKEE,               0x13A2 },
    { G_UNICODE_SCRIPT_COPTIC,                 0x2CFD },
    { G_UNICODE_SCRIPT_CYRILLIC,               0x0482 },
    { G_UNICODE_SCRIPT_DESERET,                0x10401 },
    { G_UNICODE_SCRIPT_DEVANAGARI,             0x094D },
    { G_UNICODE_SCRIPT_ETHIOPIC,               0x1258 },
    { G_UNICODE_SCRIPT_GEORGIAN,               0x10FC },
    { G_UNICODE_SCRIPT_GOTHIC,                 0x10341 },
    { G_UNICODE_SCRIPT_GREEK,                  0x0375 },
    { G_UNICODE_SCRIPT_GUJARATI,               0x0A83 },
    { G_UNICODE_SCRIPT_GURMUKHI,               0x0A3C },
    { G_UNICODE_SCRIPT_HAN,                    0x3005 },
    { G_UNICODE_SCRIPT_HANGUL,                 0x1100 },
    { G_UNICODE_SCRIPT_HEBREW,                 0x05BF },
    { G_UNICODE_SCRIPT_HIRAGANA,               0x309F },
    { G_UNICODE_SCRIPT_KANNADA,                0x0CBC },
    { G_UNICODE_SCRIPT_KATAKANA,               0x30FF },
    { G_UNICODE_SCRIPT_KHMER,                  0x17DD },
    { G_UNICODE_SCRIPT_LAO,                    0x0EDD },
    { G_UNICODE_SCRIPT_LATIN,                  0x0061 },
    { G_UNICODE_SCRIPT_MALAYALAM,              0x0D3D },
    { G_UNICODE_SCRIPT_MONGOLIAN,              0x1843 },
    { G_UNICODE_SCRIPT_MYANMAR,                0x1031 },
    { G_UNICODE_SCRIPT_OGHAM,                  0x169C },
    { G_UNICODE_SCRIPT_OLD_ITALIC,             0x10322 },
    { G_UNICODE_SCRIPT_ORIYA,                  0x0B3C },
    { G_UNICODE_SCRIPT_RUNIC,                  0x16EF },
    { G_UNICODE_SCRIPT_SINHALA,                0x0DBD },
    { G_UNICODE_SCRIPT_SYRIAC,                 0x0711 },
    { G_UNICODE_SCRIPT_TAMIL,                  0x0B82 },
    { G_UNICODE_SCRIPT_TELUGU,                 0x0C03 },
    { G_UNICODE_SCRIPT_THAANA,                 0x07B1 },
    { G_UNICODE_SCRIPT_THAI,                   0x0E31 },
    { G_UNICODE_SCRIPT_TIBETAN,                0x0FD4 },
    { G_UNICODE_SCRIPT_CANADIAN_ABORIGINAL,    0x1400 },
    { G_UNICODE_SCRIPT_CANADIAN_ABORIGINAL,    0x1401 },
    { G_UNICODE_SCRIPT_YI,                     0xA015 },
    { G_UNICODE_SCRIPT_TAGALOG,                0x1700 },
    { G_UNICODE_SCRIPT_HANUNOO,                0x1720 },
    { G_UNICODE_SCRIPT_BUHID,                  0x1740 },
    { G_UNICODE_SCRIPT_TAGBANWA,               0x1760 },
    { G_UNICODE_SCRIPT_BRAILLE,                0x2800 },
    { G_UNICODE_SCRIPT_CYPRIOT,                0x10808 },
    { G_UNICODE_SCRIPT_LIMBU,                  0x1932 },
    { G_UNICODE_SCRIPT_OSMANYA,                0x10480 },
    { G_UNICODE_SCRIPT_SHAVIAN,                0x10450 },
    { G_UNICODE_SCRIPT_LINEAR_B,               0x10000 },
    { G_UNICODE_SCRIPT_TAI_LE,                 0x1950 },
    { G_UNICODE_SCRIPT_UGARITIC,               0x1039F },
    { G_UNICODE_SCRIPT_NEW_TAI_LUE,            0x1980 },
    { G_UNICODE_SCRIPT_BUGINESE,               0x1A1F },
    { G_UNICODE_SCRIPT_GLAGOLITIC,             0x2C00 },
    { G_UNICODE_SCRIPT_TIFINAGH,               0x2D6F },
    { G_UNICODE_SCRIPT_SYLOTI_NAGRI,           0xA800 },
    { G_UNICODE_SCRIPT_OLD_PERSIAN,            0x103D0 },
    { G_UNICODE_SCRIPT_KHAROSHTHI,             0x10A3F },
    { G_UNICODE_SCRIPT_UNKNOWN,                0x1111111 },
    { G_UNICODE_SCRIPT_BALINESE,               0x1B04 },
    { G_UNICODE_SCRIPT_CUNEIFORM,              0x12000 },
    { G_UNICODE_SCRIPT_PHOENICIAN,             0x10900 },
    { G_UNICODE_SCRIPT_PHAGS_PA,               0xA840 },
    { G_UNICODE_SCRIPT_NKO,                    0x07C0 },
    { G_UNICODE_SCRIPT_KAYAH_LI,               0xA900 },
    { G_UNICODE_SCRIPT_LEPCHA,                 0x1C00 },
    { G_UNICODE_SCRIPT_REJANG,                 0xA930 },
    { G_UNICODE_SCRIPT_SUNDANESE,              0x1B80 },
    { G_UNICODE_SCRIPT_SAURASHTRA,             0xA880 },
    { G_UNICODE_SCRIPT_CHAM,                   0xAA00 },
    { G_UNICODE_SCRIPT_OL_CHIKI,               0x1C50 },
    { G_UNICODE_SCRIPT_VAI,                    0xA500 },
    { G_UNICODE_SCRIPT_CARIAN,                 0x102A0 },
    { G_UNICODE_SCRIPT_LYCIAN,                 0x10280 },
    { G_UNICODE_SCRIPT_LYDIAN,                 0x1093F },
    { G_UNICODE_SCRIPT_AVESTAN,                0x10B00 },
    { G_UNICODE_SCRIPT_BAMUM,                  0xA6A0 },
    { G_UNICODE_SCRIPT_EGYPTIAN_HIEROGLYPHS,   0x13000 },
    { G_UNICODE_SCRIPT_IMPERIAL_ARAMAIC,       0x10840 },
    { G_UNICODE_SCRIPT_INSCRIPTIONAL_PAHLAVI,  0x10B60 },
    { G_UNICODE_SCRIPT_INSCRIPTIONAL_PARTHIAN, 0x10B40 },
    { G_UNICODE_SCRIPT_JAVANESE,               0xA980 },
    { G_UNICODE_SCRIPT_KAITHI,                 0x11082 },
    { G_UNICODE_SCRIPT_LISU,                   0xA4D0 },
    { G_UNICODE_SCRIPT_MEETEI_MAYEK,           0xABE5 },
    { G_UNICODE_SCRIPT_OLD_SOUTH_ARABIAN,      0x10A60 },
    { G_UNICODE_SCRIPT_OLD_TURKIC,             0x10C00 },
    { G_UNICODE_SCRIPT_SAMARITAN,              0x0800 },
    { G_UNICODE_SCRIPT_TAI_THAM,               0x1A20 },
    { G_UNICODE_SCRIPT_TAI_VIET,               0xAA80 },
    { G_UNICODE_SCRIPT_BATAK,                  0x1BC0 },
    { G_UNICODE_SCRIPT_BRAHMI,                 0x11000 },
    { G_UNICODE_SCRIPT_MANDAIC,                0x0840 },
    { G_UNICODE_SCRIPT_CHAKMA,                 0x11100 },
    { G_UNICODE_SCRIPT_MEROITIC_CURSIVE,       0x109A0 },
    { G_UNICODE_SCRIPT_MEROITIC_HIEROGLYPHS,   0x10980 },
    { G_UNICODE_SCRIPT_MIAO,                   0x16F00 },
    { G_UNICODE_SCRIPT_SHARADA,                0x11180 },
    { G_UNICODE_SCRIPT_SORA_SOMPENG,           0x110D0 },
    { G_UNICODE_SCRIPT_TAKRI,                  0x11680 },
    { G_UNICODE_SCRIPT_BASSA_VAH,              0x16AD0 },
    { G_UNICODE_SCRIPT_CAUCASIAN_ALBANIAN,     0x10530 },
    { G_UNICODE_SCRIPT_DUPLOYAN,               0x1BC00 },
    { G_UNICODE_SCRIPT_ELBASAN,                0x10500 },
    { G_UNICODE_SCRIPT_GRANTHA,                0x11301 },
    { G_UNICODE_SCRIPT_KHOJKI,                 0x11200 },
    { G_UNICODE_SCRIPT_KHUDAWADI,              0x112B0 },
    { G_UNICODE_SCRIPT_LINEAR_A,               0x10600 },
    { G_UNICODE_SCRIPT_MAHAJANI,               0x11150 },
    { G_UNICODE_SCRIPT_MANICHAEAN,             0x10AC0 },
    { G_UNICODE_SCRIPT_MENDE_KIKAKUI,          0x1E800 },
    { G_UNICODE_SCRIPT_MODI,                   0x11600 },
    { G_UNICODE_SCRIPT_MRO,                    0x16A40 },
    { G_UNICODE_SCRIPT_NABATAEAN,              0x10880 },
    { G_UNICODE_SCRIPT_OLD_NORTH_ARABIAN,      0x10A80 },
    { G_UNICODE_SCRIPT_OLD_PERMIC,             0x10350 },
    { G_UNICODE_SCRIPT_PAHAWH_HMONG,           0x16B00 },
    { G_UNICODE_SCRIPT_PALMYRENE,              0x10860 },
    { G_UNICODE_SCRIPT_PAU_CIN_HAU,            0x11AC0 },
    { G_UNICODE_SCRIPT_PSALTER_PAHLAVI,        0x10B80 },
    { G_UNICODE_SCRIPT_SIDDHAM,                0x11580 },
    { G_UNICODE_SCRIPT_TIRHUTA,                0x11480 },
    { G_UNICODE_SCRIPT_WARANG_CITI,            0x118A0 },
    { G_UNICODE_SCRIPT_CHEROKEE,               0x0AB71 },
    { G_UNICODE_SCRIPT_HATRAN,                 0x108E0 },
    { G_UNICODE_SCRIPT_OLD_HUNGARIAN,          0x10C80 },
    { G_UNICODE_SCRIPT_MULTANI,                0x11280 },
    { G_UNICODE_SCRIPT_AHOM,                   0x11700 },
    { G_UNICODE_SCRIPT_CUNEIFORM,              0x12480 },
    { G_UNICODE_SCRIPT_ANATOLIAN_HIEROGLYPHS,  0x14400 },
    { G_UNICODE_SCRIPT_SIGNWRITING,            0x1D800 },
    { G_UNICODE_SCRIPT_ADLAM,                  0x1E900 },
    { G_UNICODE_SCRIPT_BHAIKSUKI,              0x11C00 },
    { G_UNICODE_SCRIPT_MARCHEN,                0x11C70 },
    { G_UNICODE_SCRIPT_NEWA,                   0x11400 },
    { G_UNICODE_SCRIPT_OSAGE,                  0x104B0 },
    { G_UNICODE_SCRIPT_TANGUT,                 0x16FE0 },
    { G_UNICODE_SCRIPT_MASARAM_GONDI,          0x11D00 },
    { G_UNICODE_SCRIPT_NUSHU,                  0x1B170 },
    { G_UNICODE_SCRIPT_SOYOMBO,                0x11A50 },
    { G_UNICODE_SCRIPT_ZANABAZAR_SQUARE,       0x11A00 },
    { G_UNICODE_SCRIPT_DOGRA,                  0x11800 },
    { G_UNICODE_SCRIPT_GUNJALA_GONDI,          0x11D60 },
    { G_UNICODE_SCRIPT_HANIFI_ROHINGYA,        0x10D00 },
    { G_UNICODE_SCRIPT_MAKASAR,                0x11EE0 },
    { G_UNICODE_SCRIPT_MEDEFAIDRIN,            0x16E40 },
    { G_UNICODE_SCRIPT_OLD_SOGDIAN,            0x10F00 },
    { G_UNICODE_SCRIPT_SOGDIAN,                0x10F30 },
    { G_UNICODE_SCRIPT_ELYMAIC,                0x10FE0 },
    { G_UNICODE_SCRIPT_NANDINAGARI,            0x119A0 },
    { G_UNICODE_SCRIPT_NYIAKENG_PUACHUE_HMONG, 0x1E100 },
    { G_UNICODE_SCRIPT_WANCHO,                 0x1E2C0 },
    { G_UNICODE_SCRIPT_CHORASMIAN,             0x10FB0 },
    { G_UNICODE_SCRIPT_DIVES_AKURU,            0x11900 },
    { G_UNICODE_SCRIPT_KHITAN_SMALL_SCRIPT,    0x18B00 },
    { G_UNICODE_SCRIPT_YEZIDI,                 0x10E80 },
    { G_UNICODE_SCRIPT_CYPRO_MINOAN,           0x12F90 },
    { G_UNICODE_SCRIPT_OLD_UYGHUR,             0x10F70 },
    { G_UNICODE_SCRIPT_TANGSA,                 0x16A70 },
    { G_UNICODE_SCRIPT_TOTO,                   0x1E290 },
    { G_UNICODE_SCRIPT_VITHKUQI,               0x10570 },
    { G_UNICODE_SCRIPT_KAWI,                   0x11F00 },
    { G_UNICODE_SCRIPT_NAG_MUNDARI,            0x1E4D0 },
    { G_UNICODE_SCRIPT_TODHRI,                 0x105C8 },
    { G_UNICODE_SCRIPT_GARAY,                  0x10D40 },
    { G_UNICODE_SCRIPT_TULU_TIGALARI,          0x11387 },
    { G_UNICODE_SCRIPT_SUNUWAR,                0x11BC0 },
    { G_UNICODE_SCRIPT_GURUNG_KHEMA,           0x16139 },
    { G_UNICODE_SCRIPT_KIRAT_RAI,              0x16D40 },
    { G_UNICODE_SCRIPT_OL_ONAL,                0x1E5D0 },
  };
  gsize n;

  for (n = 0; n < G_N_ELEMENTS (cases); n++)
    {
      g_assert_cmpint (g_unichar_get_script (cases[n].ch), ==, cases[n].expected);
    }
}
/* Test that g_unichar_combining_class() returns the correct value for
* various ASCII and Unicode alphabetic, numeric, and other, codepoints. */
static void
test_combining_class (void)
{
guint i;
struct {
gint class;
gunichar c;
} examples[] = {
{ 0, 0x0020 },
{ 1, 0x0334 },
{ 7, 0x093C },
{ 8, 0x3099 },
{ 9, 0x094D },
{ 10, 0x05B0 },
{ 11, 0x05B1 },
{ 12, 0x05B2 },
{ 13, 0x05B3 },
{ 14, 0x05B4 },
{ 15, 0x05B5 },
{ 16, 0x05B6 },
{ 17, 0x05B7 },
{ 18, 0x05B8 },
{ 19, 0x05B9 },
{ 20, 0x05BB },
{ 21, 0x05BC },
{ 22, 0x05BD },
{ 23, 0x05BF },
{ 24, 0x05C1 },
{ 25, 0x05C2 },
{ 26, 0xFB1E },
{ 27, 0x064B },
{ 28, 0x064C },
{ 29, 0x064D },
/* ... */
{ 228, 0x05AE },
{ 230, 0x0300 },
{ 232, 0x302C },
{ 233, 0x0362 },
{ 234, 0x0360 },
{ 234, 0x1DCD },
{ 240, 0x0345 },
/* These are all (currently) unassigned, but exercise various branches in
* the combining class lookup tables: */
{ 0, 0x323FF },
{ 0, 0x32400 },
{ 0, 0xDFFFF },
{ 0, 0xE0000 },
{ 0, G_UNICODE_LAST_CHAR },
{ 0, G_UNICODE_LAST_CHAR + 1 },
};
for (i = 0; i < G_N_ELEMENTS (examples); i++)
{
g_assert_cmpint (g_unichar_combining_class (examples[i].c), ==, examples[i].class);
}
}
/* Test that g_unichar_get_mirror() returns the correct value for various
* ASCII and Unicode alphabetic, numeric, and other, codepoints. */
static void
test_mirror (void)
{
gunichar mirror;
g_assert_true (g_unichar_get_mirror_char ('(', &mirror));
g_assert_cmpint (mirror, ==, ')');
g_assert_true (g_unichar_get_mirror_char (')', &mirror));
g_assert_cmpint (mirror, ==, '(');
g_assert_true (g_unichar_get_mirror_char ('{', &mirror));
g_assert_cmpint (mirror, ==, '}');
g_assert_true (g_unichar_get_mirror_char ('}', &mirror));
g_assert_cmpint (mirror, ==, '{');
g_assert_true (g_unichar_get_mirror_char (0x208D, &mirror));
g_assert_cmpint (mirror, ==, 0x208E);
g_assert_true (g_unichar_get_mirror_char (0x208E, &mirror));
g_assert_cmpint (mirror, ==, 0x208D);
g_assert_false (g_unichar_get_mirror_char ('a', &mirror));
}
/* Test that g_utf8_strup() upper-cases ASCII and fullwidth Latin letters,
 * passes digits, punctuation and control characters through unchanged, and
 * copes with NULL and empty input. */
static void
test_strup (void)
{
  char *str_up = NULL;
  const char *str = "AaZz09x;\x03\x45"
                    "\xEF\xBD\x81"  /* FULLWIDTH LATIN SMALL LETTER A (U+FF41) */
                    "\xEF\xBC\xA1"; /* FULLWIDTH LATIN CAPITAL LETTER A (U+FF21) */

  /* Testing degenerated cases */
  if (g_test_undefined ())
    {
      g_test_expect_message (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
                             "*assertion*!= NULL*");
      str_up = g_utf8_strup (NULL, 0);
      g_test_assert_expected_messages ();
      /* The precondition failure must not hand back a string. */
      g_assert_null (str_up);
    }

  str_up = g_utf8_strup (str, strlen (str));
  /* Tricky, comparing two unicode strings with an ASCII function.
   * Both fullwidth letters come out as U+FF21 (\357\274\241). */
  g_assert_cmpstr (str_up, ==, "AAZZ09X;\003E\357\274\241\357\274\241");
  g_free (str_up);

  str_up = g_utf8_strup ("", 0);
  g_assert_cmpstr (str_up, ==, "");
  g_free (str_up);
}
/* Test that g_utf8_strdown() lower-cases ASCII and fullwidth Latin letters,
 * passes digits, punctuation and control characters through unchanged, and
 * copes with NULL and empty input. */
static void
test_strdown (void)
{
  char *str_down = NULL;
  const char *str = "AaZz09x;\x03\x07"
                    "\xEF\xBD\x81"  /* FULLWIDTH LATIN SMALL LETTER A (U+FF41) */
                    "\xEF\xBC\xA1"; /* FULLWIDTH LATIN CAPITAL LETTER A (U+FF21) */

  /* Testing degenerated cases */
  if (g_test_undefined ())
    {
      g_test_expect_message (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
                             "*assertion*!= NULL*");
      str_down = g_utf8_strdown (NULL, 0);
      g_test_assert_expected_messages ();
      /* The precondition failure must not hand back a string. */
      g_assert_null (str_down);
    }

  str_down = g_utf8_strdown (str, strlen (str));
  /* Tricky, comparing two unicode strings with an ASCII function.
   * Both fullwidth letters come out as U+FF41 (\357\275\201). */
  g_assert_cmpstr (str_down, ==, "aazz09x;\003\007\357\275\201\357\275\201");
  g_free (str_down);

  str_down = g_utf8_strdown ("", 0);
  g_assert_cmpstr (str_down, ==, "");
  g_free (str_down);
}
/* Test that g_utf8_strup() and g_utf8_strdown() return the correct
 * value for Turkish 'i' with and without dot above.
 *
 * The test switches the process to the tr_TR locale (and, on Windows, the
 * thread locale to Turkish) for its duration. The LC_ALL, LC_MESSAGES and
 * LANG environment variables are cleared while it runs. Everything is put
 * back before returning, including when the test is skipped because tr_TR
 * is not available. */
static void
test_turkish_strupdown (void)
{
  char *str_up = NULL;
  char *str_down = NULL;
  const char *str = "iII"
                    "\xcc\x87"  /* COMBINING DOT ABOVE (U+307) */
                    "\xc4\xb1"  /* LATIN SMALL LETTER DOTLESS I (U+131) */
                    "\xc4\xb0"; /* LATIN CAPITAL LETTER I WITH DOT ABOVE (U+130) */
  char *oldlocale;
  char *old_lc_all, *old_lc_messages, *old_lang;
#ifdef G_OS_WIN32
  LCID old_lcid;
#endif

  /* interferes with g_win32_getlocale() */
  save_and_clear_env ("LC_ALL", &old_lc_all);
  save_and_clear_env ("LC_MESSAGES", &old_lc_messages);
  save_and_clear_env ("LANG", &old_lang);

  /* setlocale() returns the name of the locale it has just *installed*, so
   * the previous locale has to be queried (and copied, since the returned
   * storage may be reused) before switching to tr_TR. */
  oldlocale = g_strdup (setlocale (LC_ALL, NULL));
  if (setlocale (LC_ALL, "tr_TR") == NULL)
    {
      g_test_skip ("locale tr_TR not available");
      goto out;
    }

#ifdef G_OS_WIN32
  old_lcid = GetThreadLocale ();
  SetThreadLocale (MAKELCID (MAKELANGID (LANG_TURKISH, SUBLANG_TURKISH_TURKEY), SORT_DEFAULT));
#endif

  str_up = g_utf8_strup (str, strlen (str));
  str_down = g_utf8_strdown (str, strlen (str));

  /* i => LATIN CAPITAL LETTER I WITH DOT ABOVE,
   * I => I,
   * I + COMBINING DOT ABOVE => I + COMBINING DOT ABOVE,
   * LATIN SMALL LETTER DOTLESS I => I,
   * LATIN CAPITAL LETTER I WITH DOT ABOVE => LATIN CAPITAL LETTER I WITH DOT ABOVE */
  g_assert_cmpstr (str_up, ==, "\xc4\xb0II\xcc\x87I\xc4\xb0");

  /* i => i,
   * I => LATIN SMALL LETTER DOTLESS I,
   * I + COMBINING DOT ABOVE => i,
   * LATIN SMALL LETTER DOTLESS I => LATIN SMALL LETTER DOTLESS I,
   * LATIN CAPITAL LETTER I WITH DOT ABOVE => i */
  g_assert_cmpstr (str_down, ==, "i\xc4\xb1i\xc4\xb1i");

  g_free (str_up);
  g_free (str_down);

  /* Put the locale that was active on entry back in place. */
  if (oldlocale != NULL)
    setlocale (LC_ALL, oldlocale);
#ifdef G_OS_WIN32
  SetThreadLocale (old_lcid);
#endif

out:
  /* Shared by the normal and the skip path: restore the environment
   * variables that were cleared above and release the saved copies. */
  g_free (oldlocale);
  if (old_lc_all)
    g_setenv ("LC_ALL", old_lc_all, TRUE);
  if (old_lc_messages)
    g_setenv ("LC_MESSAGES", old_lc_messages, TRUE);
  if (old_lang)
    g_setenv ("LANG", old_lang, TRUE);
  g_free (old_lc_all);
  g_free (old_lc_messages);
  g_free (old_lang);
}
/* Test that g_utf8_casefold() folds ASCII and fullwidth Latin letters to
 * their lower-case form, passes digits and punctuation through unchanged,
 * and copes with NULL and empty input. */
static void
test_casefold (void)
{
  char *str_casefold = NULL;
  const char *str = "AaZz09x;"
                    "\xEF\xBD\x81"  /* FULLWIDTH LATIN SMALL LETTER A (U+FF41) */
                    "\xEF\xBC\xA1"; /* FULLWIDTH LATIN CAPITAL LETTER A (U+FF21) */

  /* Testing degenerated cases */
  if (g_test_undefined ())
    {
      g_test_expect_message (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
                             "*assertion*!= NULL*");
      str_casefold = g_utf8_casefold (NULL, 0);
      g_test_assert_expected_messages ();
      /* The precondition failure must not hand back a string. */
      g_assert_null (str_casefold);
    }

  str_casefold = g_utf8_casefold (str, strlen (str));
  /* Tricky, comparing two unicode strings with an ASCII function.
   * Both fullwidth letters come out as U+FF41 (\357\275\201). */
  g_assert_cmpstr (str_casefold, ==, "aazz09x;\357\275\201\357\275\201");
  g_free (str_casefold);

  str_casefold = g_utf8_casefold ("", 0);
  g_assert_cmpstr (str_casefold, ==, "");
  g_free (str_casefold);
}
static void
test_casemap_and_casefold (void)
{
FILE *infile;
char buffer[1024];
char **strings;
char *filename;
const char *locale;
const char *test;
const char *expected;
char *convert;
char *current_locale = setlocale (LC_CTYPE, NULL);
char *old_lc_all, *old_lc_messages, *old_lang;
#ifdef G_OS_WIN32
LCID old_lcid;
old_lcid = GetThreadLocale ();
#endif
/* interferes with g_win32_getlocale() */
save_and_clear_env ("LC_ALL", &old_lc_all);
save_and_clear_env ("LC_MESSAGES", &old_lc_messages);
save_and_clear_env ("LANG", &old_lang);
filename = g_test_build_filename (G_TEST_DIST, "casemap.txt", NULL);
infile = fopen (filename, "r");
g_assert (infile != NULL);
while (fgets (buffer, sizeof (buffer), infile))
{
if (buffer[0] == '#')
continue;
strings = g_strsplit (buffer, "\t", -1);
locale = strings[0];
if (!locale[0])
locale = "C";
if (strcmp (locale, current_locale) != 0)
{
setlocale (LC_CTYPE, locale);
current_locale = setlocale (LC_CTYPE, NULL);
if (strncmp (current_locale, locale, 2) != 0)
{
g_test_message ("Cannot set locale to %s, skipping", locale);
goto next;
}
}
#ifdef G_OS_WIN32
if (strstr (locale, "lt_LT"))
SetThreadLocale (MAKELCID (MAKELANGID (LANG_LITHUANIAN, SUBLANG_LITHUANIAN), SORT_DEFAULT));
else if (strstr (locale, "tr_TR"))
SetThreadLocale (MAKELCID (MAKELANGID (LANG_TURKISH, SUBLANG_TURKISH_TURKEY), SORT_DEFAULT));
else
SetThreadLocale (old_lcid);
#endif
test = strings[1];
/* gen-casemap-txt.py uses an empty string when a single
* character doesn't have an equivalent in a particular case;
* since that behavior is nonsense for multicharacter strings,
* it would make more sense to put the expected result ... the
* original character unchanged. But for now, we just work
* around it here and take the empty string to mean "same as
* original"
*/
convert = g_utf8_strup (test, -1);
expected = strings[4][0] ? strings[4] : test;
g_assert_cmpstr (convert, ==, expected);
g_free (convert);
convert = g_utf8_strdown (test, -1);
expected = strings[2][0] ? strings[2] : test;
g_assert_cmpstr (convert, ==, expected);
g_free (convert);
next:
g_strfreev (strings);
}
fclose (infile);
g_free (filename);
filename = g_test_build_filename (G_TEST_DIST, "casefold.txt", NULL);
infile = fopen (filename, "r");
g_assert (infile != NULL);
while (fgets (buffer, sizeof (buffer), infile))
{
if (buffer[0] == '#')
continue;
buffer[strlen (buffer) - 1] = '\0';
strings = g_strsplit (buffer, "\t", -1);
test = strings[0];
convert = g_utf8_casefold (test, -1);
g_assert_cmpstr (convert, ==, strings[1]);
g_free (convert);
g_strfreev (strings);
}
fclose (infile);
g_free (filename);
if (old_lc_all)
g_setenv ("LC_ALL", old_lc_all, TRUE);
if (old_lc_messages)
g_setenv ("LC_MESSAGES", old_lc_messages, TRUE);
if (old_lang)
g_setenv ("LANG", old_lang, TRUE);
g_free (old_lc_all);
g_free (old_lc_messages);
g_free (old_lang);
#ifdef G_OS_WIN32
SetThreadLocale (old_lcid);
#endif
}
/* Test that g_unichar_ismark() returns the correct value for various
* ASCII and Unicode alphabetic, numeric, and other, codepoints. */
static void
test_mark (void)
{
g_assert_true (g_unichar_ismark (0x0903));
g_assert_true (g_unichar_ismark (0x20DD));
g_assert_true (g_unichar_ismark (0xA806));
g_assert_false (g_unichar_ismark ('a'));
/*** Testing TYPE() border cases ***/
g_assert_false (g_unichar_ismark (0x3FF5));
/* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
g_assert_false (g_unichar_ismark (0xFFEFF));
/* U+E0001 Language Tag */
g_assert_false (g_unichar_ismark (0xE0001));
g_assert_false (g_unichar_ismark (G_UNICODE_LAST_CHAR));
g_assert_false (g_unichar_ismark (G_UNICODE_LAST_CHAR + 1));
g_assert_false (g_unichar_ismark (G_UNICODE_LAST_CHAR_PART1));
g_assert_false (g_unichar_ismark (G_UNICODE_LAST_CHAR_PART1 + 1));
}
/* Test that g_unichar_isspace() returns the correct value for various
* ASCII and Unicode alphabetic, numeric, and other, codepoints. */
static void
test_space (void)
{
g_assert_false (g_unichar_isspace ('a'));
g_assert_true (g_unichar_isspace (' '));
g_assert_true (g_unichar_isspace ('\t'));
g_assert_true (g_unichar_isspace ('\n'));
g_assert_true (g_unichar_isspace ('\r'));
g_assert_true (g_unichar_isspace ('\f'));
g_assert_false (g_unichar_isspace (0xff41)); /* Unicode fullwidth 'a' */
g_assert_true (g_unichar_isspace (0x202F)); /* Unicode space separator */
g_assert_true (g_unichar_isspace (0x2028)); /* Unicode line separator */
g_assert_true (g_unichar_isspace (0x2029)); /* Unicode paragraph separator */
/*** Testing TYPE() border cases ***/
g_assert_false (g_unichar_isspace (0x3FF5));
/* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
g_assert_false (g_unichar_isspace (0xFFEFF));
/* U+E0001 Language Tag */
g_assert_false (g_unichar_isspace (0xE0001));
g_assert_false (g_unichar_isspace (G_UNICODE_LAST_CHAR));
g_assert_false (g_unichar_isspace (G_UNICODE_LAST_CHAR + 1));
g_assert_false (g_unichar_isspace (G_UNICODE_LAST_CHAR_PART1));
g_assert_false (g_unichar_isspace (G_UNICODE_LAST_CHAR_PART1 + 1));
}
/* Test that g_unichar_isalnum() returns the correct value for various
* ASCII and Unicode alphabetic, numeric, and other, codepoints. */
static void
test_alnum (void)
{
g_assert_false (g_unichar_isalnum (' '));
g_assert_true (g_unichar_isalnum ('a'));
g_assert_true (g_unichar_isalnum ('z'));
g_assert_true (g_unichar_isalnum ('0'));
g_assert_true (g_unichar_isalnum ('9'));
g_assert_true (g_unichar_isalnum ('A'));
g_assert_true (g_unichar_isalnum ('Z'));
g_assert_false (g_unichar_isalnum ('-'));
g_assert_false (g_unichar_isalnum ('*'));
g_assert_true (g_unichar_isalnum (0xFF21)); /* Unichar fullwidth 'A' */
g_assert_true (g_unichar_isalnum (0xFF3A)); /* Unichar fullwidth 'Z' */
g_assert_true (g_unichar_isalnum (0xFF41)); /* Unichar fullwidth 'a' */
g_assert_true (g_unichar_isalnum (0xFF5A)); /* Unichar fullwidth 'z' */
g_assert_true (g_unichar_isalnum (0xFF10)); /* Unichar fullwidth '0' */
g_assert_true (g_unichar_isalnum (0xFF19)); /* Unichar fullwidth '9' */
g_assert_false (g_unichar_isalnum (0xFF0A)); /* Unichar fullwidth '*' */
/*** Testing TYPE() border cases ***/
g_assert_true (g_unichar_isalnum (0x3FF5));
/* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
g_assert_false (g_unichar_isalnum (0xFFEFF));
/* U+E0001 Language Tag */
g_assert_false (g_unichar_isalnum (0xE0001));
g_assert_false (g_unichar_isalnum (G_UNICODE_LAST_CHAR));
g_assert_false (g_unichar_isalnum (G_UNICODE_LAST_CHAR + 1));
g_assert_false (g_unichar_isalnum (G_UNICODE_LAST_CHAR_PART1));
g_assert_false (g_unichar_isalnum (G_UNICODE_LAST_CHAR_PART1 + 1));
}
/* Test that g_unichar_isalpha() returns the correct value for various
* ASCII and Unicode alphabetic, numeric, and other, codepoints. */
static void
test_alpha (void)
{
g_assert_false (g_unichar_isalpha (' '));
g_assert_true (g_unichar_isalpha ('a'));
g_assert_true (g_unichar_isalpha ('z'));
g_assert_false (g_unichar_isalpha ('0'));
g_assert_false (g_unichar_isalpha ('9'));
g_assert_true (g_unichar_isalpha ('A'));
g_assert_true (g_unichar_isalpha ('Z'));
g_assert_false (g_unichar_isalpha ('-'));
g_assert_false (g_unichar_isalpha ('*'));
g_assert_true (g_unichar_isalpha (0xFF21)); /* Unichar fullwidth 'A' */
g_assert_true (g_unichar_isalpha (0xFF3A)); /* Unichar fullwidth 'Z' */
g_assert_true (g_unichar_isalpha (0xFF41)); /* Unichar fullwidth 'a' */
g_assert_true (g_unichar_isalpha (0xFF5A)); /* Unichar fullwidth 'z' */
g_assert_false (g_unichar_isalpha (0xFF10)); /* Unichar fullwidth '0' */
g_assert_false (g_unichar_isalpha (0xFF19)); /* Unichar fullwidth '9' */
g_assert_false (g_unichar_isalpha (0xFF0A)); /* Unichar fullwidth '*' */
/*** Testing TYPE() border cases ***/
g_assert_true (g_unichar_isalpha (0x3FF5));
/* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
g_assert_false (g_unichar_isalpha (0xFFEFF));
/* U+E0001 Language Tag */
g_assert_false (g_unichar_isalpha (0xE0001));
g_assert_false (g_unichar_isalpha (G_UNICODE_LAST_CHAR));
g_assert_false (g_unichar_isalpha (G_UNICODE_LAST_CHAR + 1));
g_assert_false (g_unichar_isalpha (G_UNICODE_LAST_CHAR_PART1));
g_assert_false (g_unichar_isalpha (G_UNICODE_LAST_CHAR_PART1 + 1));
}
/* Test that g_unichar_isdigit() returns the correct value for various
* ASCII and Unicode alphabetic, numeric, and other, codepoints. */
static void
test_digit (void)
{
g_assert_false (g_unichar_isdigit (' '));
g_assert_false (g_unichar_isdigit ('a'));
g_assert_true (g_unichar_isdigit ('0'));
g_assert_true (g_unichar_isdigit ('9'));
g_assert_false (g_unichar_isdigit ('A'));
g_assert_false (g_unichar_isdigit ('-'));
g_assert_false (g_unichar_isdigit ('*'));
g_assert_false (g_unichar_isdigit (0xFF21)); /* Unichar fullwidth 'A' */
g_assert_false (g_unichar_isdigit (0xFF3A)); /* Unichar fullwidth 'Z' */
g_assert_false (g_unichar_isdigit (0xFF41)); /* Unichar fullwidth 'a' */
g_assert_false (g_unichar_isdigit (0xFF5A)); /* Unichar fullwidth 'z' */
g_assert_true (g_unichar_isdigit (0xFF10)); /* Unichar fullwidth '0' */
g_assert_true (g_unichar_isdigit (0xFF19)); /* Unichar fullwidth '9' */
g_assert_false (g_unichar_isdigit (0xFF0A)); /* Unichar fullwidth '*' */
/*** Testing TYPE() border cases ***/
g_assert_false (g_unichar_isdigit (0x3FF5));
/* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
g_assert_false (g_unichar_isdigit (0xFFEFF));
/* U+E0001 Language Tag */
g_assert_false (g_unichar_isdigit (0xE0001));
g_assert_false (g_unichar_isdigit (G_UNICODE_LAST_CHAR));
g_assert_false (g_unichar_isdigit (G_UNICODE_LAST_CHAR + 1));
g_assert_false (g_unichar_isdigit (G_UNICODE_LAST_CHAR_PART1));
g_assert_false (g_unichar_isdigit (G_UNICODE_LAST_CHAR_PART1 + 1));
}
/* Test that g_unichar_digit_value() returns the correct value for various
* ASCII and Unicode alphabetic, numeric, and other, codepoints. */
static void
test_digit_value (void)
{
g_assert_cmpint (g_unichar_digit_value (' '), ==, -1);
g_assert_cmpint (g_unichar_digit_value ('a'), ==, -1);
g_assert_cmpint (g_unichar_digit_value ('0'), ==, 0);
g_assert_cmpint (g_unichar_digit_value ('9'), ==, 9);
g_assert_cmpint (g_unichar_digit_value ('A'), ==, -1);
g_assert_cmpint (g_unichar_digit_value ('-'), ==, -1);
g_assert_cmpint (g_unichar_digit_value (0xFF21), ==, -1); /* Unichar 'A' */
g_assert_cmpint (g_unichar_digit_value (0xFF3A), ==, -1); /* Unichar 'Z' */
g_assert_cmpint (g_unichar_digit_value (0xFF41), ==, -1); /* Unichar 'a' */
g_assert_cmpint (g_unichar_digit_value (0xFF5A), ==, -1); /* Unichar 'z' */
g_assert_cmpint (g_unichar_digit_value (0xFF10), ==, 0); /* Unichar '0' */
g_assert_cmpint (g_unichar_digit_value (0xFF19), ==, 9); /* Unichar '9' */
g_assert_cmpint (g_unichar_digit_value (0xFF0A), ==, -1); /* Unichar '*' */
/*** Testing TYPE() border cases ***/
g_assert_cmpint (g_unichar_digit_value (0x3FF5), ==, -1);
/* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
g_assert_cmpint (g_unichar_digit_value (0xFFEFF), ==, -1);
/* U+E0001 Language Tag */
g_assert_cmpint (g_unichar_digit_value (0xE0001), ==, -1);
g_assert_cmpint (g_unichar_digit_value (G_UNICODE_LAST_CHAR), ==, -1);
g_assert_cmpint (g_unichar_digit_value (G_UNICODE_LAST_CHAR + 1), ==, -1);
g_assert_cmpint (g_unichar_digit_value (G_UNICODE_LAST_CHAR_PART1), ==, -1);
g_assert_cmpint (g_unichar_digit_value (G_UNICODE_LAST_CHAR_PART1 + 1), ==, -1);
}
/* Check g_unichar_isxdigit() against ASCII characters, their fullwidth
 * forms, and code points on the borders of the type tables. Only 0-9, a-f
 * and A-F (in either width) are expected to be hexadecimal digits. */
static void
test_xdigit (void)
{
  /* ASCII hexadecimal digits: both ends of each accepted range */
  g_assert_true (g_unichar_isxdigit ('0'));
  g_assert_true (g_unichar_isxdigit ('9'));
  g_assert_true (g_unichar_isxdigit ('a'));
  g_assert_true (g_unichar_isxdigit ('f'));
  g_assert_true (g_unichar_isxdigit ('A'));
  g_assert_true (g_unichar_isxdigit ('F'));

  /* ASCII characters just outside, or well outside, those ranges */
  g_assert_false (g_unichar_isxdigit ('g'));
  g_assert_false (g_unichar_isxdigit ('z'));
  g_assert_false (g_unichar_isxdigit ('G'));
  g_assert_false (g_unichar_isxdigit ('Z'));
  g_assert_false (g_unichar_isxdigit (' '));
  g_assert_false (g_unichar_isxdigit ('-'));
  g_assert_false (g_unichar_isxdigit ('*'));

  /* Fullwidth hexadecimal digits */
  g_assert_true (g_unichar_isxdigit (0xFF10)); /* fullwidth '0' */
  g_assert_true (g_unichar_isxdigit (0xFF19)); /* fullwidth '9' */
  g_assert_true (g_unichar_isxdigit (0xFF41)); /* fullwidth 'a' */
  g_assert_true (g_unichar_isxdigit (0xFF46)); /* fullwidth 'f' */
  g_assert_true (g_unichar_isxdigit (0xFF21)); /* fullwidth 'A' */
  g_assert_true (g_unichar_isxdigit (0xFF26)); /* fullwidth 'F' */

  /* Fullwidth characters which are not hexadecimal digits */
  g_assert_false (g_unichar_isxdigit (0xFF47)); /* fullwidth 'g' */
  g_assert_false (g_unichar_isxdigit (0xFF5A)); /* fullwidth 'z' */
  g_assert_false (g_unichar_isxdigit (0xFF27)); /* fullwidth 'G' */
  g_assert_false (g_unichar_isxdigit (0xFF3A)); /* fullwidth 'Z' */
  g_assert_false (g_unichar_isxdigit (0xFF0A)); /* fullwidth '*' */

  /*** Testing TYPE() border cases ***/
  g_assert_false (g_unichar_isxdigit (0x3FF5));
  /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
  g_assert_false (g_unichar_isxdigit (0xFFEFF));
  /* U+E0001 Language Tag */
  g_assert_false (g_unichar_isxdigit (0xE0001));
  g_assert_false (g_unichar_isxdigit (G_UNICODE_LAST_CHAR));
  g_assert_false (g_unichar_isxdigit (G_UNICODE_LAST_CHAR + 1));
  g_assert_false (g_unichar_isxdigit (G_UNICODE_LAST_CHAR_PART1));
  g_assert_false (g_unichar_isxdigit (G_UNICODE_LAST_CHAR_PART1 + 1));
}
/* Check the value reported by g_unichar_xdigit_value() for ASCII characters,
 * their fullwidth forms, and code points on the borders of the type tables.
 * Anything which is not a hexadecimal digit is expected to yield -1. */
static void
test_xdigit_value (void)
{
  /* ASCII hexadecimal digits */
  g_assert_cmpint (g_unichar_xdigit_value ('0'), ==, 0);
  g_assert_cmpint (g_unichar_xdigit_value ('9'), ==, 9);
  g_assert_cmpint (g_unichar_xdigit_value ('a'), ==, 10);
  g_assert_cmpint (g_unichar_xdigit_value ('f'), ==, 15);
  g_assert_cmpint (g_unichar_xdigit_value ('A'), ==, 10);
  g_assert_cmpint (g_unichar_xdigit_value ('F'), ==, 15);

  /* ASCII characters which are not hexadecimal digits */
  g_assert_cmpint (g_unichar_xdigit_value (' '), ==, -1);
  g_assert_cmpint (g_unichar_xdigit_value ('-'), ==, -1);
  g_assert_cmpint (g_unichar_xdigit_value ('g'), ==, -1);
  g_assert_cmpint (g_unichar_xdigit_value ('G'), ==, -1);

  /* Fullwidth hexadecimal digits */
  g_assert_cmpint (g_unichar_xdigit_value (0xFF10), ==, 0);  /* fullwidth '0' */
  g_assert_cmpint (g_unichar_xdigit_value (0xFF19), ==, 9);  /* fullwidth '9' */
  g_assert_cmpint (g_unichar_xdigit_value (0xFF41), ==, 10); /* fullwidth 'a' */
  g_assert_cmpint (g_unichar_xdigit_value (0xFF46), ==, 15); /* fullwidth 'f' */
  g_assert_cmpint (g_unichar_xdigit_value (0xFF21), ==, 10); /* fullwidth 'A' */
  g_assert_cmpint (g_unichar_xdigit_value (0xFF26), ==, 15); /* fullwidth 'F' */

  /* Fullwidth characters which are not hexadecimal digits */
  g_assert_cmpint (g_unichar_xdigit_value (0xFF47), ==, -1); /* fullwidth 'g' */
  g_assert_cmpint (g_unichar_xdigit_value (0xFF5A), ==, -1); /* fullwidth 'z' */
  g_assert_cmpint (g_unichar_xdigit_value (0xFF27), ==, -1); /* fullwidth 'G' */
  g_assert_cmpint (g_unichar_xdigit_value (0xFF3A), ==, -1); /* fullwidth 'Z' */
  g_assert_cmpint (g_unichar_xdigit_value (0xFF0A), ==, -1); /* fullwidth '*' */

  /*** Testing TYPE() border cases ***/
  g_assert_cmpint (g_unichar_xdigit_value (0x3FF5), ==, -1);
  /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
  g_assert_cmpint (g_unichar_xdigit_value (0xFFEFF), ==, -1);
  /* U+E0001 Language Tag */
  g_assert_cmpint (g_unichar_xdigit_value (0xE0001), ==, -1);
  g_assert_cmpint (g_unichar_xdigit_value (G_UNICODE_LAST_CHAR), ==, -1);
  g_assert_cmpint (g_unichar_xdigit_value (G_UNICODE_LAST_CHAR + 1), ==, -1);
  g_assert_cmpint (g_unichar_xdigit_value (G_UNICODE_LAST_CHAR_PART1), ==, -1);
  g_assert_cmpint (g_unichar_xdigit_value (G_UNICODE_LAST_CHAR_PART1 + 1), ==, -1);
}
/* Check g_unichar_ispunct() against ASCII and non-ASCII punctuation,
 * some non-punctuation characters, and code points on the borders of the
 * type tables. */
static void
test_punctuation (void)
{
  /* ASCII punctuation */
  g_assert_true (g_unichar_ispunct ('.'));
  g_assert_true (g_unichar_ispunct (','));
  g_assert_true (g_unichar_ispunct (';'));
  g_assert_true (g_unichar_ispunct (':'));
  g_assert_true (g_unichar_ispunct ('-'));
  g_assert_true (g_unichar_ispunct (0x005F)); /* U+005F LOW LINE ('_') */

  /* Punctuation outside ASCII */
  g_assert_true (g_unichar_ispunct (0x058A)); /* U+058A ARMENIAN HYPHEN */

  /* Spaces and letters are not punctuation */
  g_assert_false (g_unichar_ispunct (' '));
  g_assert_false (g_unichar_ispunct ('a'));
  g_assert_false (g_unichar_ispunct (0xFF21)); /* fullwidth 'A' */

  /*** Testing TYPE() border cases ***/
  g_assert_false (g_unichar_ispunct (0x3FF5));
  /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
  g_assert_false (g_unichar_ispunct (0xFFEFF));
  /* U+E0001 Language Tag */
  g_assert_false (g_unichar_ispunct (0xE0001));
  g_assert_false (g_unichar_ispunct (G_UNICODE_LAST_CHAR));
  g_assert_false (g_unichar_ispunct (G_UNICODE_LAST_CHAR + 1));
  g_assert_false (g_unichar_ispunct (G_UNICODE_LAST_CHAR_PART1));
  g_assert_false (g_unichar_ispunct (G_UNICODE_LAST_CHAR_PART1 + 1));
}
/* Check g_unichar_iscntrl() against a few control characters, a letter,
 * and code points on the borders of the type tables. */
static void
test_cntrl (void)
{
  /* Control characters */
  g_assert_true (g_unichar_iscntrl (0x08));   /* U+0008 BACKSPACE */
  g_assert_true (g_unichar_iscntrl (0x007F)); /* U+007F DELETE */
  g_assert_true (g_unichar_iscntrl (0x009F)); /* U+009F, last C1 control */

  /* An ordinary letter is not a control character */
  g_assert_false (g_unichar_iscntrl ('a'));

  /*** Testing TYPE() border cases ***/
  g_assert_false (g_unichar_iscntrl (0x3FF5));
  /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
  g_assert_false (g_unichar_iscntrl (0xFFEFF));
  /* U+E0001 Language Tag */
  g_assert_false (g_unichar_iscntrl (0xE0001));
  g_assert_false (g_unichar_iscntrl (G_UNICODE_LAST_CHAR));
  g_assert_false (g_unichar_iscntrl (G_UNICODE_LAST_CHAR + 1));
  g_assert_false (g_unichar_iscntrl (G_UNICODE_LAST_CHAR_PART1));
  g_assert_false (g_unichar_iscntrl (G_UNICODE_LAST_CHAR_PART1 + 1));
}
/* Check g_unichar_isgraph() against ASCII characters, their fullwidth
 * forms, control characters, and code points on the borders of the type
 * tables. */
static void
test_graph (void)
{
  /* Control characters and the space are not graphical */
  g_assert_false (g_unichar_isgraph (0x08));
  g_assert_false (g_unichar_isgraph (0x007F)); /* U+007F DELETE */
  g_assert_false (g_unichar_isgraph (0x009F)); /* U+009F, last C1 control */
  g_assert_false (g_unichar_isgraph (' '));

  /* ASCII letters, digits and symbols */
  g_assert_true (g_unichar_isgraph ('a'));
  g_assert_true (g_unichar_isgraph ('A'));
  g_assert_true (g_unichar_isgraph ('0'));
  g_assert_true (g_unichar_isgraph ('9'));
  g_assert_true (g_unichar_isgraph ('-'));
  g_assert_true (g_unichar_isgraph ('*'));

  /* Fullwidth letters, digits and symbols */
  g_assert_true (g_unichar_isgraph (0xFF41)); /* fullwidth 'a' */
  g_assert_true (g_unichar_isgraph (0xFF5A)); /* fullwidth 'z' */
  g_assert_true (g_unichar_isgraph (0xFF21)); /* fullwidth 'A' */
  g_assert_true (g_unichar_isgraph (0xFF3A)); /* fullwidth 'Z' */
  g_assert_true (g_unichar_isgraph (0xFF10)); /* fullwidth '0' */
  g_assert_true (g_unichar_isgraph (0xFF19)); /* fullwidth '9' */
  g_assert_true (g_unichar_isgraph (0xFF0A)); /* fullwidth '*' */

  /*** Testing TYPE() border cases ***/
  g_assert_true (g_unichar_isgraph (0x3FF5));
  /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
  g_assert_true (g_unichar_isgraph (0xFFEFF));
  /* U+E0001 Language Tag */
  g_assert_false (g_unichar_isgraph (0xE0001));
  g_assert_false (g_unichar_isgraph (G_UNICODE_LAST_CHAR));
  g_assert_false (g_unichar_isgraph (G_UNICODE_LAST_CHAR + 1));
  g_assert_false (g_unichar_isgraph (G_UNICODE_LAST_CHAR_PART1));
  g_assert_false (g_unichar_isgraph (G_UNICODE_LAST_CHAR_PART1 + 1));
}
/* Check g_unichar_iszerowidth() around the boundaries of several ranges
 * which are treated as zero-width, and for code points on the borders of
 * the type tables. */
static void
test_zerowidth (void)
{
  /* U+00AD is expected not to be reported as zero-width */
  g_assert_false (g_unichar_iszerowidth (0x00AD));

  /* A combining mark */
  g_assert_true (g_unichar_iszerowidth (0x591));

  /* U+1160..U+11FF in the Hangul Jamo block, plus the code points
   * immediately before and after that range */
  g_assert_false (g_unichar_iszerowidth (0x115F));
  g_assert_true (g_unichar_iszerowidth (0x1160));
  g_assert_true (g_unichar_iszerowidth (0x11AA));
  g_assert_true (g_unichar_iszerowidth (0x11FF));
  g_assert_false (g_unichar_iszerowidth (0x1200));

  /* Hangul Jamo Extended-B block, containing jungseong and jongseong for
   * Old Korean */
  g_assert_true (g_unichar_iszerowidth (0xD7B0));
  g_assert_true (g_unichar_iszerowidth (0xD7FB));

  /* U+200B and U+200C are zero-width; their predecessor U+200A is not */
  g_assert_false (g_unichar_iszerowidth (0x200A));
  g_assert_true (g_unichar_iszerowidth (0x200B));
  g_assert_true (g_unichar_iszerowidth (0x200C));

  /*** Testing TYPE() border cases ***/
  g_assert_false (g_unichar_iszerowidth (0x3FF5));
  /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
  g_assert_false (g_unichar_iszerowidth (0xFFEFF));
  /* U+E0001 Language Tag */
  g_assert_true (g_unichar_iszerowidth (0xE0001));
  g_assert_false (g_unichar_iszerowidth (G_UNICODE_LAST_CHAR));
  g_assert_false (g_unichar_iszerowidth (G_UNICODE_LAST_CHAR + 1));
  g_assert_false (g_unichar_iszerowidth (G_UNICODE_LAST_CHAR_PART1));
  g_assert_false (g_unichar_iszerowidth (G_UNICODE_LAST_CHAR_PART1 + 1));
}
/* Test that g_unichar_istitle() returns the correct value for various
* ASCII and Unicode alphabetic, numeric, and other, codepoints. */
static void
test_title (void)
{
g_assert_true (g_unichar_istitle (0x01c5));
g_assert_true (g_unichar_istitle (0x1f88));
g_assert_true (g_unichar_istitle (0x1fcc));
g_assert_false (g_unichar_istitle ('a'));
g_assert_false (g_unichar_istitle ('A'));
g_assert_false (g_unichar_istitle (';'));
/*** Testing TYPE() border cases ***/
g_assert_false (g_unichar_istitle (0x3FF5));
/* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
g_assert_false (g_unichar_istitle (0xFFEFF));
/* U+E0001 Language Tag */
g_assert_false (g_unichar_istitle (0xE0001));
g_assert_false (g_unichar_istitle (G_UNICODE_LAST_CHAR));
g_assert_false (g_unichar_istitle (G_UNICODE_LAST_CHAR + 1));
g_assert_false (g_unichar_istitle (G_UNICODE_LAST_CHAR_PART1));
g_assert_false (g_unichar_istitle (G_UNICODE_LAST_CHAR_PART1 + 1));
g_assert_cmphex (g_unichar_totitle (0x0000), ==, 0x0000);
g_assert_cmphex (g_unichar_totitle (0x01c6), ==, 0x01c5);
g_assert_cmphex (g_unichar_totitle (0x01c4), ==, 0x01c5);
g_assert_cmphex (g_unichar_totitle (0x01c5), ==, 0x01c5);
g_assert_cmphex (g_unichar_totitle (0x1f80), ==, 0x1f88);
g_assert_cmphex (g_unichar_totitle (0x1f88), ==, 0x1f88);
g_assert_cmphex (g_unichar_totitle ('a'), ==, 'A');
g_assert_cmphex (g_unichar_totitle ('A'), ==, 'A');
/*** Testing TYPE() border cases ***/
g_assert_cmphex (g_unichar_totitle (0x3FF5), ==, 0x3FF5);
/* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
g_assert_cmphex (g_unichar_totitle (0xFFEFF), ==, 0xFFEFF);
g_assert_cmphex (g_unichar_totitle (0xDFFFF), ==, 0xDFFFF);
/* U+E0001 Language Tag */
g_assert_cmphex (g_unichar_totitle (0xE0001), ==, 0xE0001);
g_assert_cmphex (g_unichar_totitle (G_UNICODE_LAST_CHAR), ==,
G_UNICODE_LAST_CHAR);
g_assert_cmphex (g_unichar_totitle (G_UNICODE_LAST_CHAR + 1), ==,
(G_UNICODE_LAST_CHAR + 1));
g_assert_cmphex (g_unichar_totitle (G_UNICODE_LAST_CHAR_PART1), ==,
(G_UNICODE_LAST_CHAR_PART1));
g_assert_cmphex (g_unichar_totitle (G_UNICODE_LAST_CHAR_PART1 + 1), ==,
(G_UNICODE_LAST_CHAR_PART1 + 1));
}
/* Check g_unichar_isupper() against ASCII and fullwidth characters, and
 * code points on the borders of the type tables. */
static void
test_upper (void)
{
  /* Uppercase letters */
  g_assert_true (g_unichar_isupper ('A'));
  g_assert_true (g_unichar_isupper (0xff21)); /* Unicode fullwidth 'A' */

  /* Lowercase letters, digits and spaces are not uppercase */
  g_assert_false (g_unichar_isupper ('a'));
  g_assert_false (g_unichar_isupper (0xff41)); /* Unicode fullwidth 'a' */
  g_assert_false (g_unichar_isupper ('0'));
  g_assert_false (g_unichar_isupper (' '));

  /*** Testing TYPE() border cases ***/
  g_assert_false (g_unichar_isupper (0x3FF5));
  /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
  g_assert_false (g_unichar_isupper (0xFFEFF));
  /* U+E0001 Language Tag */
  g_assert_false (g_unichar_isupper (0xE0001));
  g_assert_false (g_unichar_isupper (G_UNICODE_LAST_CHAR));
  g_assert_false (g_unichar_isupper (G_UNICODE_LAST_CHAR + 1));
  g_assert_false (g_unichar_isupper (G_UNICODE_LAST_CHAR_PART1));
  g_assert_false (g_unichar_isupper (G_UNICODE_LAST_CHAR_PART1 + 1));
}
/* Check g_unichar_islower() against ASCII and fullwidth characters, and
 * code points on the borders of the type tables. */
static void
test_lower (void)
{
  /* Lowercase letters */
  g_assert_true (g_unichar_islower ('a'));
  g_assert_true (g_unichar_islower (0xff41)); /* Unicode fullwidth 'a' */

  /* Uppercase letters, digits and spaces are not lowercase */
  g_assert_false (g_unichar_islower ('A'));
  g_assert_false (g_unichar_islower (0xff21)); /* Unicode fullwidth 'A' */
  g_assert_false (g_unichar_islower ('0'));
  g_assert_false (g_unichar_islower (' '));

  /*** Testing TYPE() border cases ***/
  g_assert_false (g_unichar_islower (0x3FF5));
  /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
  g_assert_false (g_unichar_islower (0xFFEFF));
  /* U+E0001 Language Tag */
  g_assert_false (g_unichar_islower (0xE0001));
  g_assert_false (g_unichar_islower (G_UNICODE_LAST_CHAR));
  g_assert_false (g_unichar_islower (G_UNICODE_LAST_CHAR + 1));
  g_assert_false (g_unichar_islower (G_UNICODE_LAST_CHAR_PART1));
  g_assert_false (g_unichar_islower (G_UNICODE_LAST_CHAR_PART1 + 1));
}
/* Check g_unichar_isprint() against ASCII and fullwidth characters, and
 * code points on the borders of the type tables. */
static void
test_print (void)
{
  /* ASCII: unlike g_unichar_isgraph(), the space counts as printable */
  g_assert_true (g_unichar_isprint (' '));
  g_assert_true (g_unichar_isprint ('0'));
  g_assert_true (g_unichar_isprint ('a'));
  g_assert_true (g_unichar_isprint ('A'));

  /* Fullwidth letters */
  g_assert_true (g_unichar_isprint (0xff41)); /* Unicode fullwidth 'a' */
  g_assert_true (g_unichar_isprint (0xff21)); /* Unicode fullwidth 'A' */

  /*** Testing TYPE() border cases ***/
  g_assert_true (g_unichar_isprint (0x3FF5));
  /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
  g_assert_true (g_unichar_isprint (0xFFEFF));
  /* U+E0001 Language Tag */
  g_assert_false (g_unichar_isprint (0xE0001));
  g_assert_false (g_unichar_isprint (G_UNICODE_LAST_CHAR));
  g_assert_false (g_unichar_isprint (G_UNICODE_LAST_CHAR + 1));
  g_assert_false (g_unichar_isprint (G_UNICODE_LAST_CHAR_PART1));
  g_assert_false (g_unichar_isprint (G_UNICODE_LAST_CHAR_PART1 + 1));
}
/* Test that g_unichar_toupper() and g_unichar_tolower() return the
* correct values for various ASCII and Unicode alphabetic, numeric,
* and other, codepoints. */
static void
test_cases (void)
{
g_assert_cmphex (g_unichar_toupper (0x0), ==, 0x0);
g_assert_cmphex (g_unichar_tolower (0x0), ==, 0x0);
g_assert_cmphex (g_unichar_toupper ('a'), ==, 'A');
g_assert_cmphex (g_unichar_toupper ('A'), ==, 'A');
/* Unicode fullwidth 'a' == 'A' */
g_assert_cmphex (g_unichar_toupper (0xff41), ==, 0xff21);
/* Unicode fullwidth 'A' == 'A' */
g_assert_cmphex (g_unichar_toupper (0xff21), ==, 0xff21);
g_assert_cmphex (g_unichar_toupper (0x01C5), ==, 0x01C4);
g_assert_cmphex (g_unichar_toupper (0x01C6), ==, 0x01C4);
g_assert_cmphex (g_unichar_tolower ('A'), ==, 'a');
g_assert_cmphex (g_unichar_tolower ('a'), ==, 'a');
/* Unicode fullwidth 'A' == 'a' */
g_assert_cmphex (g_unichar_tolower (0xff21), ==, 0xff41);
/* Unicode fullwidth 'a' == 'a' */
g_assert_cmphex (g_unichar_tolower (0xff41), ==, 0xff41);
g_assert_cmphex (g_unichar_tolower (0x01C4), ==, 0x01C6);
g_assert_cmphex (g_unichar_tolower (0x01C5), ==, 0x01C6);
g_assert_cmphex (g_unichar_tolower (0x1F8A), ==, 0x1F82);
g_assert_cmphex (g_unichar_totitle (0x1F8A), ==, 0x1F8A);
g_assert_cmphex (g_unichar_toupper (0x1F8A), ==, 0x1F8A);
g_assert_cmphex (g_unichar_tolower (0x1FB2), ==, 0x1FB2);
g_assert_cmphex (g_unichar_toupper (0x1FB2), ==, 0x1FB2);
/* U+130 is a special case, it's a 'I' with a dot on top */
g_assert_cmphex (g_unichar_tolower (0x130), ==, 0x69);
/* Testing ATTTABLE() border cases */
g_assert_cmphex (g_unichar_toupper (0x1D6FE), ==, 0x1D6FE);
/*** Testing TYPE() border cases ***/
g_assert_cmphex (g_unichar_toupper (0x3FF5), ==, 0x3FF5);
/* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
g_assert_cmphex (g_unichar_toupper (0xFFEFF), ==, 0xFFEFF);
g_assert_cmphex (g_unichar_toupper (0xDFFFF), ==, 0xDFFFF);
/* U+E0001 Language Tag */
g_assert_cmphex (g_unichar_toupper (0xE0001), ==, 0xE0001);
g_assert_cmphex (g_unichar_toupper (G_UNICODE_LAST_CHAR), ==,
G_UNICODE_LAST_CHAR);
g_assert_cmphex (g_unichar_toupper (G_UNICODE_LAST_CHAR + 1), ==,
(G_UNICODE_LAST_CHAR + 1));
g_assert_cmphex (g_unichar_toupper (G_UNICODE_LAST_CHAR_PART1), ==,
(G_UNICODE_LAST_CHAR_PART1));
g_assert_cmphex (g_unichar_toupper (G_UNICODE_LAST_CHAR_PART1 + 1), ==,
(G_UNICODE_LAST_CHAR_PART1 + 1));
/* Testing ATTTABLE() border cases */
g_assert_cmphex (g_unichar_tolower (0x1D6FA), ==, 0x1D6FA);
/*** Testing TYPE() border cases ***/
g_assert_cmphex (g_unichar_tolower (0x3FF5), ==, 0x3FF5);
/* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
g_assert_cmphex (g_unichar_tolower (0xFFEFF), ==, 0xFFEFF);
g_assert_cmphex (g_unichar_tolower (0xDFFFF), ==, 0xDFFFF);
/* U+E0001 Language Tag */
g_assert_cmphex (g_unichar_tolower (0xE0001), ==, 0xE0001);
g_assert_cmphex (g_unichar_tolower (G_UNICODE_LAST_CHAR), ==,
G_UNICODE_LAST_CHAR);
g_assert_cmphex (g_unichar_tolower (G_UNICODE_LAST_CHAR + 1), ==,
(G_UNICODE_LAST_CHAR + 1));
g_assert_cmphex (g_unichar_tolower (G_UNICODE_LAST_CHAR_PART1), ==,
G_UNICODE_LAST_CHAR_PART1);
g_assert_cmphex (g_unichar_tolower (G_UNICODE_LAST_CHAR_PART1 + 1), ==,
(G_UNICODE_LAST_CHAR_PART1 + 1));
}
/* Check g_unichar_isdefined() against a selection of assigned and
 * unassigned code points, and code points on the borders of the type
 * tables. */
static void
test_defined (void)
{
  /* Code points expected to be reported as defined */
  g_assert_true (g_unichar_isdefined ('a'));
  g_assert_true (g_unichar_isdefined (0x0903));
  g_assert_true (g_unichar_isdefined (0x20BA));
  g_assert_true (g_unichar_isdefined (0x20DD));
  g_assert_true (g_unichar_isdefined (0xA806));

  /* Code points expected to be reported as undefined */
  g_assert_false (g_unichar_isdefined (0x169D));
  g_assert_false (g_unichar_isdefined (0x10C49));

  /*** Testing TYPE() border cases ***/
  g_assert_true (g_unichar_isdefined (0x3FF5));
  /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
  g_assert_true (g_unichar_isdefined (0xFFEFF));
  g_assert_false (g_unichar_isdefined (0xDFFFF));
  /* U+E0001 Language Tag */
  g_assert_true (g_unichar_isdefined (0xE0001));
  g_assert_false (g_unichar_isdefined (G_UNICODE_LAST_CHAR));
  g_assert_false (g_unichar_isdefined (G_UNICODE_LAST_CHAR + 1));
  g_assert_false (g_unichar_isdefined (G_UNICODE_LAST_CHAR_PART1));
  g_assert_false (g_unichar_isdefined (G_UNICODE_LAST_CHAR_PART1 + 1));
}
/* Check g_unichar_iswide() and g_unichar_iswide_cjk() against example code
 * points from each width category listed in the table below.
 *
 * Each example records the widest context in which the code point is
 * expected to be wide:
 *  - NOT_WIDE: neither function reports it as wide
 *  - WIDE_CJK: only g_unichar_iswide_cjk() reports it as wide
 *  - WIDE: both functions report it as wide
 */
static void
test_wide (void)
{
  guint i;
  struct {
    gunichar c;
    enum {
      NOT_WIDE,
      WIDE_CJK,
      WIDE
    } wide;
  } examples[] = {
    /* Neutral */
    { 0x0000, NOT_WIDE },
    { 0x0483, NOT_WIDE },
    { 0x0641, NOT_WIDE },
    { 0xFFFC, NOT_WIDE },
    { 0x10000, NOT_WIDE },
    { 0xE0001, NOT_WIDE },
    { 0x2FFFE, NOT_WIDE },
    { 0x3FFFE, NOT_WIDE },

    /* Narrow */
    { 0x0020, NOT_WIDE },
    { 0x0041, NOT_WIDE },
    { 0x27E6, NOT_WIDE },

    /* Halfwidth */
    { 0x20A9, NOT_WIDE },
    { 0xFF61, NOT_WIDE },
    { 0xFF69, NOT_WIDE },
    { 0xFFEE, NOT_WIDE },

    /* Ambiguous */
    { 0x00A1, WIDE_CJK },
    { 0x00BE, WIDE_CJK },
    { 0x02DD, WIDE_CJK },
    { 0x2020, WIDE_CJK },
    { 0xFFFD, WIDE_CJK },
    { 0x1F100, WIDE_CJK },
    { 0xE0100, WIDE_CJK },
    { 0x100000, WIDE_CJK },
    { 0x10FFFD, WIDE_CJK },

    /* Fullwidth */
    { 0x3000, WIDE },
    { 0xFF60, WIDE },

    /* Wide */
    { 0x2329, WIDE },
    { 0x3001, WIDE },
    { 0xFE69, WIDE },
    { 0x30000, WIDE },
    { 0x3FFFD, WIDE },

    /* Default Wide blocks */
    { 0x4DBF, WIDE },
    { 0x9FFF, WIDE },
    { 0xFAFF, WIDE },
    { 0x2A6DF, WIDE },
    { 0x2B73F, WIDE },
    { 0x2B81F, WIDE },
    { 0x2FA1F, WIDE },

    /* Unicode-5.2 character additions */
    /* Wide */
    { 0x115F, WIDE },

    /* Unicode-6.0 character additions */
    /* Wide */
    { 0x2B740, WIDE },
    { 0x1B000, WIDE },

    /* Beyond the last valid code point */
    { 0x111111, NOT_WIDE }
  };

  for (i = 0; i < G_N_ELEMENTS (examples); i++)
    {
      /* Only WIDE examples are wide outside of a CJK context... */
      g_assert_cmpint (g_unichar_iswide (examples[i].c), ==,
                       (examples[i].wide == WIDE));
      /* ...whereas in a CJK context everything except NOT_WIDE is wide. */
      g_assert_cmpint (g_unichar_iswide_cjk (examples[i].c), ==,
                       (examples[i].wide != NOT_WIDE));
    }
}
/* Check g_unichar_compose() against pairs of code points which do and do
 * not compose, including Hangul and a number of inputs chosen to exercise
 * particular branches of the implementation.
 *
 * When composition fails, the function is expected to return FALSE and
 * store 0 in its output argument. */
static void
test_compose (void)
{
  const struct
    {
      gunichar a;
      gunichar b;
      gunichar expected_result;  /* 0 for failure */
    }
  vectors[] =
    {
      /* Not composable */
      { 0x0041, 0x0042, 0 },
      { 0x0041, 0x0000, 0 },
      { 0x0066, 0x0069, 0 },

      /* Tricky non-composable */
      { 0x0308, 0x0301, 0 }, /* !0x0344 */
      { 0x0F71, 0x0F72, 0 }, /* !0x0F73 */

      /* Singletons should not compose */
      { 0x212B, 0x0000, 0 },
      { 0x00C5, 0x0000, 0 },
      { 0x2126, 0x0000, 0 },
      { 0x03A9, 0x0000, 0 },

      /* Pairs */
      { 0x0041, 0x030A, 0x00C5 },
      { 0x006F, 0x0302, 0x00F4 },
      { 0x1E63, 0x0307, 0x1E69 },
      { 0x0073, 0x0323, 0x1E63 },
      { 0x0064, 0x0307, 0x1E0B },
      { 0x0064, 0x0323, 0x1E0D },

      /* Hangul */
      { 0xD4CC, 0x11B6, 0xD4DB },
      { 0x1111, 0x1171, 0xD4CC },
      { 0xCE20, 0x11B8, 0xCE31 },
      { 0x110E, 0x1173, 0xCE20 },

      /* Hangul non-compositions (testing various exit conditions in combine_hangul()) */
      { 0x1100, 0x1160, 0 },
      { 0x1100, 0x1177, 0 },
      { 0xABFF, 0x11B6, 0 },
      { 0xD7A5, 0x11B6, 0 },
      { 0xAC01, 0x11B6, 0 },
      { 0xD4CC, 0x11A6, 0 },
      { 0xD4CC, 0x11C4, 0 },

      /* Primary composite above U+FFFF (a significant boundary value in our implementation) */
      { 0x1611E, 0x1611E, 0x16121 }, /* first and second char equal */
      { 0x1611E, 0x1611F, 0x16123 },

      /* First singletons */
      { 0x00F6, 0x0304, 0x022B },

      /* Second singletons */
      { 0x0B47, 0x0B57, 0x0B4C },
      { 0x00A0, 0x0B57, 0 },

      /* Very high values (exercising some branches in COMPOSE_INDEX) */
      { 0x16E00, 0x030A, 0 },
      { 0x212B, 0x16E00, 0 },

      /* Exercise some failure paths in the lookup tables */
      { 0x1E63, 0x0306, 0 },
      { 0x1E63, 0x0304, 0 },
      { 0x1E63, 0x0B57, 0 },
      { 0x1E63, 0x0000, 0 },
      { 0x1E63, 0x113C2, 0 },
      { 0x1F01, 0x113C2, 0 },
      { 0x006E, 0x0302, 0 },
      { 0x1E63, 0x1611F, 0 },
      { 0x1138E, 0x113B8, 0 },
      { 0x1611E, 0x0000, 0 },
      { 0x0000, 0x1611F, 0 },
      { 0x11390, 0x113C2, 0 },
    };

  for (size_t i = 0; i < G_N_ELEMENTS (vectors); i++)
    {
      const gunichar expected = vectors[i].expected_result;
      gunichar composed;
      gboolean success;

      g_test_message ("Composing U+%06x and U+%06x; expecting U+%06x",
                      vectors[i].a, vectors[i].b, expected);

      success = g_unichar_compose (vectors[i].a, vectors[i].b, &composed);

      /* Failing vectors store 0 as their expected result, which is also what
       * the output must contain on failure, so one comparison covers both
       * outcomes. */
      g_assert_cmpuint (composed, ==, expected);

      if (expected != 0)
        g_assert_true (success);
      else
        g_assert_false (success);
    }
}
/* Test that g_unichar_decompose() returns the correct value for various
* ASCII and Unicode alphabetic, numeric, and other, codepoints. */
static void
test_decompose (void)
{
gunichar a, b;
/* Not decomposable */
g_assert_false (g_unichar_decompose (0x0041, &a, &b) && a == 0x0041 && b == 0);
g_assert_false (g_unichar_decompose (0xFB01, &a, &b) && a == 0xFB01 && b == 0);
/* Singletons */
g_assert_true (g_unichar_decompose (0x212B, &a, &b) && a == 0x00C5 && b == 0);
g_assert_true (g_unichar_decompose (0x2126, &a, &b) && a == 0x03A9 && b == 0);
/* Tricky pairs */
g_assert_true (g_unichar_decompose (0x0344, &a, &b) && a == 0x0308 && b == 0x0301);
g_assert_true (g_unichar_decompose (0x0F73, &a, &b) && a == 0x0F71 && b == 0x0F72);
/* Pairs */
g_assert_true (g_unichar_decompose (0x00C5, &a, &b) && a == 0x0041 && b == 0x030A);
g_assert_true (g_unichar_decompose (0x00F4, &a, &b) && a == 0x006F && b == 0x0302);
g_assert_true (g_unichar_decompose (0x1E69, &a, &b) && a == 0x1E63 && b == 0x0307);
g_assert_true (g_unichar_decompose (0x1E63, &a, &b) && a == 0x0073 && b == 0x0323);
g_assert_true (g_unichar_decompose (0x1E0B, &a, &b) && a == 0x0064 && b == 0x0307);
g_assert_true (g_unichar_decompose (0x1E0D, &a, &b) && a == 0x0064 && b == 0x0323);
/* Hangul */
g_assert_true (g_unichar_decompose (0xD4DB, &a, &b) && a == 0xD4CC && b == 0x11B6);
g_assert_true (g_unichar_decompose (0xD4CC, &a, &b) && a == 0x1111 && b == 0x1171);
g_assert_true (g_unichar_decompose (0xCE31, &a, &b) && a == 0xCE20 && b == 0x11B8);
g_assert_true (g_unichar_decompose (0xCE20, &a, &b) && a == 0x110E && b == 0x1173);
/* Primary composite above U+FFFF (a significant boundary value in our implementation) */
g_assert_true (g_unichar_decompose (0x16121, &a, &b) && a == 0x1611E && b == 0x1611E); /* first and second char equal */
g_assert_true (g_unichar_decompose (0x16123, &a, &b) && a == 0x1611E && b == 0x1611F);
}
/* Test that g_unichar_fully_decompose() returns the correct value for
* various ASCII and Unicode alphabetic, numeric, and other, codepoints. */
static void
test_fully_decompose_canonical (void)
{
const struct
{
gunichar input;
size_t expected_len;
gunichar expected_decomposition[4];
}
vectors[] =
{
#define TEST0(ch) { ch, 1, { ch, 0, 0, 0 }}
#define TEST1(ch, a) { ch, 1, { a, 0, 0, 0 }}
#define TEST2(ch, a, b) { ch, 2, { a, b, 0, 0 }}
#define TEST3(ch, a, b, c) { ch, 3, { a, b, c, 0 }}
#define TEST4(ch, a, b, c, d) { ch, 4, { a, b, c, d }}
/* Not decomposable */
TEST0 (0x0041),
TEST0 (0xFB01),
/* Singletons */
TEST2 (0x212B, 0x0041, 0x030A),
TEST1 (0x2126, 0x03A9),
/* Tricky pairs */
TEST2 (0x0344, 0x0308, 0x0301),
TEST2 (0x0F73, 0x0F71, 0x0F72),
/* General */
TEST2 (0x00C5, 0x0041, 0x030A),
TEST2 (0x00F4, 0x006F, 0x0302),
TEST3 (0x1E69, 0x0073, 0x0323, 0x0307),
TEST2 (0x1E63, 0x0073, 0x0323),
TEST2 (0x1E0B, 0x0064, 0x0307),
TEST2 (0x1E0D, 0x0064, 0x0323),
/* Hangul */
TEST3 (0xD4DB, 0x1111, 0x1171, 0x11B6),
TEST2 (0xD4CC, 0x1111, 0x1171),
TEST3 (0xCE31, 0x110E, 0x1173, 0x11B8),
TEST2 (0xCE20, 0x110E, 0x1173),
#undef TEST4
#undef TEST3
#undef TEST2
#undef TEST1
#undef TEST0
};
for (size_t i = 0; i < G_N_ELEMENTS (vectors); i++)
{
gunichar decomp[5];
size_t len;
g_test_message ("Fully decomposing U+%06x; expecting %" G_GSIZE_FORMAT " codepoints",
vectors[i].input, vectors[i].expected_len);
/* Test with all possible output array sizes, to check that the function
* can write partial results OK. */
for (size_t j = 0; j <= G_N_ELEMENTS (decomp); j++)
{
len = g_unichar_fully_decompose (vectors[i].input, FALSE, decomp, G_N_ELEMENTS (decomp) - j);
g_assert_cmpuint (len, ==, vectors[i].expected_len);
if (len >= j)
g_assert_cmpmem (decomp, (len - j) * sizeof (*decomp),
vectors[i].expected_decomposition, (vectors[i].expected_len - j) * sizeof (*vectors[i].expected_decomposition));
}
/* And again with no result array at all, just to get the length. */
len = g_unichar_fully_decompose (vectors[i].input, FALSE, NULL, 0);
g_assert_cmpuint (len, ==, vectors[i].expected_len);
}
}
/* Test that g_unicode_canonical_decomposition() returns the correct
* value for various ASCII and Unicode alphabetic, numeric, and other,
* codepoints. */
static void
test_canonical_decomposition (void)
{
gunichar *decomp;
gsize len;
#define TEST_DECOMP(ch, expected_len, a, b, c, d) \
decomp = g_unicode_canonical_decomposition (ch, &len); \
g_assert_cmpint (expected_len, ==, len); \
if (expected_len >= 1) g_assert_cmphex (decomp[0], ==, a); \
if (expected_len >= 2) g_assert_cmphex (decomp[1], ==, b); \
if (expected_len >= 3) g_assert_cmphex (decomp[2], ==, c); \
if (expected_len >= 4) g_assert_cmphex (decomp[3], ==, d); \
g_free (decomp);
#define TEST0(ch) TEST_DECOMP (ch, 1, ch, 0, 0, 0)
#define TEST1(ch, a) TEST_DECOMP (ch, 1, a, 0, 0, 0)
#define TEST2(ch, a, b) TEST_DECOMP (ch, 2, a, b, 0, 0)
#define TEST3(ch, a, b, c) TEST_DECOMP (ch, 3, a, b, c, 0)
#define TEST4(ch, a, b, c, d) TEST_DECOMP (ch, 4, a, b, c, d)
/* Not decomposable */
TEST0 (0x0041);
TEST0 (0xFB01);
/* Singletons */
TEST2 (0x212B, 0x0041, 0x030A);
TEST1 (0x2126, 0x03A9);
/* Tricky pairs */
TEST2 (0x0344, 0x0308, 0x0301);
TEST2 (0x0F73, 0x0F71, 0x0F72);
/* General */
TEST2 (0x00C5, 0x0041, 0x030A);
TEST2 (0x00F4, 0x006F, 0x0302);
TEST3 (0x1E69, 0x0073, 0x0323, 0x0307);
TEST2 (0x1E63, 0x0073, 0x0323);
TEST2 (0x1E0B, 0x0064, 0x0307);
TEST2 (0x1E0D, 0x0064, 0x0323);
/* Hangul */
TEST3 (0xD4DB, 0x1111, 0x1171, 0x11B6);
TEST2 (0xD4CC, 0x1111, 0x1171);
TEST3 (0xCE31, 0x110E, 0x1173, 0x11B8);
TEST2 (0xCE20, 0x110E, 0x1173);
#undef TEST_DECOMP
}
/* Walk every code point below 0x110000 and check that, whenever
 * g_unichar_decompose() splits one into a pair, the second element of that
 * pair does not decompose any further. For code points which do not
 * decompose, check that the outputs are set to the input and 0. */
static void
test_decompose_tail (void)
{
  for (gunichar ch = 0; ch < 0x110000; ch++)
    {
      gunichar head, tail, tail_head, tail_tail;

      if (!g_unichar_decompose (ch, &head, &tail))
        {
          g_assert_cmpuint (head, ==, ch);
          g_assert_cmpuint (tail, ==, 0);
          continue;
        }

      g_assert_false (g_unichar_decompose (tail, &tail_head, &tail_tail));
    }
}
/* Walk every code point below 0x110000 and check the length reported by
 * g_unichar_fully_decompose(): canonical decompositions must be at most 4
 * code points long, and compatibility decompositions at most 18. */
static void
test_fully_decompose_len (void)
{
  const int max_canonical_len = 4;
  const int max_compat_len = 18;
  gunichar ch = 0;

  while (ch < 0x110000)
    {
      g_assert_cmpint (g_unichar_fully_decompose (ch, FALSE, NULL, 0), <=, max_canonical_len);
      g_assert_cmpint (g_unichar_fully_decompose (ch, TRUE, NULL, 0), <=, max_compat_len);
      ch++;
    }
}
/* Check various examples from Unicode Annex #15 for NFD and NFC
 * normalization.
 *
 * For each entry, `source` is normalized to NFD and compared against `nfd`;
 * then `nfd` is normalized to NFC and compared against `nfc`.
 *
 * All non-ASCII characters are written as UTF-8 hex escapes so that the
 * vectors do not depend on how this file is encoded or displayed; the code
 * points they encode are listed alongside.
 */
static void
test_normalization (void)
{
  const struct {
    const char *source;
    const char *nfd;
    const char *nfc;
  } tests[] = {
    /* Singletons */
    /* U+212B ANGSTROM SIGN -> U+0041 U+030A -> U+00C5 */
    { "\xe2\x84\xab", "A\xcc\x8a", "\xc3\x85" },
    /* U+2126 OHM SIGN -> U+03A9 -> U+03A9 */
    { "\xe2\x84\xa6", "\xce\xa9", "\xce\xa9" },

    /* Canonical Composites */
    /* U+00C5 LATIN CAPITAL LETTER A WITH RING ABOVE */
    { "\xc3\x85", "A\xcc\x8a", "\xc3\x85" },
    /* U+00F4 LATIN SMALL LETTER O WITH CIRCUMFLEX */
    { "\xc3\xb4", "o\xcc\x82", "\xc3\xb4" },

    /* Multiple Combining Marks */
    /* U+1E69 LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE */
    { "\xe1\xb9\xa9", "s\xcc\xa3\xcc\x87", "\xe1\xb9\xa9" },
    /* U+1E0B U+0323 -> d U+0323 U+0307 -> U+1E0D U+0307 */
    { "\xe1\xb8\x8b\xcc\xa3", "d\xcc\xa3\xcc\x87", "\xe1\xb8\x8d\xcc\x87" },
    /* q U+0307 U+0323 -> q U+0323 U+0307 (no precomposed form exists) */
    { "q\xcc\x87\xcc\xa3", "q\xcc\xa3\xcc\x87", "q\xcc\xa3\xcc\x87" },

    /* Compatibility Composites */
    /* U+FB01 LATIN SMALL LIGATURE FI is unchanged by NFD and NFC */
    { "\xef\xac\x81", "\xef\xac\x81", "\xef\xac\x81" },
    /* 2 U+2075 is unchanged by NFD and NFC */
    { "2\xe2\x81\xb5", "2\xe2\x81\xb5", "2\xe2\x81\xb5" },
    /* U+1E9B U+0323 -> U+017F U+0323 U+0307 -> U+1E9B U+0323 */
    { "\xe1\xba\x9b\xcc\xa3", "\xc5\xbf\xcc\xa3\xcc\x87", "\xe1\xba\x9b\xcc\xa3" },

    /* Tests for behavior with reordered marks */
    /* s U+0307 U+0323 -> s U+0323 U+0307 -> U+1E69 */
    { "s\xcc\x87\xcc\xa3", "s\xcc\xa3\xcc\x87", "\xe1\xb9\xa9" },
    /* U+03B1 U+0314 U+0342 -> unchanged -> U+1F07 */
    { "\xce\xb1\xcc\x94\xcd\x82", "\xce\xb1\xcc\x94\xcd\x82", "\xe1\xbc\x87" },
    /* U+03B1 U+0342 U+0314 -> unchanged -> U+1FB6 U+0314 */
    { "\xce\xb1\xcd\x82\xcc\x94", "\xce\xb1\xcd\x82\xcc\x94", "\xe1\xbe\xb6\xcc\x94" },
  };
  gsize i;

  for (i = 0; i < G_N_ELEMENTS (tests); i++)
    {
      char *nfd, *nfc;

      nfd = g_utf8_normalize (tests[i].source, -1, G_NORMALIZE_NFD);
      g_assert_cmpstr (nfd, ==, tests[i].nfd);

      /* Compose from the expected NFD form rather than from the result above */
      nfc = g_utf8_normalize (tests[i].nfd, -1, G_NORMALIZE_NFC);
      g_assert_cmpstr (nfc, ==, tests[i].nfc);

      g_free (nfd);
      g_free (nfc);
    }
}
/* Exercise the mapping between GUnicodeScript values and ISO 15924
 * four-letter codes in both directions.
 *
 * A handful of fixed conversions are checked first (invalid code, an
 * out-of-range script value, Arabic, and an unrecognised code), then every
 * row of the table is round-tripped through
 * g_unicode_script_to_iso15924() and g_unicode_script_from_iso15924().
 */
static void
test_iso15924 (void)
{
  static const struct {
    GUnicodeScript script;
    char four_letter_code[5];
  } data[] = {
    { G_UNICODE_SCRIPT_COMMON, "Zyyy" },
    { G_UNICODE_SCRIPT_INHERITED, "Zinh" },
    { G_UNICODE_SCRIPT_MATH, "Zmth" },
    { G_UNICODE_SCRIPT_ARABIC, "Arab" },
    { G_UNICODE_SCRIPT_ARMENIAN, "Armn" },
    { G_UNICODE_SCRIPT_BENGALI, "Beng" },
    { G_UNICODE_SCRIPT_BOPOMOFO, "Bopo" },
    { G_UNICODE_SCRIPT_CHEROKEE, "Cher" },
    { G_UNICODE_SCRIPT_COPTIC, "Copt" },
    { G_UNICODE_SCRIPT_CYRILLIC, "Cyrl" },
    { G_UNICODE_SCRIPT_DESERET, "Dsrt" },
    { G_UNICODE_SCRIPT_DEVANAGARI, "Deva" },
    { G_UNICODE_SCRIPT_ETHIOPIC, "Ethi" },
    { G_UNICODE_SCRIPT_GEORGIAN, "Geor" },
    { G_UNICODE_SCRIPT_GOTHIC, "Goth" },
    { G_UNICODE_SCRIPT_GREEK, "Grek" },
    { G_UNICODE_SCRIPT_GUJARATI, "Gujr" },
    { G_UNICODE_SCRIPT_GURMUKHI, "Guru" },
    { G_UNICODE_SCRIPT_HAN, "Hani" },
    { G_UNICODE_SCRIPT_HANGUL, "Hang" },
    { G_UNICODE_SCRIPT_HEBREW, "Hebr" },
    { G_UNICODE_SCRIPT_HIRAGANA, "Hira" },
    { G_UNICODE_SCRIPT_KANNADA, "Knda" },
    { G_UNICODE_SCRIPT_KATAKANA, "Kana" },
    { G_UNICODE_SCRIPT_KHMER, "Khmr" },
    { G_UNICODE_SCRIPT_LAO, "Laoo" },
    { G_UNICODE_SCRIPT_LATIN, "Latn" },
    { G_UNICODE_SCRIPT_MALAYALAM, "Mlym" },
    { G_UNICODE_SCRIPT_MONGOLIAN, "Mong" },
    { G_UNICODE_SCRIPT_MYANMAR, "Mymr" },
    { G_UNICODE_SCRIPT_OGHAM, "Ogam" },
    { G_UNICODE_SCRIPT_OLD_ITALIC, "Ital" },
    { G_UNICODE_SCRIPT_ORIYA, "Orya" },
    { G_UNICODE_SCRIPT_RUNIC, "Runr" },
    { G_UNICODE_SCRIPT_SINHALA, "Sinh" },
    { G_UNICODE_SCRIPT_SYRIAC, "Syrc" },
    { G_UNICODE_SCRIPT_TAMIL, "Taml" },
    { G_UNICODE_SCRIPT_TELUGU, "Telu" },
    { G_UNICODE_SCRIPT_THAANA, "Thaa" },
    { G_UNICODE_SCRIPT_THAI, "Thai" },
    { G_UNICODE_SCRIPT_TIBETAN, "Tibt" },
    { G_UNICODE_SCRIPT_CANADIAN_ABORIGINAL, "Cans" },
    { G_UNICODE_SCRIPT_YI, "Yiii" },
    { G_UNICODE_SCRIPT_TAGALOG, "Tglg" },
    { G_UNICODE_SCRIPT_HANUNOO, "Hano" },
    { G_UNICODE_SCRIPT_BUHID, "Buhd" },
    { G_UNICODE_SCRIPT_TAGBANWA, "Tagb" },

    /* Unicode-4.0 additions */
    { G_UNICODE_SCRIPT_BRAILLE, "Brai" },
    { G_UNICODE_SCRIPT_CYPRIOT, "Cprt" },
    { G_UNICODE_SCRIPT_LIMBU, "Limb" },
    { G_UNICODE_SCRIPT_OSMANYA, "Osma" },
    { G_UNICODE_SCRIPT_SHAVIAN, "Shaw" },
    { G_UNICODE_SCRIPT_LINEAR_B, "Linb" },
    { G_UNICODE_SCRIPT_TAI_LE, "Tale" },
    { G_UNICODE_SCRIPT_UGARITIC, "Ugar" },

    /* Unicode-4.1 additions */
    { G_UNICODE_SCRIPT_NEW_TAI_LUE, "Talu" },
    { G_UNICODE_SCRIPT_BUGINESE, "Bugi" },
    { G_UNICODE_SCRIPT_GLAGOLITIC, "Glag" },
    { G_UNICODE_SCRIPT_TIFINAGH, "Tfng" },
    { G_UNICODE_SCRIPT_SYLOTI_NAGRI, "Sylo" },
    { G_UNICODE_SCRIPT_OLD_PERSIAN, "Xpeo" },
    { G_UNICODE_SCRIPT_KHAROSHTHI, "Khar" },

    /* Unicode-5.0 additions */
    { G_UNICODE_SCRIPT_UNKNOWN, "Zzzz" },
    { G_UNICODE_SCRIPT_BALINESE, "Bali" },
    { G_UNICODE_SCRIPT_CUNEIFORM, "Xsux" },
    { G_UNICODE_SCRIPT_PHOENICIAN, "Phnx" },
    { G_UNICODE_SCRIPT_PHAGS_PA, "Phag" },
    { G_UNICODE_SCRIPT_NKO, "Nkoo" },

    /* Unicode-5.1 additions */
    { G_UNICODE_SCRIPT_KAYAH_LI, "Kali" },
    { G_UNICODE_SCRIPT_LEPCHA, "Lepc" },
    { G_UNICODE_SCRIPT_REJANG, "Rjng" },
    { G_UNICODE_SCRIPT_SUNDANESE, "Sund" },
    { G_UNICODE_SCRIPT_SAURASHTRA, "Saur" },
    { G_UNICODE_SCRIPT_CHAM, "Cham" },
    { G_UNICODE_SCRIPT_OL_CHIKI, "Olck" },
    { G_UNICODE_SCRIPT_VAI, "Vaii" },
    { G_UNICODE_SCRIPT_CARIAN, "Cari" },
    { G_UNICODE_SCRIPT_LYCIAN, "Lyci" },
    { G_UNICODE_SCRIPT_LYDIAN, "Lydi" },

    /* Unicode-5.2 additions */
    { G_UNICODE_SCRIPT_AVESTAN, "Avst" },
    { G_UNICODE_SCRIPT_BAMUM, "Bamu" },
    { G_UNICODE_SCRIPT_EGYPTIAN_HIEROGLYPHS, "Egyp" },
    { G_UNICODE_SCRIPT_IMPERIAL_ARAMAIC, "Armi" },
    { G_UNICODE_SCRIPT_INSCRIPTIONAL_PAHLAVI, "Phli" },
    { G_UNICODE_SCRIPT_INSCRIPTIONAL_PARTHIAN, "Prti" },
    { G_UNICODE_SCRIPT_JAVANESE, "Java" },
    { G_UNICODE_SCRIPT_KAITHI, "Kthi" },
    { G_UNICODE_SCRIPT_LISU, "Lisu" },
    { G_UNICODE_SCRIPT_MEETEI_MAYEK, "Mtei" },
    { G_UNICODE_SCRIPT_OLD_SOUTH_ARABIAN, "Sarb" },
    { G_UNICODE_SCRIPT_OLD_TURKIC, "Orkh" },
    { G_UNICODE_SCRIPT_SAMARITAN, "Samr" },
    { G_UNICODE_SCRIPT_TAI_THAM, "Lana" },
    { G_UNICODE_SCRIPT_TAI_VIET, "Tavt" },

    /* Unicode-6.0 additions */
    { G_UNICODE_SCRIPT_BATAK, "Batk" },
    { G_UNICODE_SCRIPT_BRAHMI, "Brah" },
    { G_UNICODE_SCRIPT_MANDAIC, "Mand" },

    /* Unicode-6.1 additions */
    { G_UNICODE_SCRIPT_CHAKMA, "Cakm" },
    { G_UNICODE_SCRIPT_MEROITIC_CURSIVE, "Merc" },
    { G_UNICODE_SCRIPT_MEROITIC_HIEROGLYPHS, "Mero" },
    { G_UNICODE_SCRIPT_MIAO, "Plrd" },
    { G_UNICODE_SCRIPT_SHARADA, "Shrd" },
    { G_UNICODE_SCRIPT_SORA_SOMPENG, "Sora" },
    { G_UNICODE_SCRIPT_TAKRI, "Takr" },

    /* Unicode 7.0 additions */
    { G_UNICODE_SCRIPT_BASSA_VAH, "Bass" },
    { G_UNICODE_SCRIPT_CAUCASIAN_ALBANIAN, "Aghb" },
    { G_UNICODE_SCRIPT_DUPLOYAN, "Dupl" },
    { G_UNICODE_SCRIPT_ELBASAN, "Elba" },
    { G_UNICODE_SCRIPT_GRANTHA, "Gran" },
    { G_UNICODE_SCRIPT_KHOJKI, "Khoj" },
    { G_UNICODE_SCRIPT_KHUDAWADI, "Sind" },
    { G_UNICODE_SCRIPT_LINEAR_A, "Lina" },
    { G_UNICODE_SCRIPT_MAHAJANI, "Mahj" },
    { G_UNICODE_SCRIPT_MANICHAEAN, "Mani" },
    { G_UNICODE_SCRIPT_MENDE_KIKAKUI, "Mend" },
    { G_UNICODE_SCRIPT_MODI, "Modi" },
    { G_UNICODE_SCRIPT_MRO, "Mroo" },
    { G_UNICODE_SCRIPT_NABATAEAN, "Nbat" },
    { G_UNICODE_SCRIPT_OLD_NORTH_ARABIAN, "Narb" },
    { G_UNICODE_SCRIPT_OLD_PERMIC, "Perm" },
    { G_UNICODE_SCRIPT_PAHAWH_HMONG, "Hmng" },
    { G_UNICODE_SCRIPT_PALMYRENE, "Palm" },
    { G_UNICODE_SCRIPT_PAU_CIN_HAU, "Pauc" },
    { G_UNICODE_SCRIPT_PSALTER_PAHLAVI, "Phlp" },
    { G_UNICODE_SCRIPT_SIDDHAM, "Sidd" },
    { G_UNICODE_SCRIPT_TIRHUTA, "Tirh" },
    { G_UNICODE_SCRIPT_WARANG_CITI, "Wara" },

    /* Unicode 8.0 additions */
    { G_UNICODE_SCRIPT_AHOM, "Ahom" },
    { G_UNICODE_SCRIPT_ANATOLIAN_HIEROGLYPHS, "Hluw" },
    { G_UNICODE_SCRIPT_HATRAN, "Hatr" },
    { G_UNICODE_SCRIPT_MULTANI, "Mult" },
    { G_UNICODE_SCRIPT_OLD_HUNGARIAN, "Hung" },
    { G_UNICODE_SCRIPT_SIGNWRITING, "Sgnw" },

    /* Unicode 9.0 additions */
    { G_UNICODE_SCRIPT_ADLAM, "Adlm" },
    { G_UNICODE_SCRIPT_BHAIKSUKI, "Bhks" },
    { G_UNICODE_SCRIPT_MARCHEN, "Marc" },
    { G_UNICODE_SCRIPT_NEWA, "Newa" },
    { G_UNICODE_SCRIPT_OSAGE, "Osge" },
    { G_UNICODE_SCRIPT_TANGUT, "Tang" },

    /* Unicode 10.0 additions */
    { G_UNICODE_SCRIPT_MASARAM_GONDI, "Gonm" },
    { G_UNICODE_SCRIPT_NUSHU, "Nshu" },
    { G_UNICODE_SCRIPT_SOYOMBO, "Soyo" },
    { G_UNICODE_SCRIPT_ZANABAZAR_SQUARE, "Zanb" },

    /* Unicode 11.0 additions */
    { G_UNICODE_SCRIPT_DOGRA, "Dogr" },
    { G_UNICODE_SCRIPT_GUNJALA_GONDI, "Gong" },
    { G_UNICODE_SCRIPT_HANIFI_ROHINGYA, "Rohg" },
    { G_UNICODE_SCRIPT_MAKASAR, "Maka" },
    { G_UNICODE_SCRIPT_MEDEFAIDRIN, "Medf" },
    { G_UNICODE_SCRIPT_OLD_SOGDIAN, "Sogo" },
    { G_UNICODE_SCRIPT_SOGDIAN, "Sogd" },

    /* Unicode 12.0 additions */
    { G_UNICODE_SCRIPT_ELYMAIC, "Elym" },
    { G_UNICODE_SCRIPT_NANDINAGARI, "Nand" },
    { G_UNICODE_SCRIPT_NYIAKENG_PUACHUE_HMONG, "Hmnp" },
    { G_UNICODE_SCRIPT_WANCHO, "Wcho" },

    /* Unicode 13.0 additions */
    { G_UNICODE_SCRIPT_CHORASMIAN, "Chrs" },
    { G_UNICODE_SCRIPT_DIVES_AKURU, "Diak" },
    { G_UNICODE_SCRIPT_KHITAN_SMALL_SCRIPT, "Kits" },
    { G_UNICODE_SCRIPT_YEZIDI, "Yezi" },

    /* Unicode 14.0 additions */
    { G_UNICODE_SCRIPT_CYPRO_MINOAN, "Cpmn" },
    { G_UNICODE_SCRIPT_OLD_UYGHUR, "Ougr" },
    { G_UNICODE_SCRIPT_TANGSA, "Tnsa" },
    { G_UNICODE_SCRIPT_TOTO, "Toto" },
    { G_UNICODE_SCRIPT_VITHKUQI, "Vith" },

    /* Unicode 15.0 additions */
    { G_UNICODE_SCRIPT_KAWI, "Kawi" },
    { G_UNICODE_SCRIPT_NAG_MUNDARI, "Nagm" },

    /* Unicode 16.0 additions */
    { G_UNICODE_SCRIPT_TODHRI, "Todr" },
    { G_UNICODE_SCRIPT_GARAY, "Gara" },
    { G_UNICODE_SCRIPT_TULU_TIGALARI, "Tutg" },
    { G_UNICODE_SCRIPT_SUNUWAR, "Sunu" },
    { G_UNICODE_SCRIPT_GURUNG_KHEMA, "Gukh" },
    { G_UNICODE_SCRIPT_KIRAT_RAI, "Krai" },
    { G_UNICODE_SCRIPT_OL_ONAL, "Onao" },
  };
  guint i;

  /* Fixed conversions. 0x5A7A7A7A is ASCII "Zzzz" and 0x41726162 is
   * ASCII "Arab", packed most-significant byte first. */
  g_assert_cmphex (0, ==,
                   g_unicode_script_to_iso15924 (G_UNICODE_SCRIPT_INVALID_CODE));
  g_assert_cmphex (0x5A7A7A7A, ==, g_unicode_script_to_iso15924 (1000));
  g_assert_cmphex (0x41726162, ==,
                   g_unicode_script_to_iso15924 (G_UNICODE_SCRIPT_ARABIC));

  g_assert_cmphex (G_UNICODE_SCRIPT_INVALID_CODE, ==,
                   g_unicode_script_from_iso15924 (0));
  g_assert_cmphex (G_UNICODE_SCRIPT_UNKNOWN, ==,
                   g_unicode_script_from_iso15924 (0x12345678));

  for (i = 0; i < G_N_ELEMENTS (data); i++)
    {
      guint32 code = 0;
      guint k;

      /* Fold the four tag bytes into one 32-bit value, first letter in
       * the most significant byte. */
      for (k = 0; k < 4; k++)
        code = (code << 8) | (guint8) data[i].four_letter_code[k];

      g_test_message ("Testing script %s (code %u)", data[i].four_letter_code, code);

      /* The mapping must hold in both directions. */
      g_assert_cmphex (g_unicode_script_to_iso15924 (data[i].script), ==, code);
      g_assert_cmpint (g_unicode_script_from_iso15924 (code), ==, data[i].script);
    }
}
/* Run g_utf8_normalize() over a small table of inputs in all four
 * normalization modes (NFD, NFC, NFKD, NFKC) and compare each result with
 * the expected string. The "non-utf\x88" row ends in a lone 0x88 byte and
 * is expected to yield NULL in every mode.
 */
static void
test_normalize (void)
{
  const struct
  {
    const gchar *str;   /* input passed to g_utf8_normalize() */
    const gchar *nfd;   /* expected G_NORMALIZE_NFD result */
    const gchar *nfc;   /* expected G_NORMALIZE_NFC result */
    const gchar *nfkd;  /* expected G_NORMALIZE_NFKD result */
    const gchar *nfkc;  /* expected G_NORMALIZE_NFKC result */
  } tests[] = {
    { "Äffin", "A\u0308ffin", "Äffin", "A\u0308ffin", "Äffin" },
    /* U+FB03 is only expanded to "ffi" by the compatibility forms. */
    { "Ä\uFB03n", "A\u0308\uFB03n", "Ä\uFB03n", "A\u0308ffin", "Äffin" },
    { "Henry IV", "Henry IV", "Henry IV", "Henry IV", "Henry IV" },
    /* U+2163 is only expanded to "IV" by the compatibility forms. */
    { "Henry \u2163", "Henry \u2163", "Henry \u2163", "Henry IV", "Henry IV" },
    { "non-utf\x88", NULL, NULL, NULL, NULL },
    { "", "", "", "", "" },
  };
  guint i;

/* Normalize `input` with `mode`, compare against `expected`, and release
 * the result. */
#define CHECK_NORMALIZED(input, mode, expected) \
  do \
    { \
      gchar *normalized = g_utf8_normalize (input, -1, mode); \
      g_assert_cmpstr (normalized, ==, expected); \
      g_free (normalized); \
    } \
  while (0)

  for (i = 0; i < G_N_ELEMENTS (tests); i++)
    {
      CHECK_NORMALIZED (tests[i].str, G_NORMALIZE_NFD, tests[i].nfd);
      CHECK_NORMALIZED (tests[i].str, G_NORMALIZE_NFC, tests[i].nfc);
      CHECK_NORMALIZED (tests[i].str, G_NORMALIZE_NFKD, tests[i].nfkd);
      CHECK_NORMALIZED (tests[i].str, G_NORMALIZE_NFKC, tests[i].nfkc);
    }

#undef CHECK_NORMALIZED
}
int
main (int argc,
char *argv[])
{
g_test_init (&argc, &argv, NULL);
g_test_add_func ("/unicode/alnum", test_alnum);
g_test_add_func ("/unicode/alpha", test_alpha);
g_test_add_func ("/unicode/break-type", test_unichar_break_type);
g_test_add_func ("/unicode/canonical-decomposition", test_canonical_decomposition);
g_test_add_func ("/unicode/casefold", test_casefold);
g_test_add_func ("/unicode/casemap_and_casefold", test_casemap_and_casefold);
g_test_add_func ("/unicode/cases", test_cases);
g_test_add_func ("/unicode/character-type", test_unichar_character_type);
g_test_add_func ("/unicode/cntrl", test_cntrl);
g_test_add_func ("/unicode/combining-class", test_combining_class);
g_test_add_func ("/unicode/compose", test_compose);
g_test_add_func ("/unicode/decompose", test_decompose);
g_test_add_func ("/unicode/decompose-tail", test_decompose_tail);
g_test_add_func ("/unicode/defined", test_defined);
g_test_add_func ("/unicode/digit", test_digit);
g_test_add_func ("/unicode/digit-value", test_digit_value);
g_test_add_func ("/unicode/fully-decompose-canonical", test_fully_decompose_canonical);
g_test_add_func ("/unicode/fully-decompose-len", test_fully_decompose_len);
g_test_add_func ("/unicode/normalization", test_normalization);
g_test_add_func ("/unicode/graph", test_graph);
g_test_add_func ("/unicode/iso15924", test_iso15924);
g_test_add_func ("/unicode/lower", test_lower);
g_test_add_func ("/unicode/mark", test_mark);
g_test_add_func ("/unicode/mirror", test_mirror);
g_test_add_func ("/unicode/print", test_print);
g_test_add_func ("/unicode/punctuation", test_punctuation);
g_test_add_func ("/unicode/script", test_unichar_script);
g_test_add_func ("/unicode/space", test_space);
g_test_add_func ("/unicode/strdown", test_strdown);
g_test_add_func ("/unicode/strup", test_strup);
g_test_add_func ("/unicode/turkish-strupdown", test_turkish_strupdown);
g_test_add_func ("/unicode/title", test_title);
g_test_add_func ("/unicode/upper", test_upper);
g_test_add_func ("/unicode/validate", test_unichar_validate);
g_test_add_func ("/unicode/wide", test_wide);
g_test_add_func ("/unicode/xdigit", test_xdigit);
g_test_add_func ("/unicode/xdigit-value", test_xdigit_value);
g_test_add_func ("/unicode/zero-width", test_zerowidth);
g_test_add_func ("/unicode/normalize", test_normalize);
return g_test_run();
}