From b9f07a458a338a1fabac995183ef5a45bbafefcf Mon Sep 17 00:00:00 2001
From: Emmanuel Fleury
Date: Fri, 14 Jan 2022 14:14:48 +0100
Subject: [PATCH] Merge tests/unicode-caseconv.c into glib/tests/unicode.c

Related to issue #1434
---
 {tests => glib/tests}/casefold.txt        |   0
 {tests => glib/tests}/casemap.txt         |   0
 {tests => glib/tests}/gen-casefold-txt.py |   0
 {tests => glib/tests}/gen-casemap-txt.py  |   0
 glib/tests/meson.build                    |  10 +-
 glib/tests/unicode.c                      | 126 +++++++++++++++++++++
 tests/meson.build                         |   3 -
 tests/unicode-caseconv.c                  | 131 ----------------------
 8 files changed, 132 insertions(+), 138 deletions(-)
 rename {tests => glib/tests}/casefold.txt (100%)
 rename {tests => glib/tests}/casemap.txt (100%)
 rename {tests => glib/tests}/gen-casefold-txt.py (100%)
 rename {tests => glib/tests}/gen-casemap-txt.py (100%)
 delete mode 100644 tests/unicode-caseconv.c

diff --git a/tests/casefold.txt b/glib/tests/casefold.txt
similarity index 100%
rename from tests/casefold.txt
rename to glib/tests/casefold.txt
diff --git a/tests/casemap.txt b/glib/tests/casemap.txt
similarity index 100%
rename from tests/casemap.txt
rename to glib/tests/casemap.txt
diff --git a/tests/gen-casefold-txt.py b/glib/tests/gen-casefold-txt.py
similarity index 100%
rename from tests/gen-casefold-txt.py
rename to glib/tests/gen-casefold-txt.py
diff --git a/tests/gen-casemap-txt.py b/glib/tests/gen-casemap-txt.py
similarity index 100%
rename from tests/gen-casemap-txt.py
rename to glib/tests/gen-casemap-txt.py
diff --git a/glib/tests/meson.build b/glib/tests/meson.build
index 862bcdf6d..125d38ec9 100644
--- a/glib/tests/meson.build
+++ b/glib/tests/meson.build
@@ -184,13 +184,15 @@ endif
 
 if installed_tests_enabled
   install_data(
-    'keyfiletest.ini',
-    'pages.ini',
-    'keyfile.c',
-    'empty',
     '4096-random-bytes',
+    'casefold.txt',
+    'casemap.txt',
     'echo-script',
     'echo-script.bat',
+    'empty',
+    'keyfile.c',
+    'keyfiletest.ini',
+    'pages.ini',
     install_dir : installed_tests_execdir,
   )
   install_subdir('bookmarks', install_dir : installed_tests_execdir)
diff --git a/glib/tests/unicode.c b/glib/tests/unicode.c
index 9d6596687..75487fd53 100644
--- a/glib/tests/unicode.c
+++ b/glib/tests/unicode.c
@@ -28,6 +28,7 @@
 #endif
 
 #include <locale.h>
+#include <stdio.h>
 
 #include "glib.h"
 
@@ -535,6 +536,130 @@ test_casefold (void)
   g_free (str_casefold);
 }
 
+/* Check g_utf8_strup(), g_utf8_strdown() and g_utf8_casefold() against
+ * the reference data in casemap.txt and casefold.txt.
+ *
+ * Each casemap.txt line is tab-separated: field 0 is the locale (empty
+ * means "C"), field 1 the test string, field 2 the expected lowercase
+ * form and field 4 the expected uppercase form. Each casefold.txt line
+ * holds a test string and its expected casefolded form. Lines starting
+ * with '#' are skipped in both files.
+ */
+static void
+test_casemap_and_casefold (void)
+{
+  FILE *infile;
+  char buffer[1024];
+  char **strings;
+  char *filename;
+  const char *locale;
+  const char *test;
+  const char *expected;
+  char *convert;
+  char *current_locale = setlocale (LC_CTYPE, NULL);
+  char *saved_locale;
+
+  /* This test switches LC_CTYPE while other tests share the process, so
+   * remember the initial locale and restore it on the way out. A copy is
+   * needed because later setlocale() calls may overwrite the returned
+   * string.
+   */
+  saved_locale = g_strdup (current_locale);
+
+  filename = g_test_build_filename (G_TEST_DIST, "casemap.txt", NULL);
+  infile = fopen (filename, "r");
+  g_assert_nonnull (infile);
+
+  while (fgets (buffer, sizeof (buffer), infile))
+    {
+      if (buffer[0] == '#')
+        continue;
+
+      strings = g_strsplit (buffer, "\t", -1);
+      locale = strings[0];
+      if (!locale[0])
+        locale = "C";
+
+      if (strcmp (locale, current_locale) != 0)
+        {
+          setlocale (LC_CTYPE, locale);
+          current_locale = setlocale (LC_CTYPE, NULL);
+
+          if (strncmp (current_locale, locale, 2) != 0)
+            {
+              g_test_message ("Cannot set locale to %s, skipping", locale);
+              goto next;
+            }
+        }
+
+      /* Fields 1, 2 and 4 are read below; fail with a clear message on
+       * a short line instead of reading past the end of the vector.
+       */
+      g_assert_cmpuint (g_strv_length (strings), >=, 5);
+
+      test = strings[1];
+
+      /* gen-casemap-txt.py uses an empty string when a single
+       * character doesn't have an equivalent in a particular case;
+       * since that behavior is nonsense for multicharacter strings,
+       * it would make more sense to put the expected result ... the
+       * original character unchanged. But for now, we just work
+       * around it here and take the empty string to mean "same as
+       * original"
+       */
+
+      convert = g_utf8_strup (test, -1);
+      expected = strings[4][0] ? strings[4] : test;
+      g_assert_cmpstr (convert, ==, expected);
+      g_free (convert);
+
+      convert = g_utf8_strdown (test, -1);
+      expected = strings[2][0] ? strings[2] : test;
+      g_assert_cmpstr (convert, ==, expected);
+      g_free (convert);
+
+    next:
+      g_strfreev (strings);
+    }
+
+  fclose (infile);
+
+  g_free (filename);
+  filename = g_test_build_filename (G_TEST_DIST, "casefold.txt", NULL);
+
+  infile = fopen (filename, "r");
+  g_assert_nonnull (infile);
+
+  while (fgets (buffer, sizeof (buffer), infile))
+    {
+      if (buffer[0] == '#')
+        continue;
+
+      /* Cut at the newline if there is one: a final line without a
+       * terminator must not lose its last character.
+       */
+      buffer[strcspn (buffer, "\n")] = '\0';
+      strings = g_strsplit (buffer, "\t", -1);
+
+      /* Both the test string and its expected folding are required. */
+      g_assert_cmpuint (g_strv_length (strings), >=, 2);
+
+      test = strings[0];
+
+      convert = g_utf8_casefold (test, -1);
+      g_assert_cmpstr (convert, ==, strings[1]);
+      g_free (convert);
+
+      g_strfreev (strings);
+    }
+
+  fclose (infile);
+  g_free (filename);
+
+  setlocale (LC_CTYPE, saved_locale);
+  g_free (saved_locale);
+}
+
 /* Test that g_unichar_ismark() returns the correct value for various
  * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
 static void
@@ -1720,6 +1845,7 @@ main (int argc,
   g_test_add_func ("/unicode/break-type", test_unichar_break_type);
   g_test_add_func ("/unicode/canonical-decomposition", test_canonical_decomposition);
   g_test_add_func ("/unicode/casefold", test_casefold);
+  g_test_add_func ("/unicode/casemap_and_casefold", test_casemap_and_casefold);
   g_test_add_func ("/unicode/cases", test_cases);
   g_test_add_func ("/unicode/character-type", test_unichar_character_type);
   g_test_add_func ("/unicode/cntrl", test_cntrl);
diff --git a/tests/meson.build b/tests/meson.build
index 585e10549..813b83a3d 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -32,7 +32,6 @@ tests = {
   'thread-test' : {},
   'threadpool-test' : {'suite' : ['slow']},
   'type-test' : {},
-  'unicode-caseconv' : {},
   'unicode-encoding' : {},
   'module-test-library' : {
     'dependencies' : [libgmodule_dep],
@@ -73,8 +72,6 @@ endif
 if installed_tests_enabled
   install_data(
     'iochannel-test-infile',
-    'casemap.txt',
-    'casefold.txt',
     'utf8.txt',
     install_dir : installed_tests_execdir,
   )
diff --git a/tests/unicode-caseconv.c b/tests/unicode-caseconv.c
deleted file mode 100644
index c124633d1..000000000
--- 
a/tests/unicode-caseconv.c +++ /dev/null @@ -1,131 +0,0 @@ -#undef G_DISABLE_ASSERT -#undef G_LOG_DOMAIN - -#include -#include -#include -#include -#include - -int main (int argc, char **argv) -{ - FILE *infile; - char buffer[1024]; - char **strings; - char *filename; - const char *locale; - const char *test; - const char *expected; - char *convert; - char *current_locale = setlocale (LC_CTYPE, NULL); - gint result = 0; - - g_test_init (&argc, &argv, NULL); - - filename = g_test_build_filename (G_TEST_DIST, "casemap.txt", NULL); - - infile = fopen (filename, "r"); - if (!infile) - { - fprintf (stderr, "Failed to open %s\n", filename ); - exit (1); - } - - while (fgets (buffer, sizeof(buffer), infile)) - { - if (buffer[0] == '#') - continue; - - strings = g_strsplit (buffer, "\t", -1); - - locale = strings[0]; - - if (!locale[0]) - locale = "C"; - - if (strcmp (locale, current_locale) != 0) - { - setlocale (LC_CTYPE, locale); - current_locale = setlocale (LC_CTYPE, NULL); - - if (strncmp (current_locale, locale, 2) != 0) - { - fprintf (stderr, "Cannot set locale to %s, skipping\n", locale); - goto next; - } - } - - test = strings[1]; - - /* gen-casemap-txt.py uses an empty string when a single character - * doesn't have an equivalent in a particular case; since that behavior - * is nonsense for multicharacter strings, it would make more sense - * to put the expected result .. the original character unchanged. But - * for now, we just work around it here and take the empty string to mean - * "same as original" - */ - - convert = g_utf8_strup (test, -1); - expected = strings[4][0] ? strings[4] : test; - if (strcmp (convert, expected) != 0) - { - fprintf (stderr, "Failure: toupper(%s) == %s, should have been %s\n", - test, convert, expected); - result = 1; - } - g_free (convert); - - convert = g_utf8_strdown (test, -1); - expected = strings[2][0] ? 
strings[2] : test; - if (strcmp (convert, expected) != 0) - { - fprintf (stderr, "Failure: tolower(%s) == %s, should have been %s\n", - test, convert, expected); - result = 1; - } - g_free (convert); - - next: - g_strfreev (strings); - } - - fclose (infile); - - g_free (filename); - filename = g_test_build_filename (G_TEST_DIST, "casefold.txt", NULL); - - infile = fopen (filename, "r"); - if (!infile) - { - fprintf (stderr, "Failed to open %s\n", filename ); - g_free (filename); - exit (1); - } - - while (fgets (buffer, sizeof(buffer), infile)) - { - if (buffer[0] == '#') - continue; - - buffer[strlen(buffer) - 1] = '\0'; - strings = g_strsplit (buffer, "\t", -1); - - test = strings[0]; - - convert = g_utf8_casefold (test, -1); - if (strcmp (convert, strings[1]) != 0) - { - fprintf (stderr, "Failure: casefold(%s) == '%s', should have been '%s'\n", - test, convert, strings[1]); - result = 1; - } - g_free (convert); - - g_strfreev (strings); - } - - fclose (infile); - g_free (filename); - - return result; -}