Merge test/unicode-caseconv.c into glib/tests/unicode.c

Related to issue #1434
This commit is contained in:
Emmanuel Fleury 2022-01-14 14:14:48 +01:00
parent ae0ec9b753
commit b9f07a458a
8 changed files with 101 additions and 138 deletions

View File

@ -184,13 +184,15 @@ endif
if installed_tests_enabled if installed_tests_enabled
install_data( install_data(
'keyfiletest.ini',
'pages.ini',
'keyfile.c',
'empty',
'4096-random-bytes', '4096-random-bytes',
'casefold.txt',
'casemap.txt',
'echo-script', 'echo-script',
'echo-script.bat', 'echo-script.bat',
'empty',
'keyfile.c',
'keyfiletest.ini',
'pages.ini',
install_dir : installed_tests_execdir, install_dir : installed_tests_execdir,
) )
install_subdir('bookmarks', install_dir : installed_tests_execdir) install_subdir('bookmarks', install_dir : installed_tests_execdir)

View File

@ -28,6 +28,7 @@
#endif #endif
#include <locale.h> #include <locale.h>
#include <stdio.h>
#include "glib.h" #include "glib.h"
@ -535,6 +536,99 @@ test_casefold (void)
g_free (str_casefold); g_free (str_casefold);
} }
/* Check g_utf8_strup(), g_utf8_strdown() and g_utf8_casefold() against the
 * reference data shipped in casemap.txt and casefold.txt.
 *
 * casemap.txt lines are tab-separated. As consumed here, field 0 is the
 * locale to test under (empty means "C"), field 1 is the input string,
 * field 2 the expected lower-case form and field 4 the expected upper-case
 * form. casefold.txt lines hold the input in field 0 and the expected
 * case-folded form in field 1. Lines starting with '#' are comments.
 *
 * The test switches LC_CTYPE while it runs; the locale that was active on
 * entry is restored before returning so that later tests in the same
 * process are not affected.
 */
static void
test_casemap_and_casefold (void)
{
  FILE *infile;
  char buffer[1024];
  char **strings;
  char *filename;
  const char *locale;
  const char *test;
  const char *expected;
  char *convert;
  const char *current_locale = setlocale (LC_CTYPE, NULL);
  /* setlocale() may reuse its return buffer on later calls, so keep a
   * private copy of the original locale name for the final restore. */
  char *saved_locale = g_strdup (current_locale);

  filename = g_test_build_filename (G_TEST_DIST, "casemap.txt", NULL);
  infile = fopen (filename, "r");
  g_assert_nonnull (infile);

  while (fgets (buffer, sizeof (buffer), infile))
    {
      if (buffer[0] == '#')
        continue;

      strings = g_strsplit (buffer, "\t", -1);

      /* Fields 0, 1, 2 and 4 are read below; never index past the end of
       * a short (for instance blank) line. */
      if (g_strv_length (strings) < 5)
        {
          g_test_message ("Skipping malformed line in casemap.txt");
          goto next;
        }

      locale = strings[0];
      if (!locale[0])
        locale = "C";

      if (strcmp (locale, current_locale) != 0)
        {
          setlocale (LC_CTYPE, locale);
          current_locale = setlocale (LC_CTYPE, NULL);

          /* Only the first two bytes (the language code) have to match,
           * since the C library may report a longer locale name. */
          if (strncmp (current_locale, locale, 2) != 0)
            {
              g_test_message ("Cannot set locale to %s, skipping", locale);
              goto next;
            }
        }

      test = strings[1];

      /* gen-casemap-txt.py uses an empty string when a single
       * character doesn't have an equivalent in a particular case.
       * That is meaningless for multi-character strings, where the
       * expected result would rather be the original text unchanged,
       * so an empty expected field is taken to mean "same as
       * original".
       */
      convert = g_utf8_strup (test, -1);
      expected = strings[4][0] ? strings[4] : test;
      g_assert_cmpstr (convert, ==, expected);
      g_free (convert);

      convert = g_utf8_strdown (test, -1);
      expected = strings[2][0] ? strings[2] : test;
      g_assert_cmpstr (convert, ==, expected);
      g_free (convert);

    next:
      g_strfreev (strings);
    }

  fclose (infile);
  g_free (filename);

  filename = g_test_build_filename (G_TEST_DIST, "casefold.txt", NULL);
  infile = fopen (filename, "r");
  g_assert_nonnull (infile);

  while (fgets (buffer, sizeof (buffer), infile))
    {
      if (buffer[0] == '#')
        continue;

      /* Cut at the newline if there is one; a final line without a
       * trailing newline must keep its last character. */
      buffer[strcspn (buffer, "\n")] = '\0';

      strings = g_strsplit (buffer, "\t", -1);

      /* Both the input and the expected value are required. */
      if (g_strv_length (strings) < 2)
        {
          g_test_message ("Skipping malformed line in casefold.txt");
          g_strfreev (strings);
          continue;
        }

      test = strings[0];

      convert = g_utf8_casefold (test, -1);
      g_assert_cmpstr (convert, ==, strings[1]);
      g_free (convert);

      g_strfreev (strings);
    }

  fclose (infile);
  g_free (filename);

  /* Put back the locale that was active when the test started. */
  setlocale (LC_CTYPE, saved_locale);
  g_free (saved_locale);
}
/* Test that g_unichar_ismark() returns the correct value for various /* Test that g_unichar_ismark() returns the correct value for various
* ASCII and Unicode alphabetic, numeric, and other, codepoints. */ * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
static void static void
@ -1720,6 +1814,7 @@ main (int argc,
g_test_add_func ("/unicode/break-type", test_unichar_break_type); g_test_add_func ("/unicode/break-type", test_unichar_break_type);
g_test_add_func ("/unicode/canonical-decomposition", test_canonical_decomposition); g_test_add_func ("/unicode/canonical-decomposition", test_canonical_decomposition);
g_test_add_func ("/unicode/casefold", test_casefold); g_test_add_func ("/unicode/casefold", test_casefold);
g_test_add_func ("/unicode/casemap_and_casefold", test_casemap_and_casefold);
g_test_add_func ("/unicode/cases", test_cases); g_test_add_func ("/unicode/cases", test_cases);
g_test_add_func ("/unicode/character-type", test_unichar_character_type); g_test_add_func ("/unicode/character-type", test_unichar_character_type);
g_test_add_func ("/unicode/cntrl", test_cntrl); g_test_add_func ("/unicode/cntrl", test_cntrl);

View File

@ -32,7 +32,6 @@ tests = {
'thread-test' : {}, 'thread-test' : {},
'threadpool-test' : {'suite' : ['slow']}, 'threadpool-test' : {'suite' : ['slow']},
'type-test' : {}, 'type-test' : {},
'unicode-caseconv' : {},
'unicode-encoding' : {}, 'unicode-encoding' : {},
'module-test-library' : { 'module-test-library' : {
'dependencies' : [libgmodule_dep], 'dependencies' : [libgmodule_dep],
@ -73,8 +72,6 @@ endif
if installed_tests_enabled if installed_tests_enabled
install_data( install_data(
'iochannel-test-infile', 'iochannel-test-infile',
'casemap.txt',
'casefold.txt',
'utf8.txt', 'utf8.txt',
install_dir : installed_tests_execdir, install_dir : installed_tests_execdir,
) )

View File

@ -1,131 +0,0 @@
#undef G_DISABLE_ASSERT
#undef G_LOG_DOMAIN
#include <locale.h>
#include <stdlib.h>
#include <stdio.h>
#include <glib.h>
#include <string.h>
/* Standalone checker for g_utf8_strup(), g_utf8_strdown() and
 * g_utf8_casefold().
 *
 * Reads the tab-separated reference files casemap.txt (field 0: locale,
 * empty meaning "C"; field 1: input; field 2: expected lower case; field 4:
 * expected upper case) and casefold.txt (field 0: input; field 1: expected
 * case-folded form). Lines starting with '#' are comments.
 *
 * Returns 0 when every comparison matched and 1 when at least one failed;
 * exits with status 1 if a data file cannot be opened.
 */
int main (int argc, char **argv)
{
  FILE *infile;
  char buffer[1024];
  char **strings;
  char *filename;
  const char *locale;
  const char *test;
  const char *expected;
  char *convert;
  char *current_locale = setlocale (LC_CTYPE, NULL);
  gint result = 0;

  g_test_init (&argc, &argv, NULL);

  filename = g_test_build_filename (G_TEST_DIST, "casemap.txt", NULL);
  infile = fopen (filename, "r");
  if (!infile)
    {
      fprintf (stderr, "Failed to open %s\n", filename );
      g_free (filename);
      exit (1);
    }

  while (fgets (buffer, sizeof(buffer), infile))
    {
      if (buffer[0] == '#')
        continue;

      strings = g_strsplit (buffer, "\t", -1);

      /* Fields 0, 1, 2 and 4 are read below; never index past the end of
       * a short (for instance blank) line. */
      if (g_strv_length (strings) < 5)
        {
          fprintf (stderr, "Skipping malformed line in casemap.txt\n");
          goto next;
        }

      locale = strings[0];
      if (!locale[0])
        locale = "C";

      if (strcmp (locale, current_locale) != 0)
        {
          setlocale (LC_CTYPE, locale);
          current_locale = setlocale (LC_CTYPE, NULL);

          /* Only the first two bytes (the language code) have to match,
           * since the C library may report a longer locale name. */
          if (strncmp (current_locale, locale, 2) != 0)
            {
              fprintf (stderr, "Cannot set locale to %s, skipping\n", locale);
              goto next;
            }
        }

      test = strings[1];

      /* gen-casemap-txt.py uses an empty string when a single character
       * doesn't have an equivalent in a particular case. That is
       * meaningless for multi-character strings, where the expected result
       * would rather be the original text unchanged, so an empty expected
       * field is taken to mean "same as original".
       */
      convert = g_utf8_strup (test, -1);
      expected = strings[4][0] ? strings[4] : test;
      if (strcmp (convert, expected) != 0)
        {
          fprintf (stderr, "Failure: toupper(%s) == %s, should have been %s\n",
                   test, convert, expected);
          result = 1;
        }
      g_free (convert);

      convert = g_utf8_strdown (test, -1);
      expected = strings[2][0] ? strings[2] : test;
      if (strcmp (convert, expected) != 0)
        {
          fprintf (stderr, "Failure: tolower(%s) == %s, should have been %s\n",
                   test, convert, expected);
          result = 1;
        }
      g_free (convert);

    next:
      g_strfreev (strings);
    }

  fclose (infile);
  g_free (filename);

  filename = g_test_build_filename (G_TEST_DIST, "casefold.txt", NULL);
  infile = fopen (filename, "r");
  if (!infile)
    {
      fprintf (stderr, "Failed to open %s\n", filename );
      g_free (filename);
      exit (1);
    }

  while (fgets (buffer, sizeof(buffer), infile))
    {
      if (buffer[0] == '#')
        continue;

      /* Cut at the newline if there is one; a final line without a
       * trailing newline must keep its last character. */
      buffer[strcspn (buffer, "\n")] = '\0';

      strings = g_strsplit (buffer, "\t", -1);

      /* Both the input and the expected value are required. */
      if (g_strv_length (strings) < 2)
        {
          fprintf (stderr, "Skipping malformed line in casefold.txt\n");
          g_strfreev (strings);
          continue;
        }

      test = strings[0];

      convert = g_utf8_casefold (test, -1);
      if (strcmp (convert, strings[1]) != 0)
        {
          fprintf (stderr, "Failure: casefold(%s) == '%s', should have been '%s'\n",
                   test, convert, strings[1]);
          result = 1;
        }
      g_free (convert);

      g_strfreev (strings);
    }

  fclose (infile);
  g_free (filename);

  return result;
}