From 43759ca951e40d50f00f319d5c2cb422bce268b7 Mon Sep 17 00:00:00 2001 From: Emmanuel Fleury Date: Sun, 3 Apr 2022 12:46:43 +0200 Subject: [PATCH 1/2] Convert tests/unicode-encoding.c to glib test framework --- tests/unicode-encoding.c | 389 ++++++++++++++------------------------- 1 file changed, 141 insertions(+), 248 deletions(-) diff --git a/tests/unicode-encoding.c b/tests/unicode-encoding.c index b0603d105..f9853ec06 100644 --- a/tests/unicode-encoding.c +++ b/tests/unicode-encoding.c @@ -1,40 +1,9 @@ -#undef G_DISABLE_ASSERT -#undef G_LOG_DOMAIN - #include #include #include #include #include -static gint exit_status = 0; - -G_GNUC_PRINTF (1, 2) -static void -croak (char *format, ...) -{ - va_list va; - - va_start (va, format); - vfprintf (stderr, format, va); - va_end (va); - - exit (1); -} - -G_GNUC_PRINTF (1, 2) -static void -fail (char *format, ...) -{ - va_list va; - - va_start (va, format); - vfprintf (stderr, format, va); - va_end (va); - - exit_status |= 1; -} - typedef enum { VALID, @@ -72,29 +41,19 @@ static gint utf16_count (gunichar2 *a) { gint result = 0; - + while (a[result]) result++; return result; } -static void -print_ucs4 (const gchar *prefix, gunichar *ucs4, gint ucs4_len) -{ - gint i; - g_print ("%s ", prefix); - for (i = 0; i < ucs4_len; i++) - g_print ("%x ", ucs4[i]); - g_print ("\n"); -} - static void process (gint line, - gchar *utf8, - Status status, - gunichar *ucs4, - gint ucs4_len) + gchar *utf8, + Status status, + gunichar *ucs4, + gint ucs4_len) { const gchar *end; gboolean is_valid = g_utf8_validate (utf8, -1, &end); @@ -104,21 +63,14 @@ process (gint line, switch (status) { case VALID: - if (!is_valid) - { - fail ("line %d: valid but g_utf8_validate returned FALSE\n", line); - return; - } + g_assert_true (is_valid); break; + case NOTUNICODE: case INCOMPLETE: case OVERLONG: case MALFORMED: - if (is_valid) - { - fail ("line %d: invalid but g_utf8_validate returned TRUE\n", line); - return; - } + g_assert_false (is_valid); break; } @@ -128,20 +80,14 @@ process (gint line, ucs4_result = g_utf8_to_ucs4 (utf8, -1, NULL, NULL, &error); - if (!error || !g_error_matches (error, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT)) - { - fail ("line %d: incomplete input not properly detected\n", line); - return; - } + g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT); + g_clear_error (&error); ucs4_result = g_utf8_to_ucs4 (utf8, -1, &items_read, NULL, &error); - - if (!ucs4_result || items_read == (glong) strlen (utf8)) - { - fail ("line %d: incomplete input not properly detected\n", line); - return; - } + g_assert_no_error (error); + g_assert_nonnull (ucs4_result); + g_assert_cmpint (items_read, !=, strlen (utf8)); g_free (ucs4_result); } @@ -151,21 +97,12 @@ process (gint line, gunichar *ucs4_result; ucs4_result = g_utf8_to_ucs4 (utf8, -1, &items_read, &items_written, &error); - if (!ucs4_result) - { - fail ("line %d: conversion with status %d to ucs4 failed: %s\n", line, status, error->message); - return; - } + g_assert_no_error (error); + g_assert_nonnull (ucs4_result); - if (!ucs4_equal (ucs4_result, ucs4) || - items_read != (glong) strlen (utf8) || - items_written != ucs4_len) - { - fail ("line %d: results of conversion with status %d to ucs4 do not match expected.\n", line, status); - print_ucs4 ("expected: ", ucs4, ucs4_len); - print_ucs4 ("received: ", ucs4_result, items_written); - return; - } + g_assert_true (ucs4_equal (ucs4_result, ucs4)); + g_assert_cmpint (items_read, ==, strlen (utf8)); + g_assert_cmpint (items_written, ==, ucs4_len); g_free (ucs4_result); } @@ -176,30 +113,18 @@ process (gint line, gchar *utf8_result; ucs4_result = g_utf8_to_ucs4_fast (utf8, -1, &items_written); + g_assert_nonnull (ucs4_result); - if (!ucs4_equal (ucs4_result, ucs4) || - items_written != ucs4_len) - { - fail ("line %d: results of fast conversion with status %d to ucs4 do not match expected.\n", line, status); - print_ucs4 ("expected: ", ucs4, ucs4_len); - print_ucs4 ("received: ", ucs4_result, items_written); - return; - } + g_assert_true (ucs4_equal (ucs4_result, ucs4)); + g_assert_cmpint (items_written, ==, ucs4_len); utf8_result = g_ucs4_to_utf8 (ucs4_result, -1, &items_read, &items_written, &error); - if (!utf8_result) - { - fail ("line %d: conversion back to utf8 failed: %s", line, error->message); - return; - } + g_assert_no_error (error); + g_assert_nonnull (utf8_result); - if (strcmp (utf8_result, utf8) != 0 || - items_read != ucs4_len || - items_written != (glong) strlen (utf8)) - { - fail ("line %d: conversion back to utf8 did not match original\n", line); - return; - } + g_assert_cmpstr ((char *) utf8_result, ==, (char *) utf8); + g_assert_cmpint (items_read, ==, ucs4_len); + g_assert_cmpint (items_written, ==, strlen (utf8)); g_free (utf8_result); g_free (ucs4_result); @@ -222,101 +147,69 @@ process (gint line, #define TARGET "UTF-16" #endif - if (!(utf16_expected_tmp = (gunichar2 *)g_convert (utf8, -1, TARGET, "UTF-8", - NULL, &bytes_written, NULL))) - { - fail ("line %d: could not convert to UTF-16 via g_convert\n", line); - return; - } + utf16_expected_tmp = + (gunichar2 *) g_convert (utf8, -1, TARGET, "UTF-8", NULL, &bytes_written, NULL); + g_assert_nonnull (utf16_expected_tmp); - /* zero-terminate and remove BOM - */ + /* zero-terminate and remove BOM */ n_chars = bytes_written / 2; if (utf16_expected_tmp[0] == 0xfeff) /* BOM */ - { - n_chars--; - utf16_expected = g_new (gunichar2, n_chars + 1); - memcpy (utf16_expected, utf16_expected_tmp + 1, sizeof(gunichar2) * n_chars); - } - else if (utf16_expected_tmp[0] == 0xfffe) /* ANTI-BOM */ - { - fail ("line %d: conversion via iconv to \"UTF-16\" is not native-endian\n", line); - return; - } + { + n_chars--; + utf16_expected = g_new (gunichar2, n_chars + 1); + memcpy (utf16_expected, utf16_expected_tmp + 1, sizeof(gunichar2) * n_chars); + } else - { - utf16_expected = g_new (gunichar2, n_chars + 1); - memcpy (utf16_expected, utf16_expected_tmp, sizeof(gunichar2) * n_chars); - } + { + /* We expect the result of the conversion + via iconv() to be native-endian. */ + g_assert_false (utf16_expected_tmp[0] == 0xfffe); + utf16_expected = g_new (gunichar2, n_chars + 1); + memcpy (utf16_expected, utf16_expected_tmp, sizeof(gunichar2) * n_chars); + } utf16_expected[n_chars] = '\0'; - - if (!(utf16_from_utf8 = g_utf8_to_utf16 (utf8, -1, &items_read, &items_written, &error))) - { - fail ("line %d: conversion to ucs16 failed: %s\n", line, error->message); - return; - } - if (items_read != (glong) strlen (utf8) || - utf16_count (utf16_from_utf8) != items_written) - { - fail ("line %d: length error in conversion to ucs16\n", line); - return; - } + utf16_from_utf8 = + g_utf8_to_utf16 (utf8, -1, &items_read, &items_written, &error); + g_assert_no_error (error); + g_assert_nonnull (utf16_from_utf8); - if (!(utf16_from_ucs4 = g_ucs4_to_utf16 (ucs4, -1, &items_read, &items_written, &error))) - { - fail ("line %d: conversion to ucs16 failed: %s\n", line, error->message); - return; - } + g_assert_cmpint (items_read, ==, (glong) strlen (utf8)); + g_assert_cmpint (utf16_count (utf16_from_utf8), ==, items_written); - if (items_read != ucs4_len || - utf16_count (utf16_from_ucs4) != items_written) - { - fail ("line %d: length error in conversion to ucs16\n", line); - return; - } + utf16_from_ucs4 = + g_ucs4_to_utf16 (ucs4, -1, &items_read, &items_written, &error); + g_assert_no_error (error); + g_assert_nonnull (utf16_from_ucs4); - if (!utf16_equal (utf16_from_utf8, utf16_expected) || - !utf16_equal (utf16_from_ucs4, utf16_expected)) - { - fail ("line %d: results of conversion to ucs16 do not match\n", line); - return; - } + g_assert_cmpint (items_read, ==, ucs4_len); + g_assert_cmpint (utf16_count (utf16_from_ucs4), ==, items_written); - if (!(utf8_result = g_utf16_to_utf8 (utf16_from_utf8, -1, &items_read, &items_written, &error))) - { - fail ("line %d: conversion back to utf8 failed: %s\n", line, error->message); - return; - } + g_assert_true (utf16_equal (utf16_from_utf8, utf16_expected)); + g_assert_true (utf16_equal (utf16_from_ucs4, utf16_expected)); + g_assert_cmpstr ((char *) utf16_from_utf8, ==, (char *) utf16_expected); + g_assert_cmpstr ((char *) utf16_from_ucs4, ==, (char *) utf16_expected); - if (items_read != utf16_count (utf16_from_utf8) || - items_written != (glong) strlen (utf8)) - { - fail ("line %d: length error in conversion from ucs16 to utf8\n", line); - return; - } + utf8_result = + g_utf16_to_utf8 (utf16_from_utf8, -1, &items_read, &items_written, &error); + g_assert_no_error (error); + g_assert_nonnull (utf8_result); - if (!(ucs4_result = g_utf16_to_ucs4 (utf16_from_ucs4, -1, &items_read, &items_written, &error))) - { - fail ("line %d: conversion back to utf8/ucs4 failed\n", line); - return; - } + g_assert_cmpint (items_read, ==, utf16_count (utf16_from_utf8)); + g_assert_cmpint (items_written, ==, (glong) strlen (utf8)); - if (items_read != utf16_count (utf16_from_utf8) || - items_written != ucs4_len) - { - fail ("line %d: length error in conversion from ucs16 to ucs4\n", line); - return; - } + ucs4_result = + g_utf16_to_ucs4 (utf16_from_ucs4, -1, &items_read, &items_written, &error); + g_assert_no_error (error); + g_assert_nonnull (ucs4_result); + + g_assert_cmpint (items_read, ==, utf16_count (utf16_from_utf8)); + g_assert_cmpint (items_written, ==, ucs4_len); + + g_assert_cmpstr (utf8, ==, utf8_result); + g_assert_cmpstr ((char *) ucs4, ==, (char *) ucs4_result); - if (strcmp (utf8, utf8_result) != 0 || - !ucs4_equal (ucs4, ucs4_result)) - { - fail ("line %d: conversion back to utf8/ucs4 did not match original\n", line); - return; - } - g_free (utf16_expected_tmp); g_free (utf16_expected); g_free (utf16_from_utf8); @@ -326,8 +219,8 @@ process (gint line, } } -int -main (int argc, char **argv) +static void +test_unicode_encoding (void) { gchar *testfile; gchar *contents; @@ -336,18 +229,15 @@ main (int argc, char **argv) char *tmp; gint state = 0; gint line = 1; - gint start_line = 0; /* Quiet GCC */ - gchar *utf8 = NULL; /* Quiet GCC */ + gint start_line = 0; /* Quiet GCC */ + gchar *utf8 = NULL; /* Quiet GCC */ GArray *ucs4; - Status status = VALID; /* Quiet GCC */ - - g_test_init (&argc, &argv, NULL); + Status status = VALID; /* Quiet GCC */ testfile = g_test_build_filename (G_TEST_DIST, "utf8.txt", NULL); g_file_get_contents (testfile, &contents, NULL, &error); - if (error) - croak ("Cannot open utf8.txt: %s", error->message); + g_assert_no_error (error); ucs4 = g_array_new (TRUE, FALSE, sizeof(gunichar)); @@ -357,88 +247,91 @@ main (int argc, char **argv) while (*p) { while (*p && (*p == ' ' || *p == '\t')) - p++; + p++; end = p; while (*end && (*end != '\r' && *end != '\n')) - end++; - + end++; + if (!*p || *p == '#' || *p == '\r' || *p == '\n') - goto next_line; + goto next_line; tmp = g_strstrip (g_strndup (p, end - p)); - + switch (state) - { - case 0: - /* UTF-8 string */ - start_line = line; - utf8 = tmp; - tmp = NULL; - break; - - case 1: - /* Status */ - if (!strcmp (tmp, "VALID")) - status = VALID; - else if (!strcmp (tmp, "INCOMPLETE")) - status = INCOMPLETE; - else if (!strcmp (tmp, "NOTUNICODE")) - status = NOTUNICODE; - else if (!strcmp (tmp, "OVERLONG")) - status = OVERLONG; - else if (!strcmp (tmp, "MALFORMED")) - status = MALFORMED; - else - croak ("Invalid status on line %d\n", line); + { + case 0: + /* UTF-8 string */ + start_line = line; + utf8 = tmp; + tmp = NULL; + break; - if (status != VALID && status != NOTUNICODE) - state++; /* No UCS-4 data */ - - break; - - case 2: - /* UCS-4 version */ + case 1: + /* Status */ + if (!strcmp (tmp, "VALID")) + status = VALID; + else if (!strcmp (tmp, "INCOMPLETE")) + status = INCOMPLETE; + else if (!strcmp (tmp, "NOTUNICODE")) + status = NOTUNICODE; + else if (!strcmp (tmp, "OVERLONG")) + status = OVERLONG; + else if (!strcmp (tmp, "MALFORMED")) + status = MALFORMED; + else + g_assert_not_reached (); - p = strtok (tmp, " \t"); - while (p) - { - gchar *endptr; - - gunichar ch = strtoul (p, &endptr, 16); - if (*endptr != '\0') - croak ("Invalid UCS-4 character on line %d\n", line); + if (status != VALID && status != NOTUNICODE) + state++; /* No UCS-4 data */ + break; - g_array_append_val (ucs4, ch); - - p = strtok (NULL, " \t"); - } + case 2: + /* UCS-4 version */ + p = strtok (tmp, " \t"); + while (p) + { + gchar *endptr; + gunichar ch = strtoul (p, &endptr, 16); + g_assert_cmpint (*endptr, == ,'\0'); - break; - } + g_array_append_val (ucs4, ch); + + p = strtok (NULL, " \t"); + } + break; + } g_free (tmp); state = (state + 1) % 3; if (state == 0) - { - process (start_line, utf8, status, (gunichar *)ucs4->data, ucs4->len); - g_array_set_size (ucs4, 0); - g_free (utf8); - } - + { + process (start_line, utf8, status, (gunichar *)ucs4->data, ucs4->len); + g_array_set_size (ucs4, 0); + g_free (utf8); + } + next_line: p = end; if (*p && *p == '\r') - p++; + p++; if (*p && *p == '\n') - p++; - + p++; line++; } g_free (testfile); g_array_free (ucs4, TRUE); g_free (contents); - return exit_status; +} + +int +main (int argc, char **argv) +{ + g_test_init (&argc, &argv, NULL); + + g_test_add_func ("/unicode/encoding", test_unicode_encoding); + + return g_test_run (); } From 795952cb8dbfe6c817ae45909545457acf8c1828 Mon Sep 17 00:00:00 2001 From: Emmanuel Fleury Date: Sun, 3 Apr 2022 12:57:07 +0200 Subject: [PATCH 2/2] Move tests/unicode-encoding.c to glib/tests/unicode-encoding.c Related to issue #1434 --- glib/tests/meson.build | 8 ++++++++ {tests => glib/tests}/unicode-encoding.c | 0 {tests => glib/tests}/utf8.txt | 0 tests/meson.build | 8 -------- 4 files changed, 8 insertions(+), 8 deletions(-) rename {tests => glib/tests}/unicode-encoding.c (100%) rename {tests => glib/tests}/utf8.txt (100%) diff --git a/glib/tests/meson.build b/glib/tests/meson.build index 301158e0f..698d078d6 100644 --- a/glib/tests/meson.build +++ b/glib/tests/meson.build @@ -124,6 +124,7 @@ glib_tests = { 'utf8-misc' : {}, 'utils' : {}, 'unicode' : {}, + 'unicode-encoding' : {}, 'uri' : {}, '1bit-mutex' : {}, '1bit-emufutex' : { @@ -262,6 +263,13 @@ foreach test_name, extra_args : glib_tests ) endforeach +if installed_tests_enabled + install_data( + 'utf8.txt', + install_dir : installed_tests_execdir, + ) +endif + executable('spawn-path-search-helper', 'spawn-path-search-helper.c', c_args : test_cargs, dependencies : test_deps, diff --git a/tests/unicode-encoding.c b/glib/tests/unicode-encoding.c similarity index 100% rename from tests/unicode-encoding.c rename to glib/tests/unicode-encoding.c diff --git a/tests/utf8.txt b/glib/tests/utf8.txt similarity index 100% rename from tests/utf8.txt rename to glib/tests/utf8.txt diff --git a/tests/meson.build b/tests/meson.build index c95fa1d00..25b9f1f6c 100644 --- a/tests/meson.build +++ b/tests/meson.build @@ -26,7 +26,6 @@ tests = { 'spawn-test' : {}, 'thread-test' : {}, 'threadpool-test' : {'suite' : ['slow']}, - 'unicode-encoding' : {}, 'module-test-library' : { 'dependencies' : [libgmodule_dep], 'export_dynamic' : true, @@ -58,13 +57,6 @@ else } endif -if installed_tests_enabled - install_data( - 'utf8.txt', - install_dir : installed_tests_execdir, - ) -endif - module_suffix = [] # Keep the autotools convention for shared module suffix because GModule # depends on it: https://gitlab.gnome.org/GNOME/glib/issues/520