Merge branch 'libicu-tests' into 'main'

unicode: add tests for g_utf8_normalize() and empty strings. See merge request GNOME/glib!3326
2025-12-13 20:08:29 +01:00 · 2023-04-13 21:35:51 +00:00
parent 18ae2a3d4e 6b39af34aa
commit 2af45f9c20
2 changed files with 53 additions and 1 deletions
--- a/glib/gunicode.h
+++ b/glib/gunicode.h
@@ -210,7 +210,7 @@ typedef enum
 * Since new unicode versions may add new types here, applications should be ready 
 * to handle unknown values. They may be regarded as %G_UNICODE_BREAK_UNKNOWN.
 *
- * See [Unicode Line Breaking Algorithm](http://www.unicode.org/unicode/reports/tr14/).
+ * See [Unicode Line Breaking Algorithm](https://www.unicode.org/reports/tr14/).
 */
 typedef enum
 {
--- a/glib/tests/unicode.c
+++ b/glib/tests/unicode.c
@@ -459,6 +459,10 @@ test_strup (void)
  /* Tricky, comparing two unicode strings with an ASCII function */
  g_assert_cmpstr (str_up, ==, "AAZZ09X;\003E\357\274\241\357\274\241");
  g_free (str_up);
+
+  str_up = g_utf8_strup ("", 0);
+  g_assert_cmpstr (str_up, ==, "");
+  g_free (str_up);
 }

 /* Test that g_utf8_strdown() returns the correct value for various
@@ -484,6 +488,10 @@ test_strdown (void)
  /* Tricky, comparing two unicode strings with an ASCII function */
  g_assert_cmpstr (str_down, ==, "aazz09x;\003\007\357\275\201\357\275\201");
  g_free (str_down);
+
+  str_down = g_utf8_strdown ("", 0);
+  g_assert_cmpstr (str_down, ==, "");
+  g_free (str_down);
 }

 /* Test that g_utf8_strup() and g_utf8_strdown() return the correct
@@ -576,6 +584,10 @@ test_casefold (void)
  /* Tricky, comparing two unicode strings with an ASCII function */
  g_assert_cmpstr (str_casefold, ==, "aazz09x;\357\275\201\357\275\201");
  g_free (str_casefold);
+
+  str_casefold = g_utf8_casefold ("", 0);
+  g_assert_cmpstr (str_casefold, ==, "");
+  g_free (str_casefold);
 }

 static void
@@ -1889,6 +1901,45 @@ test_iso15924 (void)
 #undef PACK
 }

+/* Test g_utf8_normalize() in each mode; NULL expects a NULL return. */
+static void
+test_normalize (void)
+{
+  typedef struct
+  {
+    const gchar *str;
+    const gchar *nfd;
+    const gchar *nfc;
+    const gchar *nfkd;
+    const gchar *nfkc;
+  } Test;
+  static const Test tests[] = {
+    { "Äffin", "A\u0308ffin", "Äffin", "A\u0308ffin", "Äffin" },
+    { "Ä\uFB03n", "A\u0308\uFB03n", "Ä\uFB03n", "A\u0308ffin", "Äffin" },
+    { "Henry IV", "Henry IV", "Henry IV", "Henry IV", "Henry IV" },
+    { "Henry \u2163", "Henry \u2163", "Henry \u2163", "Henry IV", "Henry IV" },
+    { "non-utf\x88", NULL, NULL, NULL, NULL },
+    { "", "", "", "", "" },
+  };
+  gsize i;
+
+#define TEST(str, mode, expected)                         \
+  G_STMT_START {                                          \
+    gchar *normalized = g_utf8_normalize (str, -1, mode); \
+    g_assert_cmpstr (normalized, ==, expected);           \
+    g_free (normalized);                                  \
+  } G_STMT_END
+
+  for (i = 0; i < G_N_ELEMENTS (tests); i++)
+    {
+      TEST (tests[i].str, G_NORMALIZE_NFD, tests[i].nfd);
+      TEST (tests[i].str, G_NORMALIZE_NFC, tests[i].nfc);
+      TEST (tests[i].str, G_NORMALIZE_NFKD, tests[i].nfkd);
+      TEST (tests[i].str, G_NORMALIZE_NFKC, tests[i].nfkc);
+    }
+#undef TEST
+}
+
 int
 main (int   argc,
      char *argv[])
@@ -1933,6 +1984,7 @@ main (int   argc,
  g_test_add_func ("/unicode/xdigit", test_xdigit);
  g_test_add_func ("/unicode/xdigit-value", test_xdigit_value);
  g_test_add_func ("/unicode/zero-width", test_zerowidth);
+  g_test_add_func ("/unicode/normalize", test_normalize);

  return g_test_run();
 }