From 770059b588114ff279ac739fe9822eff608339f5 Mon Sep 17 00:00:00 2001
From: Matthias Clasen <mclasen@redhat.com>
Date: Thu, 29 Jul 2021 14:14:01 -0400
Subject: [PATCH 1/2] tests: Remove a misplaced comment

This comment had nothing to do with the test below.
---
 glib/tests/unicode.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/glib/tests/unicode.c b/glib/tests/unicode.c
index bf1ad52ab..ff6da5226 100644
--- a/glib/tests/unicode.c
+++ b/glib/tests/unicode.c
@@ -1426,8 +1426,6 @@ test_fully_decompose_len (void)
   }
 }
 
-/* Test that g_unichar_decompose() returns the correct value for various
- * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
 static void
 test_iso15924 (void)
 {

From 9599a9451c74bfb4b7f44d1fbfe45301fbd3f06b Mon Sep 17 00:00:00 2001
From: Matthias Clasen <mclasen@redhat.com>
Date: Thu, 29 Jul 2021 14:17:36 -0400
Subject: [PATCH 2/2] Add a test for Unicode normalization

This test verifies the examples from Unicode Annex #15,
which defines the normalization forms.
---
 glib/tests/unicode.c | 49 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

diff --git a/glib/tests/unicode.c b/glib/tests/unicode.c
index ff6da5226..aa485d051 100644
--- a/glib/tests/unicode.c
+++ b/glib/tests/unicode.c
@@ -1426,6 +1426,54 @@ test_fully_decompose_len (void)
   }
 }
 
+/* Check g_utf8_normalize() against examples from Unicode Annex #15: each
+ * source must decompose to its NFD form, which must recompose to its NFC form.
+ */
+static void
+test_normalization (void)
+{
+  const struct {
+    const char *source; /* UTF-8 input that is normalized to NFD */
+    const char *nfd; /* expected NFD of source; also the input of the NFC check */
+    const char *nfc; /* expected NFC of the nfd string */
+  } tests[] = {
+    /* Singletons */
+    { "\xe2\x84\xab", "A\xcc\x8a", "Å" }, /* U+212B ANGSTROM SIGN */
+    { "\xe2\x84\xa6", "Ω", "Ω" }, /* U+2126 OHM SIGN */
+    /* Canonical Composites */
+    { "Å", "A\xcc\x8a", "Å" }, /* U+00C5 LATIN CAPITAL LETTER A WITH RING ABOVE */
+    { "ô", "o\xcc\x82", "ô" }, /* U+00F4 LATIN SMALL LETTER O WITH CIRCUMFLEX */
+    /* Multiple Combining Marks */
+    { "\xe1\xb9\xa9", "s\xcc\xa3\xcc\x87", "ṩ" }, /* U+1E69 LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE */
+    { "\xe1\xb8\x8b\xcc\xa3", "d\xcc\xa3\xcc\x87", "ḍ̇" }, /* U+1E0B LATIN SMALL LETTER D WITH DOT ABOVE + U+0323 COMBINING DOT BELOW */
+    { "q\xcc\x87\xcc\xa3", "q\xcc\xa3\xcc\x87", "q̣̇" }, /* q + U+0307 COMBINING DOT ABOVE + U+0323 COMBINING DOT BELOW */
+    /* Compatibility Composites */
+    { "ﬁ", "ﬁ", "ﬁ" }, /* U+FB01 LATIN SMALL LIGATURE FI */
+    { "2\xe2\x81\xb5", "2\xe2\x81\xb5", "2⁵" }, /* 2 + U+2075 SUPERSCRIPT FIVE */
+    { "\xe1\xba\x9b\xcc\xa3", "\xc5\xbf\xcc\xa3\xcc\x87", "ẛ̣" }, /* U+1E9B LATIN SMALL LETTER LONG S WITH DOT ABOVE + U+0323 COMBINING DOT BELOW */
+
+    /* Tests for behavior with reordered marks */
+    { "s\xcc\x87\xcc\xa3", "s\xcc\xa3\xcc\x87", "ṩ" }, /* s + U+0307 + U+0323; the expected NFD swaps the two marks */
+    { "α\xcc\x94\xcd\x82", "α\xcc\x94\xcd\x82", "ἇ" }, /* α + U+0314 COMBINING REVERSED COMMA ABOVE + U+0342 COMBINING GREEK PERISPOMENI */
+    { "α\xcd\x82\xcc\x94", "α\xcd\x82\xcc\x94", "ᾶ\xcc\x94" }, /* same marks in the opposite order; the expected NFD keeps the source order */
+  };
+  gsize i;
+
+  for (i = 0; i < G_N_ELEMENTS (tests); i++)
+    {
+      char *nfd, *nfc;
+
+      nfd = g_utf8_normalize (tests[i].source, -1, G_NORMALIZE_NFD);
+      g_assert_cmpstr (nfd, ==, tests[i].nfd);
+
+      nfc = g_utf8_normalize (tests[i].nfd, -1, G_NORMALIZE_NFC); /* composes the expected NFD string, not source */
+      g_assert_cmpstr (nfc, ==, tests[i].nfc);
+
+      g_free (nfd);
+      g_free (nfc);
+    }
+}
+
 static void
 test_iso15924 (void)
 {
@@ -1672,6 +1720,7 @@ main (int   argc,
   g_test_add_func ("/unicode/digit-value", test_digit_value);
   g_test_add_func ("/unicode/fully-decompose-canonical", test_fully_decompose_canonical);
   g_test_add_func ("/unicode/fully-decompose-len", test_fully_decompose_len);
+  g_test_add_func ("/unicode/normalization", test_normalization);
   g_test_add_func ("/unicode/graph", test_graph);
   g_test_add_func ("/unicode/iso15924", test_iso15924);
   g_test_add_func ("/unicode/lower", test_lower);