From 568720006cd1da3390c239915337ed0a56a23f2e Mon Sep 17 00:00:00 2001
From: Emmanuel Fleury <emmanuel.fleury@u-bordeaux.fr>
Date: Wed, 3 Jul 2019 16:21:01 +0200
Subject: [PATCH] Add a missing check to g_utf8_get_char_validated()

g_utf8_get_char_validated() did not exactly match its
documentation: the function did not check that the sequence of
Unicode characters was free of nul bytes before performing a more
in-depth validation.

Fix issue #1052
---
 glib/gutf8.c               | 5 +++++
 glib/tests/utf8-validate.c | 6 ++++++
 2 files changed, 11 insertions(+)

diff --git a/glib/gutf8.c b/glib/gutf8.c
index cba97db7d..b67d09362 100644
--- a/glib/gutf8.c
+++ b/glib/gutf8.c
@@ -685,6 +685,11 @@ g_utf8_get_char_validated (const gchar *p,
 
   result = g_utf8_get_char_extended (p, max_len);
 
+  /* Disallow codepoint U+0000 as it’s a nul byte,
+   * and all string handling in GLib is nul-terminated */
+  if (result == 0 && max_len > 0)
+    return (gunichar) -2;
+
   if (result & 0x80000000)
     return result;
   else if (!UNICODE_VALID (result))
diff --git a/glib/tests/utf8-validate.c b/glib/tests/utf8-validate.c
index 5806b29a0..51543f4b2 100644
--- a/glib/tests/utf8-validate.c
+++ b/glib/tests/utf8-validate.c
@@ -316,6 +316,11 @@ test_utf8_get_char_validated (void)
      * that’s how it’s documented: */
     { "", 0, (gunichar) -2 },
     { "", -1, (gunichar) 0 },
+    { "\0", 1, (gunichar) -2 },
+    { "AB\0", 3, 'A' },
+    { "A\0B", 3, 'A' },
+    { "\0AB", 3, (gunichar) -2 },
+    { "\xD8\0", 2, (gunichar) -2 },
     /* Normal inputs: */
     { "hello", 5, (gunichar) 'h' },
     { "hello", -1, (gunichar) 'h' },
@@ -323,6 +328,7 @@ test_utf8_get_char_validated (void)
     { "\xD8\x9F", -1, 0x061F },
     { "\xD8\x9Fmore", 6, 0x061F },
     { "\xD8\x9Fmore", -1, 0x061F },
+    { "\xD8\x9F\0", 3, 0x061F },
     { "\xE2\x96\xB3", 3, 0x25B3 },
     { "\xE2\x96\xB3", -1, 0x25B3 },
     { "\xE2\x96\xB3more", 7, 0x25B3 },