g_utf8_normalize: don't read past the end of the buffer

_g_utf8_normalize_wc() could read past the end of the provided buffer if it ends with a truncated multibyte character. If max_len is -1, it can continue reading until it encounters either a NUL or unreadable memory. Avoid this with extra bounds checks prior to g_utf8_get_char() to ensure that it does not read past either max_len or a NUL terminator.
2025-08-07 09:53:39 +02:00 · 2023-03-19 17:32:29 -10:00
parent e979fd9a15
commit 7f4726d151
1 changed files with 33 additions and 6 deletions
--- a/glib/gunidecomp.c
+++ b/glib/gunidecomp.c
@@ -388,9 +388,33 @@ _g_utf8_normalize_wc (const gchar    *str,
  while ((max_len < 0 || p < str + max_len) && *p)
    {
      const gchar *decomp;
-      gunichar wc = g_utf8_get_char (p);
+      const char *next, *between;
+      gunichar wc;

-      if (wc >= SBase && wc < SBase + SCount)
+      next = g_utf8_next_char (p);
+      /* Avoid reading truncated multibyte characters
+         which run past the end of the buffer */
+      if (max_len < 0)
+        {
+          /* Does the character contain a NUL terminator? */
+          for (between = &p[1]; between < next; between++)
+            {
+              if (G_UNLIKELY (!*between))
+                return NULL;
+            }
+        }
+      else
+        {
+          if (G_UNLIKELY (next > str + max_len))
+            return NULL;
+        }
+      wc = g_utf8_get_char (p);
+
+      if (G_UNLIKELY (wc == (gunichar) -1))
+        {
+          return NULL;
+        }
+      else if (wc >= SBase && wc < SBase + SCount)
        {
          gsize result_len;
          decompose_hangul (wc, NULL, &result_len);
@@ -406,7 +430,7 @@ _g_utf8_normalize_wc (const gchar    *str,
            n_wc++;
        }

-      p = g_utf8_next_char (p);
+      p = next;
    }

  wc_buffer = g_new (gunichar, n_wc + 1);
@@ -548,10 +572,13 @@ g_utf8_normalize (const gchar    *str,
 		  GNormalizeMode  mode)
 {
  gunichar *result_wc = _g_utf8_normalize_wc (str, len, mode);
-  gchar *result;
+  gchar *result = NULL;

-  result = g_ucs4_to_utf8 (result_wc, -1, NULL, NULL, NULL);
-  g_free (result_wc);
+  if (G_LIKELY (result_wc != NULL))
+    {
+      result = g_ucs4_to_utf8 (result_wc, -1, NULL, NULL, NULL);
+      g_free (result_wc);
+    }

  return result;
 }