glib2/glib2-CVE-2026-1489.patch

From 662aa569efa65eaa4672ab0671eb8533a354cd89 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marco=20Trevisan=20=28Trevi=C3=B1o=29?= <mail@3v1n0.net>
Date: Wed, 21 Jan 2026 22:00:17 +0100
Subject: [PATCH 1/4] guniprop: Use size_t for output_marks length

The input string length may overflow, and this would lead to wrong
behavior and invalid writes.

Spotted by treeplus.
Thanks to the Sovereign Tech Resilience programme from the Sovereign
Tech Agency.

ID: #YWH-PGM9867-171
Closes: #3872
---
 glib/guniprop.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/glib/guniprop.c b/glib/guniprop.c
index fe0033fd6b..1a0cc64089 100644
--- a/glib/guniprop.c
+++ b/glib/guniprop.c
@@ -772,13 +772,13 @@ get_locale_type (void)
   return LOCALE_NORMAL;
 }

-static gint
+static size_t
 output_marks (const char **p_inout,
 	      char        *out_buffer,
 	      gboolean     remove_dot)
 {
   const char *p = *p_inout;
-  gint len = 0;
+  size_t len = 0;

   while (*p)
     {
--
GitLab


From 58356619525a1d565df8cc348e9784716f020f2f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marco=20Trevisan=20=28Trevi=C3=B1o=29?= <mail@3v1n0.net>
Date: Wed, 21 Jan 2026 22:01:49 +0100
Subject: [PATCH 2/4] guniprop: Do not convert size_t to gint

We were correctly using size_t in output_special_case() since commit
362f92b69, but then we converted the value back to int.

Related to: #3872
---
 glib/guniprop.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/glib/guniprop.c b/glib/guniprop.c
index 1a0cc64089..fe50a287c4 100644
--- a/glib/guniprop.c
+++ b/glib/guniprop.c
@@ -798,7 +798,7 @@ output_marks (const char **p_inout,
   return len;
 }

-static gint
+static size_t
 output_special_case (gchar *out_buffer,
 		     int    offset,
 		     int    type,
--
GitLab


From 170dc8c4068db4c4cbf63c7d27192e230436da21 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marco=20Trevisan=20=28Trevi=C3=B1o=29?= <mail@3v1n0.net>
Date: Wed, 21 Jan 2026 22:04:22 +0100
Subject: [PATCH 3/4] guniprop: Ensure we do not overflow size in
 g_utf8_{strdown,strup}()

While this is technically not a security issue, when repeatedly adding
to a size_t value, we can overflow and start from 0.

Now, while being unlikely, technically a UTF-8 lowercased or uppercased
string can be longer than the input value, and if the output string is
bigger than G_MAXSIZE we'd end up cutting it silently.

Let's instead assert each time we increase the output length.
---
 glib/guniprop.c | 107 +++++++++++++++++++++++++++++++-----------------
 1 file changed, 69 insertions(+), 38 deletions(-)

diff --git a/glib/guniprop.c b/glib/guniprop.c
index fe50a287c4..86020b6e0f 100644
--- a/glib/guniprop.c
+++ b/glib/guniprop.c
@@ -772,14 +772,36 @@ get_locale_type (void)
   return LOCALE_NORMAL;
 }

-static size_t
-output_marks (const char **p_inout,
-	      char        *out_buffer,
-	      gboolean     remove_dot)
+G_ALWAYS_INLINE static inline void
+increase_size (size_t *sizeptr, size_t add)
+{
+  g_assert (G_MAXSIZE - *(sizeptr) >= add);
+  *(sizeptr) += add;
+}
+
+G_ALWAYS_INLINE static inline void
+append_utf8_char_to_buffer (gunichar  c,
+                            char     *out_buffer,
+                            size_t   *in_out_len)
+{
+  gint utf8_len;
+  char *buffer;
+
+  buffer = out_buffer ? out_buffer + *(in_out_len) : NULL;
+  utf8_len = g_unichar_to_utf8 (c, buffer);
+
+  g_assert (utf8_len >= 0);
+  increase_size (in_out_len, utf8_len);
+}
+
+static void
+append_mark (const char **p_inout,
+             char        *out_buffer,
+             size_t      *in_out_len,
+             gboolean     remove_dot)
 {
   const char *p = *p_inout;
-  size_t len = 0;
-
+
   while (*p)
     {
       gunichar c = g_utf8_get_char (p);
@@ -787,7 +809,7 @@ output_marks (const char **p_inout,
       if (ISMARK (TYPE (c)))
 	{
 	  if (!remove_dot || c != 0x307 /* COMBINING DOT ABOVE */)
-	    len += g_unichar_to_utf8 (c, out_buffer ? out_buffer + len : NULL);
+            append_utf8_char_to_buffer (c, out_buffer, in_out_len);
 	  p = g_utf8_next_char (p);
 	}
       else
@@ -795,14 +817,14 @@ output_marks (const char **p_inout,
     }

   *p_inout = p;
-  return len;
 }

-static size_t
-output_special_case (gchar *out_buffer,
-		     int    offset,
-		     int    type,
-		     int    which)
+static void
+append_special_case (char   *out_buffer,
+                     size_t *in_out_len,
+                     int     offset,
+                     int     type,
+                     int     which)
 {
   const gchar *p = special_case_table + offset;
   size_t len;
@@ -814,10 +836,12 @@ output_special_case (gchar *out_buffer,
     p += strlen (p) + 1;

   len = strlen (p);
+  g_assert (len < G_MAXSIZE - *in_out_len);
+
   if (out_buffer)
-    memcpy (out_buffer, p, len);
+    memcpy (out_buffer + *in_out_len, p, len);

-  return len;
+  increase_size (in_out_len, len);
 }

 static gsize
@@ -858,11 +882,13 @@ real_toupper (const gchar *str,
 		  decomp_len = g_unichar_fully_decompose (c, FALSE, decomp, G_N_ELEMENTS (decomp));
 		  for (i=0; i < decomp_len; i++)
 		    {
+
 		      if (decomp[i] != 0x307 /* COMBINING DOT ABOVE */)
-			len += g_unichar_to_utf8 (g_unichar_toupper (decomp[i]), out_buffer ? out_buffer + len : NULL);
+                        append_utf8_char_to_buffer (g_unichar_toupper (decomp[i]),
+                                                    out_buffer, &len);
 		    }
-
-		  len += output_marks (&p, out_buffer ? out_buffer + len : NULL, TRUE);
+
+                  append_mark (&p, out_buffer, &len, TRUE);

 		  continue;
 		}
@@ -875,17 +901,17 @@ real_toupper (const gchar *str,
       if (locale_type == LOCALE_TURKIC && c == 'i')
 	{
 	  /* i => LATIN CAPITAL LETTER I WITH DOT ABOVE */
-	  len += g_unichar_to_utf8 (0x130, out_buffer ? out_buffer + len : NULL);
+          append_utf8_char_to_buffer (0x130, out_buffer, &len);
 	}
       else if (c == 0x0345)	/* COMBINING GREEK YPOGEGRAMMENI */
 	{
 	  /* Nasty, need to move it after other combining marks .. this would go away if
 	   * we normalized first.
 	   */
-	  len += output_marks (&p, out_buffer ? out_buffer + len : NULL, FALSE);
+          append_mark (&p, out_buffer, &len, FALSE);

 	  /* And output as GREEK CAPITAL LETTER IOTA */
-	  len += g_unichar_to_utf8 (0x399, out_buffer ? out_buffer + len : NULL);
+          append_utf8_char_to_buffer (0x399, out_buffer, &len);
 	}
       else if (IS (t,
 		   OR (G_UNICODE_LOWERCASE_LETTER,
@@ -896,8 +922,8 @@ real_toupper (const gchar *str,

 	  if (val >= 0x1000000)
 	    {
-	      len += output_special_case (out_buffer ? out_buffer + len : NULL, val - 0x1000000, t,
-					  t == G_UNICODE_LOWERCASE_LETTER ? 0 : 1);
+              append_special_case (out_buffer, &len, val - 0x1000000, t,
+                                   t == G_UNICODE_LOWERCASE_LETTER ? 0 : 1);
 	    }
 	  else
 	    {
@@ -917,7 +943,7 @@ real_toupper (const gchar *str,
 	      /* Some lowercase letters, e.g., U+000AA, FEMININE ORDINAL INDICATOR,
 	       * do not have an uppercase equivalent, in which case val will be
 	       * zero. */
-	      len += g_unichar_to_utf8 (val ? val : c, out_buffer ? out_buffer + len : NULL);
+              append_utf8_char_to_buffer (val ? val : c, out_buffer, &len);
 	    }
 	}
       else
@@ -927,7 +953,7 @@ real_toupper (const gchar *str,
 	  if (out_buffer)
 	    memcpy (out_buffer + len, last, char_len);

-	  len += char_len;
+          increase_size (&len, char_len);
 	}

     }
@@ -965,6 +991,8 @@ g_utf8_strup (const gchar *str,
    * We use a two pass approach to keep memory management simple
    */
   result_len = real_toupper (str, len, NULL, locale_type);
+  g_assert (result_len < G_MAXSIZE);
+
   result = g_malloc (result_len + 1);
   real_toupper (str, len, result, locale_type);
   result[result_len] = '\0';
@@ -1022,14 +1050,15 @@ real_tolower (const gchar *str,
             {
               /* I + COMBINING DOT ABOVE => i (U+0069)
                * LATIN CAPITAL LETTER I WITH DOT ABOVE => i (U+0069) */
-              len += g_unichar_to_utf8 (0x0069, out_buffer ? out_buffer + len : NULL);
+              append_utf8_char_to_buffer (0x0069, out_buffer, &len);
+
               if (combining_dot)
                 p = g_utf8_next_char (p);
             }
           else
             {
               /* I => LATIN SMALL LETTER DOTLESS I */
-              len += g_unichar_to_utf8 (0x131, out_buffer ? out_buffer + len : NULL);
+              append_utf8_char_to_buffer (0x131, out_buffer, &len);
             }
         }
       /* Introduce an explicit dot above when lowercasing capital I's and J's
@@ -1037,19 +1066,19 @@ real_tolower (const gchar *str,
       else if (locale_type == LOCALE_LITHUANIAN &&
                (c == 0x00cc || c == 0x00cd || c == 0x0128))
         {
-          len += g_unichar_to_utf8 (0x0069, out_buffer ? out_buffer + len : NULL);
-          len += g_unichar_to_utf8 (0x0307, out_buffer ? out_buffer + len : NULL);
+          append_utf8_char_to_buffer (0x0069, out_buffer, &len);
+          append_utf8_char_to_buffer (0x0307, out_buffer, &len);

           switch (c)
             {
             case 0x00cc:
-              len += g_unichar_to_utf8 (0x0300, out_buffer ? out_buffer + len : NULL);
+              append_utf8_char_to_buffer (0x0300, out_buffer, &len);
               break;
             case 0x00cd:
-              len += g_unichar_to_utf8 (0x0301, out_buffer ? out_buffer + len : NULL);
+              append_utf8_char_to_buffer (0x0301, out_buffer, &len);
               break;
             case 0x0128:
-              len += g_unichar_to_utf8 (0x0303, out_buffer ? out_buffer + len : NULL);
+              append_utf8_char_to_buffer (0x0303, out_buffer, &len);
               break;
             }
         }
@@ -1058,8 +1087,8 @@ real_tolower (const gchar *str,
                 c == 'J' || c == G_UNICHAR_FULLWIDTH_J || c == 0x012e) &&
                has_more_above (p))
         {
-          len += g_unichar_to_utf8 (g_unichar_tolower (c), out_buffer ? out_buffer + len : NULL);
-          len += g_unichar_to_utf8 (0x0307, out_buffer ? out_buffer + len : NULL);
+          append_utf8_char_to_buffer (g_unichar_tolower (c), out_buffer, &len);
+          append_utf8_char_to_buffer (0x0307, out_buffer, &len);
         }
       else if (c == 0x03A3)	/* GREEK CAPITAL LETTER SIGMA */
 	{
@@ -1082,7 +1111,7 @@ real_tolower (const gchar *str,
 	  else
 	    val = 0x3c2;	/* GREEK SMALL FINAL SIGMA */

-	  len += g_unichar_to_utf8 (val, out_buffer ? out_buffer + len : NULL);
+          append_utf8_char_to_buffer (val, out_buffer, &len);
 	}
       else if (IS (t,
 		   OR (G_UNICODE_UPPERCASE_LETTER,
@@ -1093,7 +1122,7 @@ real_tolower (const gchar *str,

 	  if (val >= 0x1000000)
 	    {
-	      len += output_special_case (out_buffer ? out_buffer + len : NULL, val - 0x1000000, t, 0);
+              append_special_case (out_buffer, &len, val - 0x1000000, t, 0);
 	    }
 	  else
 	    {
@@ -1112,7 +1141,7 @@ real_tolower (const gchar *str,

 	      /* Not all uppercase letters are guaranteed to have a lowercase
 	       * equivalent.  If this is the case, val will be zero. */
-	      len += g_unichar_to_utf8 (val ? val : c, out_buffer ? out_buffer + len : NULL);
+              append_utf8_char_to_buffer (val ? val : c, out_buffer, &len);
 	    }
 	}
       else
@@ -1122,7 +1151,7 @@ real_tolower (const gchar *str,
 	  if (out_buffer)
 	    memcpy (out_buffer + len, last, char_len);

-	  len += char_len;
+          increase_size (&len, char_len);
 	}

     }
@@ -1159,6 +1188,8 @@ g_utf8_strdown (const gchar *str,
    * We use a two pass approach to keep memory management simple
    */
   result_len = real_tolower (str, len, NULL, locale_type);
+  g_assert (result_len < G_MAXSIZE);
+
   result = g_malloc (result_len + 1);
   real_tolower (str, len, result, locale_type);
   result[result_len] = '\0';
--
GitLab


From b96966058f4291db8970ced70ee22103e63679e5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marco=20Trevisan=20=28Trevi=C3=B1o=29?= <mail@3v1n0.net>
Date: Fri, 23 Jan 2026 17:39:34 +0100
Subject: [PATCH 4/4] glib/tests/unicode: Add test debug information when
 parsing input files

In case of failure, this makes it easier to see which line of the
source file we are at, as it might not be clear for non-ASCII chars.
---
 glib/tests/unicode.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/glib/tests/unicode.c b/glib/tests/unicode.c
index 90b5a98b8f..44d1083dd5 100644
--- a/glib/tests/unicode.c
+++ b/glib/tests/unicode.c
@@ -622,6 +622,7 @@ test_casemap_and_casefold (void)
   const char *locale;
   const char *test;
   const char *expected;
+  size_t line = 0;
   char *convert;
   char *current_locale = setlocale (LC_CTYPE, NULL);
   char *old_lc_all, *old_lc_messages, *old_lang;
@@ -642,6 +643,7 @@ test_casemap_and_casefold (void)

   while (fgets (buffer, sizeof (buffer), infile))
     {
+      line++;
       if (buffer[0] == '#')
         continue;

@@ -684,6 +686,9 @@ test_casemap_and_casefold (void)

       convert = g_utf8_strup (test, -1);
       expected = strings[4][0] ? strings[4] : test;
+      g_test_message ("Converting '%s' => '%s' (line %" G_GSIZE_FORMAT ")",
+                      test, expected, line);
+
       g_assert_cmpstr (convert, ==, expected);
       g_free (convert);

@@ -703,9 +708,11 @@ test_casemap_and_casefold (void)

   infile = g_fopen (filename, "re");
   g_assert (infile != NULL);
+  line = 0;

   while (fgets (buffer, sizeof (buffer), infile))
     {
+      line++;
       if (buffer[0] == '#')
         continue;

@@ -715,6 +722,9 @@ test_casemap_and_casefold (void)
       test = strings[0];

       convert = g_utf8_casefold (test, -1);
+      g_test_message ("Converting '%s' => '%s' (line %" G_GSIZE_FORMAT ")",
+                      test, strings[1], line);
+
       g_assert_cmpstr (convert, ==, strings[1]);
       g_free (convert);

--
GitLab