OBS-URL: https://build.opensuse.org/package/show/GNOME:Factory/glib2?expand=0&rev=592
429 lines
14 KiB
Diff
429 lines
14 KiB
Diff
From 662aa569efa65eaa4672ab0671eb8533a354cd89 Mon Sep 17 00:00:00 2001
|
|
From: =?UTF-8?q?Marco=20Trevisan=20=28Trevi=C3=B1o=29?= <mail@3v1n0.net>
|
|
Date: Wed, 21 Jan 2026 22:00:17 +0100
|
|
Subject: [PATCH 1/4] guniprop: Use size_t for output_marks length
|
|
|
|
The input string length may overflow, and this would lead to wrong
|
|
behavior and invalid writes.
|
|
|
|
Spotted by treeplus.
|
|
Thanks to the Sovereign Tech Resilience programme from the Sovereign
|
|
Tech Agency.
|
|
|
|
ID: #YWH-PGM9867-171
|
|
Closes: #3872
|
|
---
|
|
glib/guniprop.c | 4 ++--
|
|
1 file changed, 2 insertions(+), 2 deletions(-)
|
|
|
|
diff --git a/glib/guniprop.c b/glib/guniprop.c
|
|
index fe0033fd6b..1a0cc64089 100644
|
|
--- a/glib/guniprop.c
|
|
+++ b/glib/guniprop.c
|
|
@@ -772,13 +772,13 @@ get_locale_type (void)
|
|
return LOCALE_NORMAL;
|
|
}
|
|
|
|
-static gint
|
|
+static size_t
|
|
output_marks (const char **p_inout,
|
|
char *out_buffer,
|
|
gboolean remove_dot)
|
|
{
|
|
const char *p = *p_inout;
|
|
- gint len = 0;
|
|
+ size_t len = 0;
|
|
|
|
while (*p)
|
|
{
|
|
--
|
|
GitLab
|
|
|
|
|
|
From 58356619525a1d565df8cc348e9784716f020f2f Mon Sep 17 00:00:00 2001
|
|
From: =?UTF-8?q?Marco=20Trevisan=20=28Trevi=C3=B1o=29?= <mail@3v1n0.net>
|
|
Date: Wed, 21 Jan 2026 22:01:49 +0100
|
|
Subject: [PATCH 2/4] guniprop: Do not convert size_t to gint
|
|
|
|
We were correctly using size_t in output_special_case() since commit
|
|
362f92b69, but then we converted the value back to int.
|
|
|
|
Related to: #3872
|
|
---
|
|
glib/guniprop.c | 2 +-
|
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
|
|
|
diff --git a/glib/guniprop.c b/glib/guniprop.c
|
|
index 1a0cc64089..fe50a287c4 100644
|
|
--- a/glib/guniprop.c
|
|
+++ b/glib/guniprop.c
|
|
@@ -798,7 +798,7 @@ output_marks (const char **p_inout,
|
|
return len;
|
|
}
|
|
|
|
-static gint
|
|
+static size_t
|
|
output_special_case (gchar *out_buffer,
|
|
int offset,
|
|
int type,
|
|
--
|
|
GitLab
|
|
|
|
|
|
From 170dc8c4068db4c4cbf63c7d27192e230436da21 Mon Sep 17 00:00:00 2001
|
|
From: =?UTF-8?q?Marco=20Trevisan=20=28Trevi=C3=B1o=29?= <mail@3v1n0.net>
|
|
Date: Wed, 21 Jan 2026 22:04:22 +0100
|
|
Subject: [PATCH 3/4] guniprop: Ensure we do not overflow size in
|
|
g_utf8_{strdown,strup}()
|
|
|
|
While this is technically not a security issue, when repeatedly adding
|
|
to a size_t value, we can overflow and start from 0.
|
|
|
|
Now, while being unlikely, technically a UTF-8 lower or upper string can
|
|
have a longer size than the input value, and if the output string is
|
|
bigger than G_MAXSIZE we'd end up cutting it silently.
|
|
|
|
Let's instead assert each time we increase the output length
|
|
---
|
|
glib/guniprop.c | 107 +++++++++++++++++++++++++++++++-----------------
|
|
1 file changed, 69 insertions(+), 38 deletions(-)
|
|
|
|
diff --git a/glib/guniprop.c b/glib/guniprop.c
|
|
index fe50a287c4..86020b6e0f 100644
|
|
--- a/glib/guniprop.c
|
|
+++ b/glib/guniprop.c
|
|
@@ -772,14 +772,36 @@ get_locale_type (void)
|
|
return LOCALE_NORMAL;
|
|
}
|
|
|
|
-static size_t
|
|
-output_marks (const char **p_inout,
|
|
- char *out_buffer,
|
|
- gboolean remove_dot)
|
|
+G_ALWAYS_INLINE static inline void
|
|
+increase_size (size_t *sizeptr, size_t add)
|
|
+{
|
|
+ g_assert (G_MAXSIZE - *(sizeptr) >= add);
|
|
+ *(sizeptr) += add;
|
|
+}
|
|
+
|
|
+G_ALWAYS_INLINE static inline void
|
|
+append_utf8_char_to_buffer (gunichar c,
|
|
+ char *out_buffer,
|
|
+ size_t *in_out_len)
|
|
+{
|
|
+ gint utf8_len;
|
|
+ char *buffer;
|
|
+
|
|
+ buffer = out_buffer ? out_buffer + *(in_out_len) : NULL;
|
|
+ utf8_len = g_unichar_to_utf8 (c, buffer);
|
|
+
|
|
+ g_assert (utf8_len >= 0);
|
|
+ increase_size (in_out_len, utf8_len);
|
|
+}
|
|
+
|
|
+static void
|
|
+append_mark (const char **p_inout,
|
|
+ char *out_buffer,
|
|
+ size_t *in_out_len,
|
|
+ gboolean remove_dot)
|
|
{
|
|
const char *p = *p_inout;
|
|
- size_t len = 0;
|
|
-
|
|
+
|
|
while (*p)
|
|
{
|
|
gunichar c = g_utf8_get_char (p);
|
|
@@ -787,7 +809,7 @@ output_marks (const char **p_inout,
|
|
if (ISMARK (TYPE (c)))
|
|
{
|
|
if (!remove_dot || c != 0x307 /* COMBINING DOT ABOVE */)
|
|
- len += g_unichar_to_utf8 (c, out_buffer ? out_buffer + len : NULL);
|
|
+ append_utf8_char_to_buffer (c, out_buffer, in_out_len);
|
|
p = g_utf8_next_char (p);
|
|
}
|
|
else
|
|
@@ -795,14 +817,14 @@ output_marks (const char **p_inout,
|
|
}
|
|
|
|
*p_inout = p;
|
|
- return len;
|
|
}
|
|
|
|
-static size_t
|
|
-output_special_case (gchar *out_buffer,
|
|
- int offset,
|
|
- int type,
|
|
- int which)
|
|
+static void
|
|
+append_special_case (char *out_buffer,
|
|
+ size_t *in_out_len,
|
|
+ int offset,
|
|
+ int type,
|
|
+ int which)
|
|
{
|
|
const gchar *p = special_case_table + offset;
|
|
size_t len;
|
|
@@ -814,10 +836,12 @@ output_special_case (gchar *out_buffer,
|
|
p += strlen (p) + 1;
|
|
|
|
len = strlen (p);
|
|
+ g_assert (len < G_MAXSIZE - *in_out_len);
|
|
+
|
|
if (out_buffer)
|
|
- memcpy (out_buffer, p, len);
|
|
+ memcpy (out_buffer + *in_out_len, p, len);
|
|
|
|
- return len;
|
|
+ increase_size (in_out_len, len);
|
|
}
|
|
|
|
static gsize
|
|
@@ -858,11 +882,13 @@ real_toupper (const gchar *str,
|
|
decomp_len = g_unichar_fully_decompose (c, FALSE, decomp, G_N_ELEMENTS (decomp));
|
|
for (i=0; i < decomp_len; i++)
|
|
{
|
|
+
|
|
if (decomp[i] != 0x307 /* COMBINING DOT ABOVE */)
|
|
- len += g_unichar_to_utf8 (g_unichar_toupper (decomp[i]), out_buffer ? out_buffer + len : NULL);
|
|
+ append_utf8_char_to_buffer (g_unichar_toupper (decomp[i]),
|
|
+ out_buffer, &len);
|
|
}
|
|
-
|
|
- len += output_marks (&p, out_buffer ? out_buffer + len : NULL, TRUE);
|
|
+
|
|
+ append_mark (&p, out_buffer, &len, TRUE);
|
|
|
|
continue;
|
|
}
|
|
@@ -875,17 +901,17 @@ real_toupper (const gchar *str,
|
|
if (locale_type == LOCALE_TURKIC && c == 'i')
|
|
{
|
|
/* i => LATIN CAPITAL LETTER I WITH DOT ABOVE */
|
|
- len += g_unichar_to_utf8 (0x130, out_buffer ? out_buffer + len : NULL);
|
|
+ append_utf8_char_to_buffer (0x130, out_buffer, &len);
|
|
}
|
|
else if (c == 0x0345) /* COMBINING GREEK YPOGEGRAMMENI */
|
|
{
|
|
/* Nasty, need to move it after other combining marks .. this would go away if
|
|
* we normalized first.
|
|
*/
|
|
- len += output_marks (&p, out_buffer ? out_buffer + len : NULL, FALSE);
|
|
+ append_mark (&p, out_buffer, &len, FALSE);
|
|
|
|
/* And output as GREEK CAPITAL LETTER IOTA */
|
|
- len += g_unichar_to_utf8 (0x399, out_buffer ? out_buffer + len : NULL);
|
|
+ append_utf8_char_to_buffer (0x399, out_buffer, &len);
|
|
}
|
|
else if (IS (t,
|
|
OR (G_UNICODE_LOWERCASE_LETTER,
|
|
@@ -896,8 +922,8 @@ real_toupper (const gchar *str,
|
|
|
|
if (val >= 0x1000000)
|
|
{
|
|
- len += output_special_case (out_buffer ? out_buffer + len : NULL, val - 0x1000000, t,
|
|
- t == G_UNICODE_LOWERCASE_LETTER ? 0 : 1);
|
|
+ append_special_case (out_buffer, &len, val - 0x1000000, t,
|
|
+ t == G_UNICODE_LOWERCASE_LETTER ? 0 : 1);
|
|
}
|
|
else
|
|
{
|
|
@@ -917,7 +943,7 @@ real_toupper (const gchar *str,
|
|
/* Some lowercase letters, e.g., U+000AA, FEMININE ORDINAL INDICATOR,
|
|
* do not have an uppercase equivalent, in which case val will be
|
|
* zero. */
|
|
- len += g_unichar_to_utf8 (val ? val : c, out_buffer ? out_buffer + len : NULL);
|
|
+ append_utf8_char_to_buffer (val ? val : c, out_buffer, &len);
|
|
}
|
|
}
|
|
else
|
|
@@ -927,7 +953,7 @@ real_toupper (const gchar *str,
|
|
if (out_buffer)
|
|
memcpy (out_buffer + len, last, char_len);
|
|
|
|
- len += char_len;
|
|
+ increase_size (&len, char_len);
|
|
}
|
|
|
|
}
|
|
@@ -965,6 +991,8 @@ g_utf8_strup (const gchar *str,
|
|
* We use a two pass approach to keep memory management simple
|
|
*/
|
|
result_len = real_toupper (str, len, NULL, locale_type);
|
|
+ g_assert (result_len < G_MAXSIZE);
|
|
+
|
|
result = g_malloc (result_len + 1);
|
|
real_toupper (str, len, result, locale_type);
|
|
result[result_len] = '\0';
|
|
@@ -1022,14 +1050,15 @@ real_tolower (const gchar *str,
|
|
{
|
|
/* I + COMBINING DOT ABOVE => i (U+0069)
|
|
* LATIN CAPITAL LETTER I WITH DOT ABOVE => i (U+0069) */
|
|
- len += g_unichar_to_utf8 (0x0069, out_buffer ? out_buffer + len : NULL);
|
|
+ append_utf8_char_to_buffer (0x0069, out_buffer, &len);
|
|
+
|
|
if (combining_dot)
|
|
p = g_utf8_next_char (p);
|
|
}
|
|
else
|
|
{
|
|
/* I => LATIN SMALL LETTER DOTLESS I */
|
|
- len += g_unichar_to_utf8 (0x131, out_buffer ? out_buffer + len : NULL);
|
|
+ append_utf8_char_to_buffer (0x131, out_buffer, &len);
|
|
}
|
|
}
|
|
/* Introduce an explicit dot above when lowercasing capital I's and J's
|
|
@@ -1037,19 +1066,19 @@ real_tolower (const gchar *str,
|
|
else if (locale_type == LOCALE_LITHUANIAN &&
|
|
(c == 0x00cc || c == 0x00cd || c == 0x0128))
|
|
{
|
|
- len += g_unichar_to_utf8 (0x0069, out_buffer ? out_buffer + len : NULL);
|
|
- len += g_unichar_to_utf8 (0x0307, out_buffer ? out_buffer + len : NULL);
|
|
+ append_utf8_char_to_buffer (0x0069, out_buffer, &len);
|
|
+ append_utf8_char_to_buffer (0x0307, out_buffer, &len);
|
|
|
|
switch (c)
|
|
{
|
|
case 0x00cc:
|
|
- len += g_unichar_to_utf8 (0x0300, out_buffer ? out_buffer + len : NULL);
|
|
+ append_utf8_char_to_buffer (0x0300, out_buffer, &len);
|
|
break;
|
|
case 0x00cd:
|
|
- len += g_unichar_to_utf8 (0x0301, out_buffer ? out_buffer + len : NULL);
|
|
+ append_utf8_char_to_buffer (0x0301, out_buffer, &len);
|
|
break;
|
|
case 0x0128:
|
|
- len += g_unichar_to_utf8 (0x0303, out_buffer ? out_buffer + len : NULL);
|
|
+ append_utf8_char_to_buffer (0x0303, out_buffer, &len);
|
|
break;
|
|
}
|
|
}
|
|
@@ -1058,8 +1087,8 @@ real_tolower (const gchar *str,
|
|
c == 'J' || c == G_UNICHAR_FULLWIDTH_J || c == 0x012e) &&
|
|
has_more_above (p))
|
|
{
|
|
- len += g_unichar_to_utf8 (g_unichar_tolower (c), out_buffer ? out_buffer + len : NULL);
|
|
- len += g_unichar_to_utf8 (0x0307, out_buffer ? out_buffer + len : NULL);
|
|
+ append_utf8_char_to_buffer (g_unichar_tolower (c), out_buffer, &len);
|
|
+ append_utf8_char_to_buffer (0x0307, out_buffer, &len);
|
|
}
|
|
else if (c == 0x03A3) /* GREEK CAPITAL LETTER SIGMA */
|
|
{
|
|
@@ -1082,7 +1111,7 @@ real_tolower (const gchar *str,
|
|
else
|
|
val = 0x3c2; /* GREEK SMALL FINAL SIGMA */
|
|
|
|
- len += g_unichar_to_utf8 (val, out_buffer ? out_buffer + len : NULL);
|
|
+ append_utf8_char_to_buffer (val, out_buffer, &len);
|
|
}
|
|
else if (IS (t,
|
|
OR (G_UNICODE_UPPERCASE_LETTER,
|
|
@@ -1093,7 +1122,7 @@ real_tolower (const gchar *str,
|
|
|
|
if (val >= 0x1000000)
|
|
{
|
|
- len += output_special_case (out_buffer ? out_buffer + len : NULL, val - 0x1000000, t, 0);
|
|
+ append_special_case (out_buffer, &len, val - 0x1000000, t, 0);
|
|
}
|
|
else
|
|
{
|
|
@@ -1112,7 +1141,7 @@ real_tolower (const gchar *str,
|
|
|
|
/* Not all uppercase letters are guaranteed to have a lowercase
|
|
* equivalent. If this is the case, val will be zero. */
|
|
- len += g_unichar_to_utf8 (val ? val : c, out_buffer ? out_buffer + len : NULL);
|
|
+ append_utf8_char_to_buffer (val ? val : c, out_buffer, &len);
|
|
}
|
|
}
|
|
else
|
|
@@ -1122,7 +1151,7 @@ real_tolower (const gchar *str,
|
|
if (out_buffer)
|
|
memcpy (out_buffer + len, last, char_len);
|
|
|
|
- len += char_len;
|
|
+ increase_size (&len, char_len);
|
|
}
|
|
|
|
}
|
|
@@ -1159,6 +1188,8 @@ g_utf8_strdown (const gchar *str,
|
|
* We use a two pass approach to keep memory management simple
|
|
*/
|
|
result_len = real_tolower (str, len, NULL, locale_type);
|
|
+ g_assert (result_len < G_MAXSIZE);
|
|
+
|
|
result = g_malloc (result_len + 1);
|
|
real_tolower (str, len, result, locale_type);
|
|
result[result_len] = '\0';
|
|
--
|
|
GitLab
|
|
|
|
|
|
From b96966058f4291db8970ced70ee22103e63679e5 Mon Sep 17 00:00:00 2001
|
|
From: =?UTF-8?q?Marco=20Trevisan=20=28Trevi=C3=B1o=29?= <mail@3v1n0.net>
|
|
Date: Fri, 23 Jan 2026 17:39:34 +0100
|
|
Subject: [PATCH 4/4] glib/tests/unicode: Add test debug information when
|
|
parsing input files
|
|
|
|
In case of failures, this makes it easier to understand what line of the
|
|
source file we're at, as it might not be clear for non-ascii chars
|
|
---
|
|
glib/tests/unicode.c | 10 ++++++++++
|
|
1 file changed, 10 insertions(+)
|
|
|
|
diff --git a/glib/tests/unicode.c b/glib/tests/unicode.c
|
|
index 90b5a98b8f..44d1083dd5 100644
|
|
--- a/glib/tests/unicode.c
|
|
+++ b/glib/tests/unicode.c
|
|
@@ -622,6 +622,7 @@ test_casemap_and_casefold (void)
|
|
const char *locale;
|
|
const char *test;
|
|
const char *expected;
|
|
+ size_t line = 0;
|
|
char *convert;
|
|
char *current_locale = setlocale (LC_CTYPE, NULL);
|
|
char *old_lc_all, *old_lc_messages, *old_lang;
|
|
@@ -642,6 +643,7 @@ test_casemap_and_casefold (void)
|
|
|
|
while (fgets (buffer, sizeof (buffer), infile))
|
|
{
|
|
+ line++;
|
|
if (buffer[0] == '#')
|
|
continue;
|
|
|
|
@@ -684,6 +686,9 @@ test_casemap_and_casefold (void)
|
|
|
|
convert = g_utf8_strup (test, -1);
|
|
expected = strings[4][0] ? strings[4] : test;
|
|
+ g_test_message ("Converting '%s' => '%s' (line %" G_GSIZE_FORMAT ")",
|
|
+ test, expected, line);
|
|
+
|
|
g_assert_cmpstr (convert, ==, expected);
|
|
g_free (convert);
|
|
|
|
@@ -703,9 +708,11 @@ test_casemap_and_casefold (void)
|
|
|
|
infile = g_fopen (filename, "re");
|
|
g_assert (infile != NULL);
|
|
+ line = 0;
|
|
|
|
while (fgets (buffer, sizeof (buffer), infile))
|
|
{
|
|
+ line++;
|
|
if (buffer[0] == '#')
|
|
continue;
|
|
|
|
@@ -715,6 +722,9 @@ test_casemap_and_casefold (void)
|
|
test = strings[0];
|
|
|
|
convert = g_utf8_casefold (test, -1);
|
|
+ g_test_message ("Converting '%s' => '%s' (line %" G_GSIZE_FORMAT ")",
|
|
+ test, strings[1], line);
|
|
+
|
|
g_assert_cmpstr (convert, ==, strings[1]);
|
|
g_free (convert);
|
|
|
|
--
|
|
GitLab
|
|
|