From c9e48947e18f2ced4b544750be624c47a741e691 Mon Sep 17 00:00:00 2001 From: Philip Withnall Date: Thu, 3 Oct 2024 18:18:19 +0100 Subject: [PATCH] gstring: Fix a heap buffer overflow in the new g_string_replace() code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This affects the new `g_string_replace()` code which landed on `main` a few days ago. It does not affect the old implementation of `g_string_replace()`. The code for the `f_len == 0` (needle is an empty string) case was modifying `string` in the loop, without updating any of the string pointers into it. If the replacement was long enough (or inserted enough times), this would trigger a realloc of `string->str` and cause all the string pointers to be dangling. Fix this by pulling the `f_len == 0` code out into a separate branch and loop, rather than trying to integrate it into the main loop. This simplifies the main loop significantly, and makes both easier to verify. An alternative approach, which doesn’t involve splitting the `f_len == 0` case out, might have been to track the positions using indexes rather than string pointers. I think the approach in this commit is better, though, as it removes the possibility of `f_len == 0` entirely from the loop, which makes it much easier to verify termination of the loop. Add more tests to validate this, including the test from oss-fuzz which triggered the realloc and found the heap buffer overflow. The new tests have also been run against the _old_ implementation of `g_string_replace()` to ensure its behaviour (particularly around `f_len == 0 && limit > 0`) has not changed. Signed-off-by: Philip Withnall oss-fuzz#371043019 --- glib/gstring.c | 85 +++++++++++++++++++++++++++++---------------- glib/tests/string.c | 15 ++++++++ 2 files changed, 71 insertions(+), 29 deletions(-) diff --git a/glib/gstring.c b/glib/gstring.c index 4b4ff5691..5279ed3cc 100644 --- a/glib/gstring.c +++ b/glib/gstring.c @@ -1041,10 +1041,53 @@ g_string_replace (GString *string, f_len = strlen (find); r_len = strlen (replace); + /* It removes a lot of branches and possibility for infinite loops if we + * handle the case of an empty @find string separately. */ + if (G_UNLIKELY (f_len == 0)) + { + if (limit == 0 || limit > string->len) + { + if (string->len > G_MAXSIZE - 1) + g_error ("inserting in every position in string would overflow"); + + limit = string->len + 1; + } + + if (r_len > 0 && + (limit > G_MAXSIZE / r_len || + limit * r_len > G_MAXSIZE - string->len)) + g_error ("inserting in every position in string would overflow"); + + new_len = string->len + limit * r_len; + new_string = g_string_sized_new (new_len); + for (size_t i = 0; i < limit; i++) + { + g_string_append_len (new_string, replace, r_len); + if (i < string->len) + g_string_append_c (new_string, string->str[i]); + } + if (limit < string->len) + g_string_append_len (new_string, string->str + limit, string->len - limit); + + g_free (string->str); + string->allocated_len = new_string->allocated_len; + string->len = new_string->len; + string->str = g_string_free_and_steal (g_steal_pointer (&new_string)); + + return limit; + } + /* Potentially do two passes: the first to calculate the length of the new string, * new_len, if it’s going to be longer than the original string; and the second to * do the replacements. The first pass is skipped if the new string is going to be - * no longer than the original. */ + * no longer than the original. + * + * The second pass calls various g_string_insert_len() (and similar) methods + * which would normally potentially reallocate string->str, and hence + * invalidate the cur/next/first/dst pointers. Because we’ve pre-calculated + * the new_len and do all the string manipulations on new_string, that + * shouldn’t happen. This means we scan `string` while modifying + * `new_string`. */ do { dst = first; @@ -1053,48 +1096,32 @@ g_string_replace (GString *string, while ((next = strstr (cur, find)) != NULL) { n++; - if G_UNLIKELY (f_len == 0) + + if (r_len <= f_len) { - g_string_insert_len (string, next - string->str, replace, r_len); - cur = next + r_len; - /* Only match the empty string once at any given position, to - * avoid infinite loops */ - if (cur[0] == '\0') - break; - else - cur++; + memmove (dst, cur, next - cur); + dst += next - cur; + memcpy (dst, replace, r_len); + dst += r_len; } else { - if (r_len <= f_len) + if (new_string == NULL) { - memmove (dst, cur, next - cur); - dst += next - cur; - memcpy (dst, replace, r_len); - dst += r_len; + new_len += r_len - f_len; } else { - if (new_string == NULL) - { - new_len += r_len - f_len; - } - else - { - g_string_append_len (new_string, cur, next - cur); - g_string_append_len (new_string, replace, r_len); - } + g_string_append_len (new_string, cur, next - cur); + g_string_append_len (new_string, replace, r_len); } - cur = next + f_len; } + cur = next + f_len; + if (n == limit) break; } - /* @find is an empty string. */ - if (f_len == 0) - break; - /* Append the trailing characters from after the final instance of @find * in the input string. */ if (r_len <= f_len) diff --git a/glib/tests/string.c b/glib/tests/string.c index 852c71b29..6023bc43d 100644 --- a/glib/tests/string.c +++ b/glib/tests/string.c @@ -664,10 +664,25 @@ test_string_replace (void) "bba", 1 }, { "foo", "", "bar", 0, "barfbarobarobar", 4 }, + { "foo", "", "bar", 1, + "barfoo", 1 }, + { "foo", "", "bar", 2, + "barfbaroo", 2 }, + { "foo", "", "bar", 3, + "barfbarobaro", 3 }, + { "foo", "", "bar", 4, + "barfbarobarobar", 4 }, + { "foo", "", "bar", 5, + "barfbarobarobar", 4 }, { "", "", "x", 0, "x", 1 }, { "", "", "", 0, "", 1 }, + /* use find and replace strings long enough to trigger a reallocation in + * the result string */ + { "bbbbbbbbb", "", "aaaaaaaaaaaa", 0, + "aaaaaaaaaaaabaaaaaaaaaaaabaaaaaaaaaaaabaaaaaaaaaaaabaaaaaaaaaaaab" + "aaaaaaaaaaaabaaaaaaaaaaaabaaaaaaaaaaaabaaaaaaaaaaaabaaaaaaaaaaaa", 10 }, }; gsize i;