gstring: Fix a heap buffer overflow in the new g_string_replace() code

This affects the new `g_string_replace()` code which landed on `main` a
few days ago. It does not affect the old implementation of
`g_string_replace()`.

The code for the `f_len == 0` (needle is an empty string) case was
modifying `string` in the loop, without updating any of the string
pointers into it. If the replacement was long enough (or inserted enough
times), this would trigger a realloc of `string->str` and cause all the
string pointers to be dangling.

Fix this by pulling the `f_len == 0` code out into a separate branch and
loop, rather than trying to integrate it into the main loop. This
simplifies the main loop significantly, and makes both easier to verify.

An alternative approach, which doesn’t involve splitting the
`f_len == 0` case out, might have been to track the positions using
indexes rather than string pointers. I think the approach in this commit
is better, though, as it removes the possibility of `f_len == 0`
entirely from the loop, which makes it much easier to verify termination
of the loop.

Add more tests to validate this, including the test from oss-fuzz which
triggered the realloc and found the heap buffer overflow.

The new tests have also been run against the _old_ implementation of
`g_string_replace()` to ensure its behaviour (particularly around `f_len
== 0 && limit > 0`) has not changed.

Signed-off-by: Philip Withnall <pwithnall@gnome.org>

oss-fuzz#371043019
This commit is contained in:
Philip Withnall 2024-10-03 18:18:19 +01:00
parent 7f8f0842d0
commit c9e48947e1
No known key found for this signature in database
GPG Key ID: DCDF5885B1F3ED73
2 changed files with 71 additions and 29 deletions

View File

@ -1041,10 +1041,53 @@ g_string_replace (GString *string,
f_len = strlen (find); f_len = strlen (find);
r_len = strlen (replace); r_len = strlen (replace);
/* It removes a lot of branches and possibility for infinite loops if we
* handle the case of an empty @find string separately. */
if (G_UNLIKELY (f_len == 0))
{
if (limit == 0 || limit > string->len)
{
if (string->len > G_MAXSIZE - 1)
g_error ("inserting in every position in string would overflow");
limit = string->len + 1;
}
if (r_len > 0 &&
(limit > G_MAXSIZE / r_len ||
limit * r_len > G_MAXSIZE - string->len))
g_error ("inserting in every position in string would overflow");
new_len = string->len + limit * r_len;
new_string = g_string_sized_new (new_len);
for (size_t i = 0; i < limit; i++)
{
g_string_append_len (new_string, replace, r_len);
if (i < string->len)
g_string_append_c (new_string, string->str[i]);
}
if (limit < string->len)
g_string_append_len (new_string, string->str + limit, string->len - limit);
g_free (string->str);
string->allocated_len = new_string->allocated_len;
string->len = new_string->len;
string->str = g_string_free_and_steal (g_steal_pointer (&new_string));
return limit;
}
/* Potentially do two passes: the first to calculate the length of the new string, /* Potentially do two passes: the first to calculate the length of the new string,
* new_len, if its going to be longer than the original string; and the second to * new_len, if its going to be longer than the original string; and the second to
* do the replacements. The first pass is skipped if the new string is going to be * do the replacements. The first pass is skipped if the new string is going to be
* no longer than the original. */ * no longer than the original.
*
* The second pass calls various g_string_insert_len() (and similar) methods
* which would normally potentially reallocate string->str, and hence
* invalidate the cur/next/first/dst pointers. Because weve pre-calculated
* the new_len and do all the string manipulations on new_string, that
* shouldnt happen. This means we scan `string` while modifying
* `new_string`. */
do do
{ {
dst = first; dst = first;
@ -1053,19 +1096,7 @@ g_string_replace (GString *string,
while ((next = strstr (cur, find)) != NULL) while ((next = strstr (cur, find)) != NULL)
{ {
n++; n++;
if G_UNLIKELY (f_len == 0)
{
g_string_insert_len (string, next - string->str, replace, r_len);
cur = next + r_len;
/* Only match the empty string once at any given position, to
* avoid infinite loops */
if (cur[0] == '\0')
break;
else
cur++;
}
else
{
if (r_len <= f_len) if (r_len <= f_len)
{ {
memmove (dst, cur, next - cur); memmove (dst, cur, next - cur);
@ -1086,15 +1117,11 @@ g_string_replace (GString *string,
} }
} }
cur = next + f_len; cur = next + f_len;
}
if (n == limit) if (n == limit)
break; break;
} }
/* @find is an empty string. */
if (f_len == 0)
break;
/* Append the trailing characters from after the final instance of @find /* Append the trailing characters from after the final instance of @find
* in the input string. */ * in the input string. */
if (r_len <= f_len) if (r_len <= f_len)

View File

@ -664,10 +664,25 @@ test_string_replace (void)
"bba", 1 }, "bba", 1 },
{ "foo", "", "bar", 0, { "foo", "", "bar", 0,
"barfbarobarobar", 4 }, "barfbarobarobar", 4 },
{ "foo", "", "bar", 1,
"barfoo", 1 },
{ "foo", "", "bar", 2,
"barfbaroo", 2 },
{ "foo", "", "bar", 3,
"barfbarobaro", 3 },
{ "foo", "", "bar", 4,
"barfbarobarobar", 4 },
{ "foo", "", "bar", 5,
"barfbarobarobar", 4 },
{ "", "", "x", 0, { "", "", "x", 0,
"x", 1 }, "x", 1 },
{ "", "", "", 0, { "", "", "", 0,
"", 1 }, "", 1 },
/* use find and replace strings long enough to trigger a reallocation in
* the result string */
{ "bbbbbbbbb", "", "aaaaaaaaaaaa", 0,
"aaaaaaaaaaaabaaaaaaaaaaaabaaaaaaaaaaaabaaaaaaaaaaaabaaaaaaaaaaaab"
"aaaaaaaaaaaabaaaaaaaaaaaabaaaaaaaaaaaabaaaaaaaaaaaabaaaaaaaaaaaa", 10 },
}; };
gsize i; gsize i;