gcharset: Fix potential negative string offsets in g_get_locale_variants()

The parser was assuming that the three separators (`_`, `.` and `@`) always
appear in that order, but the input might not contain them in that order. In
that case, the parser would have passed negative values to `g_strndup()`, which
would have been implicitly cast to large positive values, and potentially
exposed a lot of memory (until the first nul byte, which was probably quite soon).

Expand the existing `g_get_locale_variants()` test to cover some invalid
parsing, and add a fuzzing test too.

Spotted by `-Wsign-conversion`.

Signed-off-by: Philip Withnall <pwithnall@gnome.org>

Helps: #3405
This commit is contained in:
Philip Withnall
2025-04-10 19:50:29 +01:00
parent c6884c95f6
commit 7aeb4d94f2
4 changed files with 82 additions and 9 deletions

View File

@@ -0,0 +1,45 @@
/*
* Copyright 2025 GNOME Foundation, Inc.
*
* SPDX-License-Identifier: LGPL-2.1-or-later
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*
* Authors:
* - Philip Withnall <pwithnall@gnome.org>
*/
#include "fuzz.h"
/* Fuzzing entry point for g_get_locale_variants().
 *
 * @data is the raw fuzzer input and @size is its length in bytes. The input
 * is copied into a nul-terminated string, passed to g_get_locale_variants(),
 * and the result is checked to be non-NULL and to contain the input string.
 *
 * Always returns 0.
 */
int
LLVMFuzzerTestOneInput (const unsigned char *data, size_t size)
{
  char **v;
  unsigned char *nul_terminated_data = NULL;

  fuzz_set_logging_func ();

  /* g_get_locale_variants() doesn't take a length argument, so work on a
   * nul-terminated copy of (at most) the first @size bytes of @data */
  nul_terminated_data = (unsigned char *) g_strndup ((const char *) data, size);

  v = g_get_locale_variants ((char *) nul_terminated_data);
  g_assert_nonnull (v);

  /* The input locale is expected to always be one of the returned variants */
  g_assert_true (g_strv_contains ((const char * const *) v, (char *) nul_terminated_data));

  /* Both allocations are owned by this function; release them before returning */
  g_free (nul_terminated_data);
  g_strfreev (v);

  return 0;
}

View File

@@ -25,6 +25,7 @@ fuzz_targets = [
'fuzz_date_parse',
'fuzz_date_time_new_from_iso8601',
'fuzz_dbus_message',
'fuzz_get_locale_variants',
'fuzz_inet_address_mask_new_from_string',
'fuzz_inet_address_new_from_string',
'fuzz_inet_socket_address_new_from_string',

View File

@@ -537,6 +537,7 @@ enum
};
/* Break an X/Open style locale specification into components
* e.g. `en_GB` or `uz_UZ.utf8@cyrillic`
*/
static guint
explode_locale (const gchar *locale,
@@ -563,7 +564,7 @@ explode_locale (const gchar *locale,
else
at_pos = locale + strlen (locale);
if (dot_pos)
if (dot_pos && dot_pos < at_pos)
{
mask |= COMPONENT_CODESET;
*codeset = g_strndup (dot_pos, at_pos - dot_pos);
@@ -571,7 +572,7 @@ explode_locale (const gchar *locale,
else
dot_pos = at_pos;
if (uscore_pos)
if (uscore_pos && uscore_pos < dot_pos)
{
mask |= COMPONENT_TERRITORY;
*territory = g_strndup (uscore_pos, dot_pos - uscore_pos);
@@ -579,6 +580,7 @@ explode_locale (const gchar *locale,
else
uscore_pos = dot_pos;
g_assert (uscore_pos >= locale);
*language = g_strndup (locale, uscore_pos - locale);
return mask;

View File

@@ -71,15 +71,40 @@ test_language_names (void)
static void
test_locale_variants (void)
{
char **v;
const struct
{
const char *locale_str;
const char * const *expected_variants;
}
vectors[] =
{
/* Try some valid locales */
{ "en", (const char *[]) { "en", NULL } },
{ "sr@latin", (const char *[]) { "sr@latin", "sr", NULL } },
{ "fr_BE", (const char *[]) { "fr_BE", "fr", NULL } },
{ "sr_SR@latin", (const char *[]) { "sr_SR@latin", "sr@latin", "sr_SR", "sr", NULL } },
{ "sr_SR@latin.UTF-8", (const char *[]) { "sr_SR@latin.UTF-8", "sr_SR@latin", "sr.UTF-8", "sr", NULL } },
v = g_get_locale_variants ("fr_BE");
g_assert_cmpstrv (v, ((const char *[]) { "fr_BE", "fr", NULL }));
g_strfreev (v);
/* And some invalid ones. The parser should try and extract what value it can */
{ "sr@latin_invalid", (const char *[]) { "sr@latin_invalid", "sr@latin", NULL } },
{ "sr.UTF-8@latin", (const char *[]) { "sr.UTF-8@latin", "sr@latin", "sr.UTF-8", "sr", NULL } },
{ "sr.UTF-8_latin", (const char *[]) { "sr.UTF-8_latin", "sr.UTF-8", NULL } },
{ "sr.UTF-8@latin_invalid", (const char *[]) { "sr.UTF-8@latin_invalid", "sr.UTF-8@latin", NULL } },
};
size_t i;
v = g_get_locale_variants ("sr_SR@latin");
g_assert_cmpstrv (v, ((const char *[]) { "sr_SR@latin", "sr@latin", "sr_SR", "sr", NULL }));
g_strfreev (v);
for (i = 0; i < G_N_ELEMENTS (vectors); i++)
{
char **v;
g_test_message ("Testing locale %s", vectors[i].locale_str);
v = g_get_locale_variants (vectors[i].locale_str);
g_assert_cmpstrv (v, vectors[i].expected_variants);
/* g_get_locale_variants() guarantees that the input is always in the output: */
g_assert_true (g_strv_contains ((const char * const *) v, vectors[i].locale_str));
g_strfreev (v);
}
}
static void