gcharset: Fix potential negative string offsets in g_get_locale_variants()

The parser was assuming that all three separators (`_`, `.` and `@`) appeared in that order, but the input might not contain them in order. In that case, the parser would have passed negative values to `g_strndup()`, which would have been implicitly cast to large positive values, potentially exposing a lot of memory (up to the first nul byte, which was probably quite soon). Expand the existing `g_get_locale_variants()` test to cover some invalid parsing, and add a fuzzing test too. Spotted by `-Wsign-conversion`. Signed-off-by: Philip Withnall <pwithnall@gnome.org> Helps: #3405
2025-08-04 08:23:38 +02:00 · 2025-04-10 19:50:29 +01:00
parent c6884c95f6
commit 7aeb4d94f2
4 changed files with 82 additions and 9 deletions
--- a/fuzzing/fuzz_get_locale_variants.c
+++ b/fuzzing/fuzz_get_locale_variants.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright 2025 GNOME Foundation, Inc.
+ *
+ * SPDX-License-Identifier: LGPL-2.1-or-later
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Authors:
+ *  - Philip Withnall <pwithnall@gnome.org>
+ */
+
+#include "fuzz.h"
+
+int
+LLVMFuzzerTestOneInput (const unsigned char *data, size_t size)
+{
+  unsigned char *nul_terminated_data = NULL;
+  char **v;
+
+  fuzz_set_logging_func ();
+
+  /* ignore @size (g_get_locale_variants() doesn’t support it); ensure @data is nul-terminated */
+  nul_terminated_data = (unsigned char *) g_strndup ((const char *) data, size);
+
+  v = g_get_locale_variants ((char *) nul_terminated_data);
+  g_assert_nonnull (v);
+  /* g_get_locale_variants() guarantees that the input is always in the output: */
+  g_assert_true (g_strv_contains ((const char * const *) v, (char *) nul_terminated_data));
+  g_strfreev (v);
+
+  g_free (nul_terminated_data);
+
+  return 0;
+}
--- a/fuzzing/meson.build
+++ b/fuzzing/meson.build
@@ -25,6 +25,7 @@ fuzz_targets = [
  'fuzz_date_parse',
  'fuzz_date_time_new_from_iso8601',
  'fuzz_dbus_message',
+  'fuzz_get_locale_variants',
  'fuzz_inet_address_mask_new_from_string',
  'fuzz_inet_address_new_from_string',
  'fuzz_inet_socket_address_new_from_string',
--- a/glib/gcharset.c
+++ b/glib/gcharset.c
@@ -537,6 +537,7 @@ enum
 };

 /* Break an X/Open style locale specification into components
+ * e.g. `en_GB` or `uz_UZ.utf8@cyrillic`
 */
 static guint
 explode_locale (const gchar *locale,
@@ -563,7 +564,7 @@ explode_locale (const gchar *locale,
  else
    at_pos = locale + strlen (locale);

-  if (dot_pos)
+  if (dot_pos && dot_pos < at_pos)
    {
      mask |= COMPONENT_CODESET;
      *codeset = g_strndup (dot_pos, at_pos - dot_pos);
@@ -571,7 +572,7 @@ explode_locale (const gchar *locale,
  else
    dot_pos = at_pos;

-  if (uscore_pos)
+  if (uscore_pos && uscore_pos < dot_pos)
    {
      mask |= COMPONENT_TERRITORY;
      *territory = g_strndup (uscore_pos, dot_pos - uscore_pos);
@@ -579,6 +580,7 @@ explode_locale (const gchar *locale,
  else
    uscore_pos = dot_pos;

+  g_assert (uscore_pos >= locale);
  *language = g_strndup (locale, uscore_pos - locale);

  return mask;
--- a/glib/tests/utils.c
+++ b/glib/tests/utils.c
@@ -71,15 +71,40 @@ test_language_names (void)
 static void
 test_locale_variants (void)
 {
-  char **v;
+  const struct
+    {
+      const char *locale_str;
+      const char * const *expected_variants;
+    }
+  vectors[] =
+    {
+      /* Try some valid locales */
+      { "en", (const char *[]) { "en", NULL } },
+      { "sr@latin", (const char *[]) { "sr@latin", "sr", NULL } },
+      { "fr_BE", (const char *[]) { "fr_BE", "fr", NULL } },
+      { "sr_SR@latin", (const char *[]) { "sr_SR@latin", "sr@latin", "sr_SR", "sr", NULL } },
+      { "sr_SR@latin.UTF-8", (const char *[]) { "sr_SR@latin.UTF-8", "sr_SR@latin", "sr.UTF-8", "sr", NULL } },

-  v = g_get_locale_variants ("fr_BE");
-  g_assert_cmpstrv (v, ((const char *[]) { "fr_BE", "fr", NULL }));
-  g_strfreev (v);
+      /* And some invalid ones. The parser should try and extract what value it can */
+      { "sr@latin_invalid", (const char *[]) { "sr@latin_invalid", "sr@latin", NULL } },
+      { "sr.UTF-8@latin", (const char *[]) { "sr.UTF-8@latin", "sr@latin", "sr.UTF-8", "sr", NULL } },
+      { "sr.UTF-8_latin", (const char *[]) { "sr.UTF-8_latin", "sr.UTF-8", NULL } },
+      { "sr.UTF-8@latin_invalid", (const char *[]) { "sr.UTF-8@latin_invalid", "sr.UTF-8@latin", NULL } },
+    };
+  size_t i;

-  v = g_get_locale_variants ("sr_SR@latin");
-  g_assert_cmpstrv (v, ((const char *[]) { "sr_SR@latin", "sr@latin", "sr_SR", "sr", NULL }));
-  g_strfreev (v);
+  for (i = 0; i < G_N_ELEMENTS (vectors); i++)
+    {
+      char **v;
+
+      g_test_message ("Testing locale ‘%s’", vectors[i].locale_str);
+
+      v = g_get_locale_variants (vectors[i].locale_str);
+      g_assert_cmpstrv (v, vectors[i].expected_variants);
+      /* g_get_locale_variants() guarantees that the input is always in the output: */
+      g_assert_true (g_strv_contains ((const char * const *) v, vectors[i].locale_str));
+      g_strfreev (v);
+    }
 }

 static void