mirror of
https://gitlab.gnome.org/GNOME/glib.git
synced 2024-11-13 04:46:15 +01:00
gconvert: Consistently validate inputs and outputs for embedded NULs
String inputs to convenience conversion functions g_locale_from_utf8(), g_filename_from_utf8(), and g_filename_to_utf8(), are annotated for the bindings as NUL-terminated strings of (type utf8) or (type filename). There is also a len parameter that allows converting part of the string, but it is exposed to the bindings as a value independent from the string buffer. Absent any more sophisticated ways to annotate, the way to provide a safeguard against len argument values longer than the string length is to check that no nul is encountered within the first len bytes of the string. strdup_len() includes this check as part of UTF-8 validation, but g_convert() permits embedded nuls. For g_filename_from_utf8(), also check the output to prevent embedded NUL bytes. It's not safe to allow embedded NULs in a string that is going to be used as (type filename), and no known bytestring encoding for file names allows them. https://bugzilla.gnome.org/show_bug.cgi?id=792516
This commit is contained in:
parent
d584ff77f6
commit
f35a6a7031
101
glib/gconvert.c
101
glib/gconvert.c
@ -866,28 +866,67 @@ strdup_len (const gchar *string,
|
||||
return g_strndup (string, real_len);
|
||||
}
|
||||
|
||||
static gchar *
|
||||
convert_to_utf8 (const gchar *opsysstring,
|
||||
gssize len,
|
||||
const gchar *charset,
|
||||
gsize *bytes_read,
|
||||
gsize *bytes_written,
|
||||
GError **error)
|
||||
typedef enum
|
||||
{
|
||||
gchar *utf8;
|
||||
CONVERT_CHECK_NO_NULS_IN_INPUT = 1 << 0,
|
||||
CONVERT_CHECK_NO_NULS_IN_OUTPUT = 1 << 1
|
||||
} ConvertCheckFlags;
|
||||
|
||||
/*
|
||||
* Convert from @string in the encoding identified by @from_codeset,
|
||||
* returning a string in the encoding identifed by @to_codeset.
|
||||
* @len can be negative if @string is nul-terminated, or a non-negative
|
||||
* value in bytes. Flags defined in #ConvertCheckFlags can be set in @flags
|
||||
* to check the input, the output, or both, for embedded nul bytes.
|
||||
* On success, @bytes_read, if provided, will be set to the number of bytes
|
||||
* in @string up to @len or the terminating nul byte, and @bytes_written, if
|
||||
* provided, will be set to the number of output bytes written into the
|
||||
* returned buffer, excluding the terminating nul sequence.
|
||||
* On error, @bytes_read will be set to the byte offset after the last valid
|
||||
* sequence in @string, and @bytes_written will be set to 0.
|
||||
*/
|
||||
static gchar *
|
||||
convert_checked (const gchar *string,
|
||||
gssize len,
|
||||
const gchar *to_codeset,
|
||||
const gchar *from_codeset,
|
||||
ConvertCheckFlags flags,
|
||||
gsize *bytes_read,
|
||||
gsize *bytes_written,
|
||||
GError **error)
|
||||
{
|
||||
gchar *out;
|
||||
gsize outbytes;
|
||||
|
||||
utf8 = g_convert (opsysstring, len, "UTF-8", charset,
|
||||
bytes_read, &outbytes, error);
|
||||
if (utf8 == NULL)
|
||||
if ((flags & CONVERT_CHECK_NO_NULS_IN_INPUT) && len > 0)
|
||||
{
|
||||
const gchar *early_nul = memchr (string, '\0', len);
|
||||
if (early_nul != NULL)
|
||||
{
|
||||
if (bytes_read)
|
||||
*bytes_read = early_nul - string;
|
||||
if (bytes_written)
|
||||
*bytes_written = 0;
|
||||
|
||||
g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
|
||||
_("Embedded NUL byte in conversion input"));
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
out = g_convert (string, len, to_codeset, from_codeset,
|
||||
bytes_read, &outbytes, error);
|
||||
if (out == NULL)
|
||||
{
|
||||
if (bytes_written)
|
||||
*bytes_written = 0;
|
||||
return NULL;
|
||||
}
|
||||
if (memchr (utf8, '\0', outbytes) != NULL)
|
||||
|
||||
if ((flags & CONVERT_CHECK_NO_NULS_IN_OUTPUT)
|
||||
&& memchr (out, '\0', outbytes) != NULL)
|
||||
{
|
||||
g_free (utf8);
|
||||
g_free (out);
|
||||
if (bytes_written)
|
||||
*bytes_written = 0;
|
||||
g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_EMBEDDED_NUL,
|
||||
@ -897,7 +936,7 @@ convert_to_utf8 (const gchar *opsysstring,
|
||||
|
||||
if (bytes_written)
|
||||
*bytes_written = outbytes;
|
||||
return utf8;
|
||||
return out;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -948,7 +987,8 @@ g_locale_to_utf8 (const gchar *opsysstring,
|
||||
if (g_get_charset (&charset))
|
||||
return strdup_len (opsysstring, len, bytes_read, bytes_written, error);
|
||||
else
|
||||
return convert_to_utf8 (opsysstring, len, charset,
|
||||
return convert_checked (opsysstring, len, "UTF-8", charset,
|
||||
CONVERT_CHECK_NO_NULS_IN_OUTPUT,
|
||||
bytes_read, bytes_written, error);
|
||||
}
|
||||
|
||||
@ -975,8 +1015,8 @@ g_locale_to_utf8 (const gchar *opsysstring,
|
||||
* system) in the [current locale][setlocale]. On Windows this means
|
||||
* the system codepage.
|
||||
*
|
||||
* The input string should not contain nul characters even if the @len
|
||||
* argument is positive. A nul character found inside the string may result
|
||||
* The input string shall not contain nul characters even if the @len
|
||||
* argument is positive. A nul character found inside the string will result
|
||||
* in error %G_CONVERT_ERROR_ILLEGAL_SEQUENCE. Use g_convert() to convert
|
||||
* input that may contain embedded nul characters.
|
||||
*
|
||||
@ -995,8 +1035,9 @@ g_locale_from_utf8 (const gchar *utf8string,
|
||||
if (g_get_charset (&charset))
|
||||
return strdup_len (utf8string, len, bytes_read, bytes_written, error);
|
||||
else
|
||||
return g_convert (utf8string, len,
|
||||
charset, "UTF-8", bytes_read, bytes_written, error);
|
||||
return convert_checked (utf8string, len, charset, "UTF-8",
|
||||
CONVERT_CHECK_NO_NULS_IN_INPUT,
|
||||
bytes_read, bytes_written, error);
|
||||
}
|
||||
|
||||
#ifndef G_PLATFORM_WIN32
|
||||
@ -1184,12 +1225,12 @@ get_filename_charset (const gchar **filename_charset)
|
||||
* for filenames; on other platforms, this function indirectly depends on
|
||||
* the [current locale][setlocale].
|
||||
*
|
||||
* The input string shall not contain nul characters even if the @len
|
||||
* argument is positive. A nul character found inside the string will result
|
||||
* in error %G_CONVERT_ERROR_ILLEGAL_SEQUENCE.
|
||||
* If the source encoding is not UTF-8 and the conversion output contains a
|
||||
* nul character, the error %G_CONVERT_ERROR_EMBEDDED_NUL is set and the
|
||||
* function returns %NULL.
|
||||
* If the source encoding is UTF-8, an embedded nul character is treated with
|
||||
* the %G_CONVERT_ERROR_ILLEGAL_SEQUENCE error for backward compatibility with
|
||||
* earlier versions of this library. Use g_convert() to produce output that
|
||||
* function returns %NULL. Use g_convert() to produce output that
|
||||
* may contain embedded nul characters.
|
||||
*
|
||||
* Returns: The converted string, or %NULL on an error.
|
||||
@ -1208,7 +1249,9 @@ g_filename_to_utf8 (const gchar *opsysstring,
|
||||
if (get_filename_charset (&charset))
|
||||
return strdup_len (opsysstring, len, bytes_read, bytes_written, error);
|
||||
else
|
||||
return convert_to_utf8 (opsysstring, len, charset,
|
||||
return convert_checked (opsysstring, len, "UTF-8", charset,
|
||||
CONVERT_CHECK_NO_NULS_IN_INPUT |
|
||||
CONVERT_CHECK_NO_NULS_IN_OUTPUT,
|
||||
bytes_read, bytes_written, error);
|
||||
}
|
||||
|
||||
@ -1235,8 +1278,8 @@ g_filename_to_utf8 (const gchar *opsysstring,
|
||||
* on other platforms, this function indirectly depends on the
|
||||
* [current locale][setlocale].
|
||||
*
|
||||
* The input string should not contain nul characters even if the @len
|
||||
* argument is positive. A nul character found inside the string may result
|
||||
* The input string shall not contain nul characters even if the @len
|
||||
* argument is positive. A nul character found inside the string will result
|
||||
* in error %G_CONVERT_ERROR_ILLEGAL_SEQUENCE. Note that nul bytes are
|
||||
* prohibited in all filename encodings that GLib is known to work with.
|
||||
*
|
||||
@ -1255,8 +1298,10 @@ g_filename_from_utf8 (const gchar *utf8string,
|
||||
if (get_filename_charset (&charset))
|
||||
return strdup_len (utf8string, len, bytes_read, bytes_written, error);
|
||||
else
|
||||
return g_convert (utf8string, len,
|
||||
charset, "UTF-8", bytes_read, bytes_written, error);
|
||||
return convert_checked (utf8string, len, charset, "UTF-8",
|
||||
CONVERT_CHECK_NO_NULS_IN_INPUT |
|
||||
CONVERT_CHECK_NO_NULS_IN_OUTPUT,
|
||||
bytes_read, bytes_written, error);
|
||||
}
|
||||
|
||||
/* Test of haystack has the needle prefix, comparing case
|
||||
|
@ -685,11 +685,11 @@ test_filename_display (void)
|
||||
}
|
||||
|
||||
static void
|
||||
test_locale_embedded_nul (void)
|
||||
test_locale_to_utf8_embedded_nul (void)
|
||||
{
|
||||
g_test_trap_subprocess ("/conversion/locale-embedded-nul/subprocess/utf8", 0, 0);
|
||||
g_test_trap_subprocess ("/conversion/locale-to-utf8/embedded-nul/subprocess/utf8", 0, 0);
|
||||
g_test_trap_assert_passed ();
|
||||
g_test_trap_subprocess ("/conversion/locale-embedded-nul/subprocess/iconv", 0, 0);
|
||||
g_test_trap_subprocess ("/conversion/locale-to-utf8/embedded-nul/subprocess/iconv", 0, 0);
|
||||
g_test_trap_assert_passed ();
|
||||
}
|
||||
|
||||
@ -697,7 +697,7 @@ test_locale_embedded_nul (void)
|
||||
* result in an error.
|
||||
*/
|
||||
static void
|
||||
test_locale_embedded_nul_utf8 (void)
|
||||
test_locale_to_utf8_embedded_nul_utf8 (void)
|
||||
{
|
||||
gchar *res;
|
||||
gsize bytes_read;
|
||||
@ -719,7 +719,7 @@ test_locale_embedded_nul_utf8 (void)
|
||||
* when converted from non-UTF8 input, result in an error.
|
||||
*/
|
||||
static void
|
||||
test_locale_embedded_nul_iconv (void)
|
||||
test_locale_to_utf8_embedded_nul_iconv (void)
|
||||
{
|
||||
gchar *res;
|
||||
GError *error = NULL;
|
||||
@ -736,11 +736,64 @@ test_locale_embedded_nul_iconv (void)
|
||||
}
|
||||
|
||||
static void
|
||||
test_filename_embedded_nul (void)
|
||||
test_locale_from_utf8_embedded_nul (void)
|
||||
{
|
||||
g_test_trap_subprocess ("/conversion/filename-embedded-nul/subprocess/utf8", 0, 0);
|
||||
g_test_trap_subprocess ("/conversion/locale-from-utf8/embedded-nul/subprocess/utf8", 0, 0);
|
||||
g_test_trap_assert_passed ();
|
||||
g_test_trap_subprocess ("/conversion/filename-embedded-nul/subprocess/iconv", 0, 0);
|
||||
g_test_trap_subprocess ("/conversion/locale-from-utf8/embedded-nul/subprocess/iconv", 0, 0);
|
||||
g_test_trap_assert_passed ();
|
||||
}
|
||||
|
||||
/* Test that embedded nul characters in input to g_locale_from_utf8(),
|
||||
* when converting (copying) to UTF-8 output, result in an error.
|
||||
*/
|
||||
static void
|
||||
test_locale_from_utf8_embedded_nul_utf8 (void)
|
||||
{
|
||||
gchar *res;
|
||||
gsize bytes_read;
|
||||
GError *error = NULL;
|
||||
|
||||
setlocale (LC_ALL, "");
|
||||
g_setenv ("CHARSET", "UTF-8", TRUE);
|
||||
g_assert_true (g_get_charset (NULL));
|
||||
|
||||
res = g_locale_from_utf8 ("ab\0c", 4, &bytes_read, NULL, &error);
|
||||
|
||||
g_assert_null (res);
|
||||
g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
|
||||
g_assert_cmpuint (bytes_read, ==, 2);
|
||||
g_error_free (error);
|
||||
}
|
||||
|
||||
/* Test that embedded nul characters in input to g_locale_from_utf8(),
|
||||
* when converting to non-UTF-8 output, result in an error.
|
||||
*/
|
||||
static void
|
||||
test_locale_from_utf8_embedded_nul_iconv (void)
|
||||
{
|
||||
gchar *res;
|
||||
gsize bytes_read;
|
||||
GError *error = NULL;
|
||||
|
||||
setlocale (LC_ALL, "C");
|
||||
g_setenv ("CHARSET", "US-ASCII", TRUE);
|
||||
g_assert_false (g_get_charset (NULL));
|
||||
|
||||
res = g_locale_from_utf8 ("ab\0c", 4, &bytes_read, NULL, &error);
|
||||
|
||||
g_assert_null (res);
|
||||
g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
|
||||
g_assert_cmpuint (bytes_read, ==, 2);
|
||||
g_error_free (error);
|
||||
}
|
||||
|
||||
static void
|
||||
test_filename_to_utf8_embedded_nul (void)
|
||||
{
|
||||
g_test_trap_subprocess ("/conversion/filename-to-utf8/embedded-nul/subprocess/utf8", 0, 0);
|
||||
g_test_trap_assert_passed ();
|
||||
g_test_trap_subprocess ("/conversion/filename-to-utf8/embedded-nul/subprocess/iconv", 0, 0);
|
||||
g_test_trap_assert_passed ();
|
||||
}
|
||||
|
||||
@ -748,7 +801,7 @@ test_filename_embedded_nul (void)
|
||||
* result in an error.
|
||||
*/
|
||||
static void
|
||||
test_filename_embedded_nul_utf8 (void)
|
||||
test_filename_to_utf8_embedded_nul_utf8 (void)
|
||||
{
|
||||
gchar *res;
|
||||
gsize bytes_read;
|
||||
@ -765,22 +818,75 @@ test_filename_embedded_nul_utf8 (void)
|
||||
g_error_free (error);
|
||||
}
|
||||
|
||||
/* Test that embedded nul characters in output of g_filename_to_utf8(),
|
||||
* when converted from non-UTF8 input, result in an error.
|
||||
/* Test that embedded nul characters in non-UTF-8 input of g_filename_to_utf8()
|
||||
* result in an error.
|
||||
*/
|
||||
static void
|
||||
test_filename_embedded_nul_iconv (void)
|
||||
test_filename_to_utf8_embedded_nul_iconv (void)
|
||||
{
|
||||
gchar *res;
|
||||
gsize bytes_read;
|
||||
GError *error = NULL;
|
||||
|
||||
g_setenv ("G_FILENAME_ENCODING", "US-ASCII", TRUE);
|
||||
g_assert_false (g_get_filename_charsets (NULL));
|
||||
|
||||
res = g_filename_to_utf8 ("ab\0c", 4, NULL, NULL, &error);
|
||||
res = g_filename_to_utf8 ("ab\0c", 4, &bytes_read, NULL, &error);
|
||||
|
||||
g_assert_null (res);
|
||||
g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_EMBEDDED_NUL);
|
||||
g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
|
||||
g_assert_cmpuint (bytes_read, ==, 2);
|
||||
g_error_free (error);
|
||||
}
|
||||
|
||||
static void
|
||||
test_filename_from_utf8_embedded_nul (void)
|
||||
{
|
||||
g_test_trap_subprocess ("/conversion/filename-from-utf8/embedded-nul/subprocess/utf8", 0, 0);
|
||||
g_test_trap_assert_passed ();
|
||||
g_test_trap_subprocess ("/conversion/filename-from-utf8/embedded-nul/subprocess/iconv", 0, 0);
|
||||
g_test_trap_assert_passed ();
|
||||
}
|
||||
|
||||
/* Test that embedded nul characters in input to g_filename_from_utf8(),
|
||||
* when converting (copying) to UTF-8 output, result in an error.
|
||||
*/
|
||||
static void
|
||||
test_filename_from_utf8_embedded_nul_utf8 (void)
|
||||
{
|
||||
gchar *res;
|
||||
gsize bytes_read;
|
||||
GError *error = NULL;
|
||||
|
||||
g_setenv ("G_FILENAME_ENCODING", "UTF-8", TRUE);
|
||||
g_assert_true (g_get_filename_charsets (NULL));
|
||||
|
||||
res = g_filename_from_utf8 ("ab\0c", 4, &bytes_read, NULL, &error);
|
||||
|
||||
g_assert_null (res);
|
||||
g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
|
||||
g_assert_cmpuint (bytes_read, ==, 2);
|
||||
g_error_free (error);
|
||||
}
|
||||
|
||||
/* Test that embedded nul characters in input to g_filename_from_utf8(),
|
||||
* when converting to non-UTF-8 output, result in an error.
|
||||
*/
|
||||
static void
|
||||
test_filename_from_utf8_embedded_nul_iconv (void)
|
||||
{
|
||||
gchar *res;
|
||||
gsize bytes_read;
|
||||
GError *error = NULL;
|
||||
|
||||
g_setenv ("G_FILENAME_ENCODING", "US-ASCII", TRUE);
|
||||
g_assert_false (g_get_filename_charsets (NULL));
|
||||
|
||||
res = g_filename_from_utf8 ("ab\0c", 4, &bytes_read, NULL, &error);
|
||||
|
||||
g_assert_null (res);
|
||||
g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
|
||||
g_assert_cmpuint (bytes_read, ==, 2);
|
||||
g_error_free (error);
|
||||
}
|
||||
|
||||
@ -813,12 +919,18 @@ main (int argc, char *argv[])
|
||||
g_test_add_func ("/conversion/unicode", test_unicode_conversions);
|
||||
g_test_add_func ("/conversion/filename-utf8", test_filename_utf8);
|
||||
g_test_add_func ("/conversion/filename-display", test_filename_display);
|
||||
g_test_add_func ("/conversion/locale-embedded-nul", test_locale_embedded_nul);
|
||||
g_test_add_func ("/conversion/locale-embedded-nul/subprocess/utf8", test_locale_embedded_nul_utf8);
|
||||
g_test_add_func ("/conversion/locale-embedded-nul/subprocess/iconv", test_locale_embedded_nul_iconv);
|
||||
g_test_add_func ("/conversion/filename-embedded-nul", test_filename_embedded_nul);
|
||||
g_test_add_func ("/conversion/filename-embedded-nul/subprocess/utf8", test_filename_embedded_nul_utf8);
|
||||
g_test_add_func ("/conversion/filename-embedded-nul/subprocess/iconv", test_filename_embedded_nul_iconv);
|
||||
g_test_add_func ("/conversion/locale-to-utf8/embedded-nul", test_locale_to_utf8_embedded_nul);
|
||||
g_test_add_func ("/conversion/locale-to-utf8/embedded-nul/subprocess/utf8", test_locale_to_utf8_embedded_nul_utf8);
|
||||
g_test_add_func ("/conversion/locale-to-utf8/embedded-nul/subprocess/iconv", test_locale_to_utf8_embedded_nul_iconv);
|
||||
g_test_add_func ("/conversion/locale-from-utf8/embedded-nul", test_locale_from_utf8_embedded_nul);
|
||||
g_test_add_func ("/conversion/locale-from-utf8/embedded-nul/subprocess/utf8", test_locale_from_utf8_embedded_nul_utf8);
|
||||
g_test_add_func ("/conversion/locale-from-utf8/embedded-nul/subprocess/iconv", test_locale_from_utf8_embedded_nul_iconv);
|
||||
g_test_add_func ("/conversion/filename-to-utf8/embedded-nul", test_filename_to_utf8_embedded_nul);
|
||||
g_test_add_func ("/conversion/filename-to-utf8/embedded-nul/subprocess/utf8", test_filename_to_utf8_embedded_nul_utf8);
|
||||
g_test_add_func ("/conversion/filename-to-utf8/embedded-nul/subprocess/iconv", test_filename_to_utf8_embedded_nul_iconv);
|
||||
g_test_add_func ("/conversion/filename-from-utf8/embedded-nul", test_filename_from_utf8_embedded_nul);
|
||||
g_test_add_func ("/conversion/filename-from-utf8/embedded-nul/subprocess/utf8", test_filename_from_utf8_embedded_nul_utf8);
|
||||
g_test_add_func ("/conversion/filename-from-utf8/embedded-nul/subprocess/iconv", test_filename_from_utf8_embedded_nul_iconv);
|
||||
|
||||
return g_test_run ();
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user