gconvert: Consistently validate inputs and outputs for embedded NULs

String inputs to convenience conversion functions g_locale_from_utf8(),
g_filename_from_utf8(), and g_filename_to_utf8(), are annotated for the
bindings as NUL-terminated strings of (type utf8) or (type filename).
There is also a len parameter that allows converting part of the string,
but it is exposed to the bindings as a value independent from the string
buffer. Absent any more sophisticated ways to annotate, the way to
provide a safeguard against len argument values longer than the
string length is to check that no nul is encountered within the first
len bytes of the string. strdup_len() includes this check as part of
UTF-8 validation, but g_convert() permits embedded nuls.

For g_filename_from_utf8(), also check the output to prevent embedded NUL
bytes. It's not safe to allow embedded NULs in a string that is going
to be used as (type filename), and no known bytestring encoding for
file names allows them.

https://bugzilla.gnome.org/show_bug.cgi?id=792516
This commit is contained in:
Mikhail Zabaluev 2018-01-17 09:25:23 +02:00 committed by Philip Withnall
parent d584ff77f6
commit f35a6a7031
2 changed files with 205 additions and 48 deletions

View File

@ -866,28 +866,67 @@ strdup_len (const gchar *string,
return g_strndup (string, real_len);
}
static gchar *
convert_to_utf8 (const gchar *opsysstring,
gssize len,
const gchar *charset,
gsize *bytes_read,
gsize *bytes_written,
GError **error)
typedef enum
{
gchar *utf8;
CONVERT_CHECK_NO_NULS_IN_INPUT = 1 << 0,
CONVERT_CHECK_NO_NULS_IN_OUTPUT = 1 << 1
} ConvertCheckFlags;
/*
* Convert from @string in the encoding identified by @from_codeset,
* returning a string in the encoding identifed by @to_codeset.
* @len can be negative if @string is nul-terminated, or a non-negative
* value in bytes. Flags defined in #ConvertCheckFlags can be set in @flags
* to check the input, the output, or both, for embedded nul bytes.
* On success, @bytes_read, if provided, will be set to the number of bytes
* in @string up to @len or the terminating nul byte, and @bytes_written, if
* provided, will be set to the number of output bytes written into the
* returned buffer, excluding the terminating nul sequence.
* On error, @bytes_read will be set to the byte offset after the last valid
* sequence in @string, and @bytes_written will be set to 0.
*/
static gchar *
convert_checked (const gchar *string,
gssize len,
const gchar *to_codeset,
const gchar *from_codeset,
ConvertCheckFlags flags,
gsize *bytes_read,
gsize *bytes_written,
GError **error)
{
gchar *out;
gsize outbytes;
utf8 = g_convert (opsysstring, len, "UTF-8", charset,
bytes_read, &outbytes, error);
if (utf8 == NULL)
if ((flags & CONVERT_CHECK_NO_NULS_IN_INPUT) && len > 0)
{
const gchar *early_nul = memchr (string, '\0', len);
if (early_nul != NULL)
{
if (bytes_read)
*bytes_read = early_nul - string;
if (bytes_written)
*bytes_written = 0;
g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
_("Embedded NUL byte in conversion input"));
return NULL;
}
}
out = g_convert (string, len, to_codeset, from_codeset,
bytes_read, &outbytes, error);
if (out == NULL)
{
if (bytes_written)
*bytes_written = 0;
return NULL;
}
if (memchr (utf8, '\0', outbytes) != NULL)
if ((flags & CONVERT_CHECK_NO_NULS_IN_OUTPUT)
&& memchr (out, '\0', outbytes) != NULL)
{
g_free (utf8);
g_free (out);
if (bytes_written)
*bytes_written = 0;
g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_EMBEDDED_NUL,
@ -897,7 +936,7 @@ convert_to_utf8 (const gchar *opsysstring,
if (bytes_written)
*bytes_written = outbytes;
return utf8;
return out;
}
/**
@ -948,7 +987,8 @@ g_locale_to_utf8 (const gchar *opsysstring,
if (g_get_charset (&charset))
return strdup_len (opsysstring, len, bytes_read, bytes_written, error);
else
return convert_to_utf8 (opsysstring, len, charset,
return convert_checked (opsysstring, len, "UTF-8", charset,
CONVERT_CHECK_NO_NULS_IN_OUTPUT,
bytes_read, bytes_written, error);
}
@ -975,8 +1015,8 @@ g_locale_to_utf8 (const gchar *opsysstring,
* system) in the [current locale][setlocale]. On Windows this means
* the system codepage.
*
* The input string should not contain nul characters even if the @len
* argument is positive. A nul character found inside the string may result
* The input string shall not contain nul characters even if the @len
* argument is positive. A nul character found inside the string will result
* in error %G_CONVERT_ERROR_ILLEGAL_SEQUENCE. Use g_convert() to convert
* input that may contain embedded nul characters.
*
@ -995,8 +1035,9 @@ g_locale_from_utf8 (const gchar *utf8string,
if (g_get_charset (&charset))
return strdup_len (utf8string, len, bytes_read, bytes_written, error);
else
return g_convert (utf8string, len,
charset, "UTF-8", bytes_read, bytes_written, error);
return convert_checked (utf8string, len, charset, "UTF-8",
CONVERT_CHECK_NO_NULS_IN_INPUT,
bytes_read, bytes_written, error);
}
#ifndef G_PLATFORM_WIN32
@ -1184,12 +1225,12 @@ get_filename_charset (const gchar **filename_charset)
* for filenames; on other platforms, this function indirectly depends on
* the [current locale][setlocale].
*
* The input string shall not contain nul characters even if the @len
* argument is positive. A nul character found inside the string will result
* in error %G_CONVERT_ERROR_ILLEGAL_SEQUENCE.
* If the source encoding is not UTF-8 and the conversion output contains a
* nul character, the error %G_CONVERT_ERROR_EMBEDDED_NUL is set and the
* function returns %NULL.
* If the source encoding is UTF-8, an embedded nul character is treated with
* the %G_CONVERT_ERROR_ILLEGAL_SEQUENCE error for backward compatibility with
* earlier versions of this library. Use g_convert() to produce output that
* function returns %NULL. Use g_convert() to produce output that
* may contain embedded nul characters.
*
* Returns: The converted string, or %NULL on an error.
@ -1208,7 +1249,9 @@ g_filename_to_utf8 (const gchar *opsysstring,
if (get_filename_charset (&charset))
return strdup_len (opsysstring, len, bytes_read, bytes_written, error);
else
return convert_to_utf8 (opsysstring, len, charset,
return convert_checked (opsysstring, len, "UTF-8", charset,
CONVERT_CHECK_NO_NULS_IN_INPUT |
CONVERT_CHECK_NO_NULS_IN_OUTPUT,
bytes_read, bytes_written, error);
}
@ -1235,8 +1278,8 @@ g_filename_to_utf8 (const gchar *opsysstring,
* on other platforms, this function indirectly depends on the
* [current locale][setlocale].
*
* The input string should not contain nul characters even if the @len
* argument is positive. A nul character found inside the string may result
* The input string shall not contain nul characters even if the @len
* argument is positive. A nul character found inside the string will result
* in error %G_CONVERT_ERROR_ILLEGAL_SEQUENCE. Note that nul bytes are
* prohibited in all filename encodings that GLib is known to work with.
*
@ -1255,8 +1298,10 @@ g_filename_from_utf8 (const gchar *utf8string,
if (get_filename_charset (&charset))
return strdup_len (utf8string, len, bytes_read, bytes_written, error);
else
return g_convert (utf8string, len,
charset, "UTF-8", bytes_read, bytes_written, error);
return convert_checked (utf8string, len, charset, "UTF-8",
CONVERT_CHECK_NO_NULS_IN_INPUT |
CONVERT_CHECK_NO_NULS_IN_OUTPUT,
bytes_read, bytes_written, error);
}
/* Test of haystack has the needle prefix, comparing case

View File

@ -685,11 +685,11 @@ test_filename_display (void)
}
static void
test_locale_embedded_nul (void)
test_locale_to_utf8_embedded_nul (void)
{
g_test_trap_subprocess ("/conversion/locale-embedded-nul/subprocess/utf8", 0, 0);
g_test_trap_subprocess ("/conversion/locale-to-utf8/embedded-nul/subprocess/utf8", 0, 0);
g_test_trap_assert_passed ();
g_test_trap_subprocess ("/conversion/locale-embedded-nul/subprocess/iconv", 0, 0);
g_test_trap_subprocess ("/conversion/locale-to-utf8/embedded-nul/subprocess/iconv", 0, 0);
g_test_trap_assert_passed ();
}
@ -697,7 +697,7 @@ test_locale_embedded_nul (void)
* result in an error.
*/
static void
test_locale_embedded_nul_utf8 (void)
test_locale_to_utf8_embedded_nul_utf8 (void)
{
gchar *res;
gsize bytes_read;
@ -719,7 +719,7 @@ test_locale_embedded_nul_utf8 (void)
* when converted from non-UTF8 input, result in an error.
*/
static void
test_locale_embedded_nul_iconv (void)
test_locale_to_utf8_embedded_nul_iconv (void)
{
gchar *res;
GError *error = NULL;
@ -736,11 +736,64 @@ test_locale_embedded_nul_iconv (void)
}
static void
test_filename_embedded_nul (void)
test_locale_from_utf8_embedded_nul (void)
{
g_test_trap_subprocess ("/conversion/filename-embedded-nul/subprocess/utf8", 0, 0);
g_test_trap_subprocess ("/conversion/locale-from-utf8/embedded-nul/subprocess/utf8", 0, 0);
g_test_trap_assert_passed ();
g_test_trap_subprocess ("/conversion/filename-embedded-nul/subprocess/iconv", 0, 0);
g_test_trap_subprocess ("/conversion/locale-from-utf8/embedded-nul/subprocess/iconv", 0, 0);
g_test_trap_assert_passed ();
}
/* Test that embedded nul characters in input to g_locale_from_utf8(),
* when converting (copying) to UTF-8 output, result in an error.
*/
static void
test_locale_from_utf8_embedded_nul_utf8 (void)
{
gchar *res;
gsize bytes_read;
GError *error = NULL;
setlocale (LC_ALL, "");
g_setenv ("CHARSET", "UTF-8", TRUE);
g_assert_true (g_get_charset (NULL));
res = g_locale_from_utf8 ("ab\0c", 4, &bytes_read, NULL, &error);
g_assert_null (res);
g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
g_assert_cmpuint (bytes_read, ==, 2);
g_error_free (error);
}
/* Test that embedded nul characters in input to g_locale_from_utf8(),
* when converting to non-UTF-8 output, result in an error.
*/
static void
test_locale_from_utf8_embedded_nul_iconv (void)
{
gchar *res;
gsize bytes_read;
GError *error = NULL;
setlocale (LC_ALL, "C");
g_setenv ("CHARSET", "US-ASCII", TRUE);
g_assert_false (g_get_charset (NULL));
res = g_locale_from_utf8 ("ab\0c", 4, &bytes_read, NULL, &error);
g_assert_null (res);
g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
g_assert_cmpuint (bytes_read, ==, 2);
g_error_free (error);
}
static void
test_filename_to_utf8_embedded_nul (void)
{
g_test_trap_subprocess ("/conversion/filename-to-utf8/embedded-nul/subprocess/utf8", 0, 0);
g_test_trap_assert_passed ();
g_test_trap_subprocess ("/conversion/filename-to-utf8/embedded-nul/subprocess/iconv", 0, 0);
g_test_trap_assert_passed ();
}
@ -748,7 +801,7 @@ test_filename_embedded_nul (void)
* result in an error.
*/
static void
test_filename_embedded_nul_utf8 (void)
test_filename_to_utf8_embedded_nul_utf8 (void)
{
gchar *res;
gsize bytes_read;
@ -765,22 +818,75 @@ test_filename_embedded_nul_utf8 (void)
g_error_free (error);
}
/* Test that embedded nul characters in output of g_filename_to_utf8(),
* when converted from non-UTF8 input, result in an error.
/* Test that embedded nul characters in non-UTF-8 input of g_filename_to_utf8()
* result in an error.
*/
static void
test_filename_embedded_nul_iconv (void)
test_filename_to_utf8_embedded_nul_iconv (void)
{
gchar *res;
gsize bytes_read;
GError *error = NULL;
g_setenv ("G_FILENAME_ENCODING", "US-ASCII", TRUE);
g_assert_false (g_get_filename_charsets (NULL));
res = g_filename_to_utf8 ("ab\0c", 4, NULL, NULL, &error);
res = g_filename_to_utf8 ("ab\0c", 4, &bytes_read, NULL, &error);
g_assert_null (res);
g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_EMBEDDED_NUL);
g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
g_assert_cmpuint (bytes_read, ==, 2);
g_error_free (error);
}
static void
test_filename_from_utf8_embedded_nul (void)
{
g_test_trap_subprocess ("/conversion/filename-from-utf8/embedded-nul/subprocess/utf8", 0, 0);
g_test_trap_assert_passed ();
g_test_trap_subprocess ("/conversion/filename-from-utf8/embedded-nul/subprocess/iconv", 0, 0);
g_test_trap_assert_passed ();
}
/* Test that embedded nul characters in input to g_filename_from_utf8(),
* when converting (copying) to UTF-8 output, result in an error.
*/
static void
test_filename_from_utf8_embedded_nul_utf8 (void)
{
gchar *res;
gsize bytes_read;
GError *error = NULL;
g_setenv ("G_FILENAME_ENCODING", "UTF-8", TRUE);
g_assert_true (g_get_filename_charsets (NULL));
res = g_filename_from_utf8 ("ab\0c", 4, &bytes_read, NULL, &error);
g_assert_null (res);
g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
g_assert_cmpuint (bytes_read, ==, 2);
g_error_free (error);
}
/* Test that embedded nul characters in input to g_filename_from_utf8(),
* when converting to non-UTF-8 output, result in an error.
*/
static void
test_filename_from_utf8_embedded_nul_iconv (void)
{
gchar *res;
gsize bytes_read;
GError *error = NULL;
g_setenv ("G_FILENAME_ENCODING", "US-ASCII", TRUE);
g_assert_false (g_get_filename_charsets (NULL));
res = g_filename_from_utf8 ("ab\0c", 4, &bytes_read, NULL, &error);
g_assert_null (res);
g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
g_assert_cmpuint (bytes_read, ==, 2);
g_error_free (error);
}
@ -813,12 +919,18 @@ main (int argc, char *argv[])
g_test_add_func ("/conversion/unicode", test_unicode_conversions);
g_test_add_func ("/conversion/filename-utf8", test_filename_utf8);
g_test_add_func ("/conversion/filename-display", test_filename_display);
g_test_add_func ("/conversion/locale-embedded-nul", test_locale_embedded_nul);
g_test_add_func ("/conversion/locale-embedded-nul/subprocess/utf8", test_locale_embedded_nul_utf8);
g_test_add_func ("/conversion/locale-embedded-nul/subprocess/iconv", test_locale_embedded_nul_iconv);
g_test_add_func ("/conversion/filename-embedded-nul", test_filename_embedded_nul);
g_test_add_func ("/conversion/filename-embedded-nul/subprocess/utf8", test_filename_embedded_nul_utf8);
g_test_add_func ("/conversion/filename-embedded-nul/subprocess/iconv", test_filename_embedded_nul_iconv);
g_test_add_func ("/conversion/locale-to-utf8/embedded-nul", test_locale_to_utf8_embedded_nul);
g_test_add_func ("/conversion/locale-to-utf8/embedded-nul/subprocess/utf8", test_locale_to_utf8_embedded_nul_utf8);
g_test_add_func ("/conversion/locale-to-utf8/embedded-nul/subprocess/iconv", test_locale_to_utf8_embedded_nul_iconv);
g_test_add_func ("/conversion/locale-from-utf8/embedded-nul", test_locale_from_utf8_embedded_nul);
g_test_add_func ("/conversion/locale-from-utf8/embedded-nul/subprocess/utf8", test_locale_from_utf8_embedded_nul_utf8);
g_test_add_func ("/conversion/locale-from-utf8/embedded-nul/subprocess/iconv", test_locale_from_utf8_embedded_nul_iconv);
g_test_add_func ("/conversion/filename-to-utf8/embedded-nul", test_filename_to_utf8_embedded_nul);
g_test_add_func ("/conversion/filename-to-utf8/embedded-nul/subprocess/utf8", test_filename_to_utf8_embedded_nul_utf8);
g_test_add_func ("/conversion/filename-to-utf8/embedded-nul/subprocess/iconv", test_filename_to_utf8_embedded_nul_iconv);
g_test_add_func ("/conversion/filename-from-utf8/embedded-nul", test_filename_from_utf8_embedded_nul);
g_test_add_func ("/conversion/filename-from-utf8/embedded-nul/subprocess/utf8", test_filename_from_utf8_embedded_nul_utf8);
g_test_add_func ("/conversion/filename-from-utf8/embedded-nul/subprocess/iconv", test_filename_from_utf8_embedded_nul_iconv);
return g_test_run ();
}