mirror of
https://gitlab.gnome.org/GNOME/glib.git
synced 2025-01-27 22:46:15 +01:00
Merge branch 'normalize-utf8-bounds-checking' into 'main'
g_utf8_normalize: don't read past the end of the buffer
See merge request GNOME/glib!3341
This commit is contained in:
commit
353f2e4b3c
@ -388,9 +388,33 @@ _g_utf8_normalize_wc (const gchar *str,
|
|||||||
while ((max_len < 0 || p < str + max_len) && *p)
|
while ((max_len < 0 || p < str + max_len) && *p)
|
||||||
{
|
{
|
||||||
const gchar *decomp;
|
const gchar *decomp;
|
||||||
gunichar wc = g_utf8_get_char (p);
|
const char *next, *between;
|
||||||
|
gunichar wc;
|
||||||
|
|
||||||
if (wc >= SBase && wc < SBase + SCount)
|
next = g_utf8_next_char (p);
|
||||||
|
/* Avoid reading truncated multibyte characters
|
||||||
|
which run past the end of the buffer */
|
||||||
|
if (max_len < 0)
|
||||||
|
{
|
||||||
|
/* Does the character contain a NUL terminator? */
|
||||||
|
for (between = &p[1]; between < next; between++)
|
||||||
|
{
|
||||||
|
if (G_UNLIKELY (!*between))
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (G_UNLIKELY (next > str + max_len))
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
wc = g_utf8_get_char (p);
|
||||||
|
|
||||||
|
if (G_UNLIKELY (wc == (gunichar) -1))
|
||||||
|
{
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
else if (wc >= SBase && wc < SBase + SCount)
|
||||||
{
|
{
|
||||||
gsize result_len;
|
gsize result_len;
|
||||||
decompose_hangul (wc, NULL, &result_len);
|
decompose_hangul (wc, NULL, &result_len);
|
||||||
@ -406,7 +430,7 @@ _g_utf8_normalize_wc (const gchar *str,
|
|||||||
n_wc++;
|
n_wc++;
|
||||||
}
|
}
|
||||||
|
|
||||||
p = g_utf8_next_char (p);
|
p = next;
|
||||||
}
|
}
|
||||||
|
|
||||||
wc_buffer = g_new (gunichar, n_wc + 1);
|
wc_buffer = g_new (gunichar, n_wc + 1);
|
||||||
@ -548,10 +572,13 @@ g_utf8_normalize (const gchar *str,
|
|||||||
GNormalizeMode mode)
|
GNormalizeMode mode)
|
||||||
{
|
{
|
||||||
gunichar *result_wc = _g_utf8_normalize_wc (str, len, mode);
|
gunichar *result_wc = _g_utf8_normalize_wc (str, len, mode);
|
||||||
gchar *result;
|
gchar *result = NULL;
|
||||||
|
|
||||||
result = g_ucs4_to_utf8 (result_wc, -1, NULL, NULL, NULL);
|
if (G_LIKELY (result_wc != NULL))
|
||||||
g_free (result_wc);
|
{
|
||||||
|
result = g_ucs4_to_utf8 (result_wc, -1, NULL, NULL, NULL);
|
||||||
|
g_free (result_wc);
|
||||||
|
}
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
@ -146,12 +146,50 @@ test_unicode_normalize (void)
|
|||||||
g_string_free (buffer, TRUE);
|
g_string_free (buffer, TRUE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
test_unicode_normalize_invalid (void)
|
||||||
|
{
|
||||||
|
/* g_utf8_normalize() should return NULL for all of these invalid inputs */
|
||||||
|
const struct
|
||||||
|
{
|
||||||
|
gssize max_len;
|
||||||
|
const gchar *str;
|
||||||
|
} test_vectors[] = {
|
||||||
|
/* input ending with truncated multibyte encoding */
|
||||||
|
{ -1, "\xC0" },
|
||||||
|
{ 1, "\xC0\x80" },
|
||||||
|
{ -1, "\xE0\x80" },
|
||||||
|
{ 2, "\xE0\x80\x80" },
|
||||||
|
{ -1, "\xF0\x80\x80" },
|
||||||
|
{ 3, "\xF0\x80\x80\x80" },
|
||||||
|
{ -1, "\xF8\x80\x80\x80" },
|
||||||
|
{ 4, "\xF8\x80\x80\x80\x80" },
|
||||||
|
{ 3, "\x20\xE2\x84\xAA" },
|
||||||
|
{ -1, "\x20\xE2\x00\xAA" },
|
||||||
|
{ -1, "\xC0\x80\xE0\x80" },
|
||||||
|
{ 4, "\xC0\x80\xE0\x80\x80" },
|
||||||
|
/* input containing invalid multibyte encoding */
|
||||||
|
{ -1, "\xED\x85\x9C\xED\x15\x9C\xED\x85\x9C" },
|
||||||
|
};
|
||||||
|
gsize i;
|
||||||
|
|
||||||
|
for (i = 0; i < G_N_ELEMENTS (test_vectors); i++)
|
||||||
|
{
|
||||||
|
g_test_message ("Invalid UTF-8 vector %" G_GSIZE_FORMAT, i);
|
||||||
|
g_assert_null (g_utf8_normalize (test_vectors[i].str,
|
||||||
|
test_vectors[i].max_len,
|
||||||
|
G_NORMALIZE_ALL));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
main (int argc, char **argv)
|
main (int argc, char **argv)
|
||||||
{
|
{
|
||||||
g_test_init (&argc, &argv, NULL);
|
g_test_init (&argc, &argv, NULL);
|
||||||
|
|
||||||
g_test_add_func ("/unicode/normalize", test_unicode_normalize);
|
g_test_add_func ("/unicode/normalize", test_unicode_normalize);
|
||||||
|
g_test_add_func ("/unicode/normalize-invalid",
|
||||||
|
test_unicode_normalize_invalid);
|
||||||
|
|
||||||
return g_test_run ();
|
return g_test_run ();
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user