From 439b29087849b4533c4a76f5ddd7a8a29c007ae9 Mon Sep 17 00:00:00 2001
From: Philip Withnall
Date: Mon, 5 Oct 2020 12:10:40 +0100
Subject: [PATCH] guri: Fix UTF-8 validation when escaping URI components

The return value from `g_utf8_get_char_validated()` is a `gunichar`,
which is unsigned, so comparing it with `> 0` is always going to return
true, even for return values `(gunichar) -1` and `(gunichar) -2`, which
indicate errors.

Handle them more explicitly.

oss-fuzz#26083

Signed-off-by: Philip Withnall
---
 glib/guri.c      | 9 +++++++--
 glib/tests/uri.c | 4 ++++
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/glib/guri.c b/glib/guri.c
index e337c9e24..f04139b80 100644
--- a/glib/guri.c
+++ b/glib/guri.c
@@ -420,8 +420,13 @@ _uri_encoder (GString *out,
 
   while (p < end)
     {
-      if (allow_utf8 && *p >= 0x80 &&
-          g_utf8_get_char_validated ((gchar *)p, end - p) > 0)
+      gunichar multibyte_utf8_char = 0;
+
+      if (allow_utf8 && *p >= 0x80)
+        multibyte_utf8_char = g_utf8_get_char_validated ((gchar *)p, end - p);
+
+      if (multibyte_utf8_char > 0 &&
+          multibyte_utf8_char != (gunichar) -1 && multibyte_utf8_char != (gunichar) -2)
         {
           gint len = g_utf8_skip [*p];
           g_string_append_len (out, (gchar *)p, len);
diff --git a/glib/tests/uri.c b/glib/tests/uri.c
index 31bef2733..b3843b978 100644
--- a/glib/tests/uri.c
+++ b/glib/tests/uri.c
@@ -466,6 +466,10 @@ test_uri_escape_string (void)
     { ":+ \\?#", NULL, FALSE, "%3A%2B%20%5C%3F%23" },
     { "a+b:c", "+", FALSE, "a+b%3Ac" },
     { "a+b:c\303\234", "+", TRUE, "a+b%3Ac\303\234" },
+    /* Incomplete UTF-8 sequence: */
+    { "\xfc\x3b\xd2", NULL, TRUE, "%FC%3B%D2" },
+    /* Invalid sequence: */
+    { "\xc3\xb1\xc3\x28", NULL, TRUE, "ñ%C3%28" },
   };
   gsize i;
 