From 2713d68ad28f168f04cfd65a65d04f06ba491466 Mon Sep 17 00:00:00 2001 From: Philip Withnall Date: Sat, 23 Nov 2024 11:54:34 +0000 Subject: [PATCH] =?UTF-8?q?guri:=20Fix=20paths=20starting=20with=20?= =?UTF-8?q?=E2=80=98//=E2=80=99=20when=20parsing?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes cases where calling `g_uri_to_string()` immediately after a successful call to `g_uri_parse()` would cause an assertion failure for URIs like `data:/.//` or `data:.///`. These appear to be valid URIs. Their paths are normalised to `//` during parsing as a result of the `remove_dot_segments()` algorithm. This then falls foul of the restriction from https://datatracker.ietf.org/doc/html/rfc3986#section-3 that > When authority is not present, the path cannot begin with two slash > characters ("//"). This is already encoded in an assertion at the top of `g_uri_join_internal()`. The approach of prefixing the path with `/.` should be harmless: it guarantees the path starts with `/`, prevents `//` being a prefix, and should always be removed by `remove_dot_segments()` when re-parsing the URI. It’s the same approach as taken in the WhatWG URL spec for a similar (but different) situation: https://url.spec.whatwg.org/#url-serializing (see step 3). See also: https://gitlab.gnome.org/GNOME/libsoup/-/merge_requests/415?commit_id=79cfd65c9bd8024cd45dd725c284766329873709 Signed-off-by: Philip Withnall --- glib/guri.c | 12 ++++++++++++ glib/tests/uri.c | 25 +++++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/glib/guri.c b/glib/guri.c index 87c51e967..2fb95a331 100644 --- a/glib/guri.c +++ b/glib/guri.c @@ -1535,6 +1535,18 @@ g_uri_parse_relative (GUri *base_uri, remove_dot_segments (uri->path); } + /* Fix up the invalid cases from + * https://datatracker.ietf.org/doc/html/rfc3986#section-3, as otherwise + * calling g_uri_to_string() on this URI will fail. These can be caused by + * remove_dot_segments(), e.g. `data:/.//` gets normalised to `data://` whose + * path is invalid given the lack of an authority. */ + if (uri->host == NULL && uri->path[0] == '/' && uri->path[1] == '/') + { + char *new_path = g_strconcat ("/.", uri->path, NULL); + g_free (uri->path); + uri->path = g_steal_pointer (&new_path); + } + return g_steal_pointer (&uri); } diff --git a/glib/tests/uri.c b/glib/tests/uri.c index 5e12ab27c..fc2298f0c 100644 --- a/glib/tests/uri.c +++ b/glib/tests/uri.c @@ -769,6 +769,18 @@ static const UriAbsoluteTest absolute_tests[] = { /* Invalid IDN hostname */ { "http://xn--mixed-\xc3\xbcp/", G_URI_FLAGS_NONE, FALSE, G_URI_ERROR_BAD_HOST, { NULL, NULL, NULL, -1, NULL, NULL, NULL } }, + + /* Paths with double slashes */ + { "data:.///", + G_URI_FLAGS_HAS_PASSWORD | G_URI_FLAGS_ENCODED_PATH | G_URI_FLAGS_ENCODED_QUERY | G_URI_FLAGS_ENCODED_FRAGMENT | G_URI_FLAGS_SCHEME_NORMALIZE, + TRUE, 0, + { "data", NULL, NULL, -1, "/.//", NULL, NULL } + }, + { "data:/.//", + G_URI_FLAGS_HAS_PASSWORD | G_URI_FLAGS_ENCODED_PATH | G_URI_FLAGS_ENCODED_QUERY | G_URI_FLAGS_ENCODED_FRAGMENT | G_URI_FLAGS_SCHEME_NORMALIZE, + TRUE, 0, + { "data", NULL, NULL, -1, "/.//", NULL, NULL } + }, }; static void @@ -1743,6 +1755,19 @@ test_uri_join (void) uri = g_uri_join (G_URI_FLAGS_NONE, "scheme", NULL, "foo:bar._webdav._tcp.local", -1, "", NULL, NULL); g_assert_cmpstr (uri, ==, "scheme://foo%3Abar._webdav._tcp.local"); g_free (uri); + + uri = g_uri_join (G_URI_FLAGS_NONE, "data", NULL, NULL, -1, "/.//", NULL, NULL); + g_assert_cmpstr (uri, ==, "data:/.//"); + g_free (uri); + + uri = g_uri_join (G_URI_FLAGS_NONE, "data", NULL, NULL, -1, ".///", NULL, NULL); + g_assert_cmpstr (uri, ==, "data:.///"); + g_free (uri); + + /* From https://url.spec.whatwg.org/#url-serializing */ + uri = g_uri_join (G_URI_FLAGS_NONE, "web+demo", NULL, NULL, -1, "/.//not-a-host/", NULL, NULL); + g_assert_cmpstr (uri, ==, "web+demo:/.//not-a-host/"); + g_free (uri); } static void