guri: Fix paths starting with ‘//’ when parsing

This fixes cases where calling `g_uri_to_string()` immediately after a
successful call to `g_uri_parse()` would cause an assertion failure for
URIs like `data:/.//` or `data:.///`.

These appear to be valid URIs. Their paths are normalised to `//` during
parsing as a result of the `remove_dot_segments()` algorithm. This then
falls foul of the restriction from
https://datatracker.ietf.org/doc/html/rfc3986#section-3 that

> When authority is not present, the path cannot begin with two slash
> characters ("//").

This is already encoded in an assertion at the top of
`g_uri_join_internal()`.

The approach of prefixing the path with `/.` should be harmless: it
guarantees the path starts with `/`, prevents `//` being a prefix, and
should always be removed by `remove_dot_segments()` when re-parsing the
URI. It’s the same approach as taken in the WHATWG URL spec for a
similar (but not identical) situation:
https://url.spec.whatwg.org/#url-serializing (see step 3).

See also: https://gitlab.gnome.org/GNOME/libsoup/-/merge_requests/415?commit_id=79cfd65c9bd8024cd45dd725c284766329873709

Signed-off-by: Philip Withnall <pwithnall@gnome.org>
This commit is contained in:
Philip Withnall 2024-11-23 11:54:34 +00:00
parent dfe252429c
commit 2713d68ad2
No known key found for this signature in database
GPG Key ID: C5C42CFB268637CA
2 changed files with 37 additions and 0 deletions

View File

@@ -1535,6 +1535,18 @@ g_uri_parse_relative (GUri *base_uri,
remove_dot_segments (uri->path);
}
/* Fix up the invalid cases from
* https://datatracker.ietf.org/doc/html/rfc3986#section-3, as otherwise
* calling g_uri_to_string() on this URI will fail. These can be caused by
* remove_dot_segments(), e.g. `data:/.//` gets normalised to `data://` whose
* path is invalid given the lack of an authority. */
if (uri->host == NULL && uri->path[0] == '/' && uri->path[1] == '/')
{
char *new_path = g_strconcat ("/.", uri->path, NULL);
g_free (uri->path);
uri->path = g_steal_pointer (&new_path);
}
return g_steal_pointer (&uri);
}

View File

@@ -769,6 +769,18 @@ static const UriAbsoluteTest absolute_tests[] = {
/* Invalid IDN hostname */
{ "http://xn--mixed-\xc3\xbcp/", G_URI_FLAGS_NONE, FALSE, G_URI_ERROR_BAD_HOST,
{ NULL, NULL, NULL, -1, NULL, NULL, NULL } },
/* Paths with double slashes */
{ "data:.///",
G_URI_FLAGS_HAS_PASSWORD | G_URI_FLAGS_ENCODED_PATH | G_URI_FLAGS_ENCODED_QUERY | G_URI_FLAGS_ENCODED_FRAGMENT | G_URI_FLAGS_SCHEME_NORMALIZE,
TRUE, 0,
{ "data", NULL, NULL, -1, "/.//", NULL, NULL }
},
{ "data:/.//",
G_URI_FLAGS_HAS_PASSWORD | G_URI_FLAGS_ENCODED_PATH | G_URI_FLAGS_ENCODED_QUERY | G_URI_FLAGS_ENCODED_FRAGMENT | G_URI_FLAGS_SCHEME_NORMALIZE,
TRUE, 0,
{ "data", NULL, NULL, -1, "/.//", NULL, NULL }
},
};
static void
@@ -1743,6 +1755,19 @@ test_uri_join (void)
uri = g_uri_join (G_URI_FLAGS_NONE, "scheme", NULL, "foo:bar._webdav._tcp.local", -1, "", NULL, NULL);
g_assert_cmpstr (uri, ==, "scheme://foo%3Abar._webdav._tcp.local");
g_free (uri);
uri = g_uri_join (G_URI_FLAGS_NONE, "data", NULL, NULL, -1, "/.//", NULL, NULL);
g_assert_cmpstr (uri, ==, "data:/.//");
g_free (uri);
uri = g_uri_join (G_URI_FLAGS_NONE, "data", NULL, NULL, -1, ".///", NULL, NULL);
g_assert_cmpstr (uri, ==, "data:.///");
g_free (uri);
/* From https://url.spec.whatwg.org/#url-serializing */
uri = g_uri_join (G_URI_FLAGS_NONE, "web+demo", NULL, NULL, -1, "/.//not-a-host/", NULL, NULL);
g_assert_cmpstr (uri, ==, "web+demo:/.//not-a-host/");
g_free (uri);
}
static void