From b504cc0841b38dc15e526554373d4aec2ccde04e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Tyrychtr?= Date: Fri, 14 Jul 2023 13:02:28 +0200 Subject: [PATCH] gfile: Strip query sections from file: URIs According to https://url.spec.whatwg.org/#file-state a file URI can have a fragment and query string, so just ignore them and don't raise an invalid URI error. Fixes: #3050 --- gio/glocalvfs.c | 14 ++++++++++++-- gio/tests/file.c | 34 ++++++++++++++++++++++++++++++++++ glib/gconvert.c | 36 +++++++++++++++++++++++++----------- glib/tests/convert.c | 34 ++++++++++++++++++++++++++++++++++ glib/tests/uri.c | 3 ++- 5 files changed, 107 insertions(+), 14 deletions(-) diff --git a/gio/glocalvfs.c b/gio/glocalvfs.c index a830cc350..00fec8e2b 100644 --- a/gio/glocalvfs.c +++ b/gio/glocalvfs.c @@ -94,14 +94,24 @@ g_local_vfs_get_file_for_uri (GVfs *vfs, { char *path; GFile *file; - char *stripped_uri, *hash; - + char *stripped_uri, *hash, *question_mark; + + /* As per https://url.spec.whatwg.org/#file-state, file: URIs can contain + * query and fragment sections. We ignore them in order to get only the file + * path. Compliance with this part of the WHATWG spec doesn’t necessarily mean + * we comply with the entire spec.
*/ if (strchr (uri, '#') != NULL) { stripped_uri = g_strdup (uri); hash = strchr (stripped_uri, '#'); *hash = 0; } + else if (strchr (uri, '?') != NULL) + { + stripped_uri = g_strdup (uri); + question_mark = strchr (stripped_uri, '?'); + *question_mark = 0; + } else stripped_uri = (char *)uri; diff --git a/gio/tests/file.c b/gio/tests/file.c index 69c25ff69..310981b5d 100644 --- a/gio/tests/file.c +++ b/gio/tests/file.c @@ -3935,6 +3935,38 @@ test_enumerator_cancellation (void) g_object_unref (dir); } +static void +test_from_uri_ignores_fragment (void) +{ + GFile *file; + gchar *path; + file = g_file_new_for_uri ("file:///tmp/foo#bar"); + path = g_file_get_path (file); +#ifdef G_OS_WIN32 + g_assert_cmpstr (path, ==, "\\tmp\\foo"); +#else + g_assert_cmpstr (path, ==, "/tmp/foo"); +#endif + g_free (path); + g_object_unref (file); +} + +static void +test_from_uri_ignores_query_string (void) +{ + GFile *file; + gchar *path; + file = g_file_new_for_uri ("file:///tmp/foo?bar"); + path = g_file_get_path (file); +#ifdef G_OS_WIN32 + g_assert_cmpstr (path, ==, "\\tmp\\foo"); +#else + g_assert_cmpstr (path, ==, "/tmp/foo"); +#endif + g_free (path); + g_object_unref (file); +} + int main (int argc, char *argv[]) { @@ -3990,6 +4022,8 @@ main (int argc, char *argv[]) g_test_add_func ("/file/query-default-handler-uri", test_query_default_handler_uri); g_test_add_func ("/file/query-default-handler-uri-async", test_query_default_handler_uri_async); g_test_add_func ("/file/enumerator-cancellation", test_enumerator_cancellation); + g_test_add_func ("/file/from-uri/ignores-query-string", test_from_uri_ignores_query_string); + g_test_add_func ("/file/from-uri/ignores-fragment", test_from_uri_ignores_fragment); return g_test_run (); } diff --git a/glib/gconvert.c b/glib/gconvert.c index 190909dd8..69bcc2f97 100644 --- a/glib/gconvert.c +++ b/glib/gconvert.c @@ -1662,7 +1662,12 @@ hostname_validate (const char *hostname) * errors. Any of the errors in #GConvertError may occur. 
* * Converts an escaped ASCII-encoded URI to a local filename in the - * encoding used for filenames. + * encoding used for filenames. + * + * Since GLib 2.78, the query string and fragment can be present in the URI, + * but are not part of the resulting filename. + * We take inspiration from https://url.spec.whatwg.org/#file-state, + * but we don't support the entire standard. * * Returns: (type filename): a newly-allocated string holding * the resulting filename, or %NULL on an error. @@ -1677,6 +1682,8 @@ g_filename_from_uri (const gchar *uri, char *unescaped_hostname; char *result; char *filename; + char *past_path; + char *temp_uri; int offs; #ifdef G_OS_WIN32 char *p, *slash; @@ -1692,17 +1699,19 @@ g_filename_from_uri (const gchar *uri, uri); return NULL; } + + temp_uri = g_strdup (uri); + + past_scheme = temp_uri + strlen ("file:"); - past_scheme = uri + strlen ("file:"); - - if (strchr (past_scheme, '#') != NULL) - { - g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_BAD_URI, - _("The local file URI “%s” may not include a “#”"), - uri); - return NULL; - } - + past_path = strchr (past_scheme, '?'); + if (past_path != NULL) + *past_path = '\0'; + + past_path = strchr (past_scheme, '#'); + if (past_path != NULL) + *past_path = '\0'; + if (has_case_prefix (past_scheme, "///")) past_scheme += 2; else if (has_case_prefix (past_scheme, "//")) @@ -1714,6 +1723,7 @@ g_filename_from_uri (const gchar *uri, if (past_scheme == NULL) { + g_free (temp_uri); g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_BAD_URI, _("The URI “%s” is invalid"), uri); @@ -1726,6 +1736,7 @@ g_filename_from_uri (const gchar *uri, !hostname_validate (unescaped_hostname)) { g_free (unescaped_hostname); + g_free (temp_uri); g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_BAD_URI, _("The hostname of the URI “%s” is invalid"), uri); @@ -1742,6 +1753,7 @@ g_filename_from_uri (const gchar *uri, if (filename == NULL) { + g_free (temp_uri); g_set_error (error, G_CONVERT_ERROR, 
G_CONVERT_ERROR_BAD_URI, _("The URI “%s” contains invalidly escaped characters"), uri); @@ -1785,6 +1797,8 @@ g_filename_from_uri (const gchar *uri, result = g_strdup (filename + offs); g_free (filename); + g_free (temp_uri); + return result; } diff --git a/glib/tests/convert.c b/glib/tests/convert.c index 6daeec237..6a7bfaf72 100644 --- a/glib/tests/convert.c +++ b/glib/tests/convert.c @@ -949,6 +949,38 @@ test_no_conv (void) g_error_free (error); } +static void +test_filename_from_uri_query_is_ignored (void) +{ + gchar *filename; + GError *error = NULL; + + filename = g_filename_from_uri ("file:///tmp/foo?bar", NULL, &error); + g_assert_no_error (error); +#ifdef G_OS_WIN32 + g_assert_cmpstr (filename, ==, "\\tmp\\foo"); +#else + g_assert_cmpstr (filename, ==, "/tmp/foo"); +#endif + g_free (filename); +} + +static void +test_filename_from_uri_fragment_is_ignored (void) +{ + gchar *filename; + GError *error = NULL; + + filename = g_filename_from_uri ("file:///tmp/foo#bar", NULL, &error); + g_assert_no_error (error); +#ifdef G_OS_WIN32 + g_assert_cmpstr (filename, ==, "\\tmp\\foo"); +#else + g_assert_cmpstr (filename, ==, "/tmp/foo"); +#endif + g_free (filename); +} + int main (int argc, char *argv[]) { @@ -974,6 +1006,8 @@ main (int argc, char *argv[]) g_test_add_func ("/conversion/filename-from-utf8/embedded-nul", test_filename_from_utf8_embedded_nul); g_test_add_func ("/conversion/filename-from-utf8/embedded-nul/subprocess/utf8", test_filename_from_utf8_embedded_nul_utf8); g_test_add_func ("/conversion/filename-from-utf8/embedded-nul/subprocess/iconv", test_filename_from_utf8_embedded_nul_iconv); + g_test_add_func ("/conversion/filename-from-uri/query-is-ignored", test_filename_from_uri_query_is_ignored); + g_test_add_func ("/conversion/filename-from-uri/fragment-is-ignored", test_filename_from_uri_fragment_is_ignored); return g_test_run (); } diff --git a/glib/tests/uri.c b/glib/tests/uri.c index 0f32888d2..94a0d5dac 100644 --- a/glib/tests/uri.c +++ 
b/glib/tests/uri.c @@ -130,7 +130,8 @@ file_from_uri_tests[] = { { "file:////etc/%C3%B6%C3%C3%C3%A5", "//etc/\xc3\xb6\xc3\xc3\xc3\xa5", NULL, 0 }, { "file://\xE5\xE4\xF6/etc", NULL, NULL, G_CONVERT_ERROR_BAD_URI}, { "file://%E5%E4%F6/etc", NULL, NULL, G_CONVERT_ERROR_BAD_URI}, - { "file:///some/file#bad", NULL, NULL, G_CONVERT_ERROR_BAD_URI}, + { "file:///some/file?query", "/some/file", NULL, 0 }, + { "file:///some/file#bad", "/some/file", NULL, 0 }, { "file://some", NULL, NULL, G_CONVERT_ERROR_BAD_URI}, { "", NULL, NULL, G_CONVERT_ERROR_BAD_URI}, { "file:test", NULL, NULL, G_CONVERT_ERROR_BAD_URI},