From 3521763532f89ef1e97db1b4e7f5ade2790c3eb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Tue, 28 Jul 2020 14:07:49 +0400 Subject: [PATCH 01/12] uri: add some note about the API scope MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Marc-André Lureau --- glib/guri.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/glib/guri.c b/glib/guri.c index 6179ea01d..1b1c3ef66 100644 --- a/glib/guri.c +++ b/glib/guri.c @@ -33,6 +33,11 @@ * The #GUri type and related functions can be used to parse URIs into * their components, and build valid URIs from individual components. * + * Note that the scope of #GUri is to help manipulate URIs in various applications, + * following RFC 3986. In particular, it doesn't intend to cover web browser + * needs, and doesn't implement the WHATWG URL standard. No APIs are provided to + * help prevent homograph attacks. + * * ## Parsing URIs * * The most minimalist APIs for parsing URIs are g_uri_split() and From 19c0db318539bad6310075eeee08f57476daa934 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Tue, 28 Jul 2020 14:14:08 +0400 Subject: [PATCH 02/12] uri: improve some documentation about absolute URIs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As pointed out in the discussion https://gitlab.gnome.org/GNOME/glib/-/issues/2169.
Signed-off-by: Marc-André Lureau --- glib/guri.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/glib/guri.c b/glib/guri.c index 1b1c3ef66..06f930dc2 100644 --- a/glib/guri.c +++ b/glib/guri.c @@ -965,7 +965,7 @@ g_uri_split_with_user (const gchar *uri_string, /** * g_uri_split_network: - * @uri_string: a string containing a relative or absolute URI + * @uri_string: a string containing an absolute URI * @flags: flags for parsing @uri_string * @scheme: (out) (nullable) (optional) (transfer full): on return, contains * the scheme (converted to lowercase), or %NULL @@ -1156,7 +1156,7 @@ g_uri_parse (const gchar *uri_string, /** * g_uri_parse_relative: - * @base_uri: (nullable): a base URI + * @base_uri: (nullable): a base absolute URI * @uri_string: a string representing a relative or absolute URI * @flags: flags describing how to parse @uri_string * @error: #GError for error reporting, or %NULL to ignore. @@ -1446,13 +1446,13 @@ g_uri_join_internal (GUriFlags flags, * @fragment: (nullable): the fragment, or %NULL * * Joins the given components together according to @flags to create - * a complete URI string. At least @scheme must be specified, and + * an absolute URI string. At least @scheme must be specified, and * @path may not be %NULL (though it may be ""). * * See also g_uri_join_with_user(), which allows specifying the * components of the "userinfo" separately. * - * Return value: a URI string + * Return value: an absolute URI string * * Since: 2.66 */ @@ -1496,13 +1496,13 @@ g_uri_join (GUriFlags flags, * @fragment: (nullable): the fragment, or %NULL * * Joins the given components together according to @flags to create - * a complete URI string. At least @scheme must be specified, and + * an absolute URI string. At least @scheme must be specified, and * @path may not be %NULL (though it may be ""). * * In constrast to g_uri_join(), this allows specifying the components * of the "userinfo" separately. 
* - * Return value: a URI string + * Return value: an absolute URI string * * Since: 2.66 */ From d625a29b288e952610cf6c6f2bb73c4c49aa719d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Tue, 28 Jul 2020 14:25:22 +0400 Subject: [PATCH 03/12] uri: add a comment about temporary GUri construction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As pointed out in the discussion of: https://gitlab.gnome.org/GNOME/glib/-/issues/2169 Signed-off-by: Marc-André Lureau --- glib/guri.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/glib/guri.c b/glib/guri.c index 06f930dc2..8dfc1cf4c 100644 --- a/glib/guri.c +++ b/glib/guri.c @@ -1181,6 +1181,8 @@ g_uri_parse_relative (GUri *base_uri, g_return_val_if_fail (error == NULL || *error == NULL, NULL); g_return_val_if_fail (base_uri == NULL || base_uri->scheme != NULL, NULL); + /* Use GUri struct to construct the return value: there is no guarantee it is + * actually correct within the function body. */ uri = g_atomic_rc_box_new0 (GUri); uri->flags = flags; From 44d4640c47ee04a0dc333cef48a2781f1739aee1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Tue, 28 Jul 2020 15:07:03 +0400 Subject: [PATCH 04/12] uri: rename absolute & relative uri_string to uri_ref MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's reserve the term URI for absolute URIs, following rfc3986 terminology. 
See: https://gitlab.gnome.org/GNOME/glib/-/issues/2169 Signed-off-by: Marc-André Lureau --- glib/guri.c | 76 ++++++++++++++++++++++++++--------------------------- glib/guri.h | 10 +++---- 2 files changed, 43 insertions(+), 43 deletions(-) diff --git a/glib/guri.c b/glib/guri.c index 8dfc1cf4c..a4481204e 100644 --- a/glib/guri.c +++ b/glib/guri.c @@ -837,8 +837,8 @@ g_uri_split_internal (const gchar *uri_string, /** * g_uri_split: - * @uri_string: a string containing a relative or absolute URI - * @flags: flags for parsing @uri_string + * @uri_ref: a string containing a relative or absolute URI + * @flags: flags for parsing @uri_ref * @scheme: (out) (nullable) (optional) (transfer full): on return, contains * the scheme (converted to lowercase), or %NULL * @userinfo: (out) (nullable) (optional) (transfer full): on return, contains @@ -855,14 +855,14 @@ g_uri_split_internal (const gchar *uri_string, * the fragment, or %NULL * @error: #GError for error reporting, or %NULL to ignore. * - * Parses @uri_string (which can be an absolute or relative URI) + * Parses @uri_ref (which can be an absolute or relative URI) * according to @flags, and returns the pieces. Any component that - * doesn't appear in @uri_string will be returned as %NULL (but note + * doesn't appear in @uri_ref will be returned as %NULL (but note * that all URIs always have a path component, though it may be the * empty string). * * If @flags contains %G_URI_FLAGS_ENCODED, then `%`-encoded characters in - * @uri_string will remain encoded in the output strings. (If not, + * @uri_ref will remain encoded in the output strings. (If not, * then all such characters will be decoded.) Note that decoding will * only work if the URI components are ASCII or UTF-8, so you will * need to use %G_URI_FLAGS_ENCODED if they are not. @@ -872,13 +872,13 @@ g_uri_split_internal (const gchar *uri_string, * since it always returns only the full userinfo; use * g_uri_split_with_user() if you want it split up. 
* - * Returns: (skip): %TRUE if @uri_string parsed successfully, %FALSE + * Returns: (skip): %TRUE if @uri_ref parsed successfully, %FALSE * on error. * * Since: 2.66 */ gboolean -g_uri_split (const gchar *uri_string, +g_uri_split (const gchar *uri_ref, GUriFlags flags, gchar **scheme, gchar **userinfo, @@ -889,10 +889,10 @@ g_uri_split (const gchar *uri_string, gchar **fragment, GError **error) { - g_return_val_if_fail (uri_string != NULL, FALSE); + g_return_val_if_fail (uri_ref != NULL, FALSE); g_return_val_if_fail (error == NULL || *error == NULL, FALSE); - return g_uri_split_internal (uri_string, flags, + return g_uri_split_internal (uri_ref, flags, scheme, userinfo, NULL, NULL, NULL, host, port, path, query, fragment, error); @@ -900,8 +900,8 @@ g_uri_split (const gchar *uri_string, /** * g_uri_split_with_user: - * @uri_string: a string containing a relative or absolute URI - * @flags: flags for parsing @uri_string + * @uri_ref: a string containing a relative or absolute URI + * @flags: flags for parsing @uri_ref * @scheme: (out) (nullable) (optional) (transfer full): on return, contains * the scheme (converted to lowercase), or %NULL * @user: (out) (nullable) (optional) (transfer full): on return, contains @@ -922,9 +922,9 @@ g_uri_split (const gchar *uri_string, * the fragment, or %NULL * @error: #GError for error reporting, or %NULL to ignore. * - * Parses @uri_string (which can be an absolute or relative URI) + * Parses @uri_ref (which can be an absolute or relative URI) * according to @flags, and returns the pieces. Any component that - * doesn't appear in @uri_string will be returned as %NULL (but note + * doesn't appear in @uri_ref will be returned as %NULL (but note * that all URIs always have a path component, though it may be the * empty string). * @@ -934,13 +934,13 @@ g_uri_split (const gchar *uri_string, * @auth_params will only be parsed out if @flags contains * %G_URI_FLAGS_HAS_AUTH_PARAMS. 
* - * Returns: (skip): %TRUE if @uri_string parsed successfully, %FALSE + * Returns: (skip): %TRUE if @uri_ref parsed successfully, %FALSE * on error. * * Since: 2.66 */ gboolean -g_uri_split_with_user (const gchar *uri_string, +g_uri_split_with_user (const gchar *uri_ref, GUriFlags flags, gchar **scheme, gchar **user, @@ -953,10 +953,10 @@ g_uri_split_with_user (const gchar *uri_string, gchar **fragment, GError **error) { - g_return_val_if_fail (uri_string != NULL, FALSE); + g_return_val_if_fail (uri_ref != NULL, FALSE); g_return_val_if_fail (error == NULL || *error == NULL, FALSE); - return g_uri_split_internal (uri_string, flags, + return g_uri_split_internal (uri_ref, flags, scheme, NULL, user, password, auth_params, host, port, path, query, fragment, error); @@ -1039,29 +1039,29 @@ g_uri_split_network (const gchar *uri_string, /** * g_uri_is_valid: - * @uri_string: a string containing a relative or absolute URI - * @flags: flags for parsing @uri_string + * @uri_ref: a string containing a relative or absolute URI + * @flags: flags for parsing @uri_ref * @error: #GError for error reporting, or %NULL to ignore. * - * Parses @uri_string (which can be an absolute or relative URI) + * Parses @uri_ref (which can be an absolute or relative URI) * according to @flags, to determine whether it is valid. * * See g_uri_split(), and the definition of #GUriFlags, for more * information on the effect of @flags. * - * Returns: %TRUE if @uri_string parsed successfully, %FALSE on error. + * Returns: %TRUE if @uri_ref parsed successfully, %FALSE on error. 
* * Since: 2.66 */ gboolean -g_uri_is_valid (const gchar *uri_string, +g_uri_is_valid (const gchar *uri_ref, GUriFlags flags, GError **error) { - g_return_val_if_fail (uri_string != NULL, FALSE); + g_return_val_if_fail (uri_ref != NULL, FALSE); g_return_val_if_fail (error == NULL || *error == NULL, FALSE); - return g_uri_split_internal (uri_string, flags, + return g_uri_split_internal (uri_ref, flags, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, error); @@ -1157,11 +1157,11 @@ g_uri_parse (const gchar *uri_string, /** * g_uri_parse_relative: * @base_uri: (nullable): a base absolute URI - * @uri_string: a string representing a relative or absolute URI - * @flags: flags describing how to parse @uri_string + * @uri_ref: a string representing a relative or absolute URI + * @flags: flags describing how to parse @uri_ref * @error: #GError for error reporting, or %NULL to ignore. * - * Parses @uri_string according to @flags and, if it is a relative + * Parses @uri_ref according to @flags and, if it is a relative * URI, resolves it relative to @base_uri. If the result is not a * valid absolute URI, it will be discarded, and an error returned. 
* @@ -1171,13 +1171,13 @@ g_uri_parse (const gchar *uri_string, */ GUri * g_uri_parse_relative (GUri *base_uri, - const gchar *uri_string, + const gchar *uri_ref, GUriFlags flags, GError **error) { GUri *uri = NULL; - g_return_val_if_fail (uri_string != NULL, NULL); + g_return_val_if_fail (uri_ref != NULL, NULL); g_return_val_if_fail (error == NULL || *error == NULL, NULL); g_return_val_if_fail (base_uri == NULL || base_uri->scheme != NULL, NULL); @@ -1186,7 +1186,7 @@ g_uri_parse_relative (GUri *base_uri, uri = g_atomic_rc_box_new0 (GUri); uri->flags = flags; - if (!g_uri_split_internal (uri_string, flags, + if (!g_uri_split_internal (uri_ref, flags, &uri->scheme, &uri->userinfo, &uri->user, &uri->password, &uri->auth_params, &uri->host, &uri->port, @@ -1269,16 +1269,16 @@ g_uri_parse_relative (GUri *base_uri, /** * g_uri_resolve_relative: * @base_uri_string: (nullable): a string representing a base URI - * @uri_string: a string representing a relative or absolute URI - * @flags: flags describing how to parse @uri_string + * @uri_ref: a string representing a relative or absolute URI + * @flags: flags describing how to parse @uri_ref * @error: #GError for error reporting, or %NULL to ignore. * - * Parses @uri_string according to @flags and, if it is a relative + * Parses @uri_ref according to @flags and, if it is a relative * URI, resolves it relative to @base_uri_string. If the result is not * a valid absolute URI, it will be discarded, and an error returned. * - * (If @base_uri_string is %NULL, this just returns @uri_string, or - * %NULL if @uri_string is invalid or not absolute.) + * (If @base_uri_string is %NULL, this just returns @uri_ref, or + * %NULL if @uri_ref is invalid or not absolute.) * * Return value: the resolved URI string. 
* @@ -1286,14 +1286,14 @@ g_uri_parse_relative (GUri *base_uri, */ gchar * g_uri_resolve_relative (const gchar *base_uri_string, - const gchar *uri_string, + const gchar *uri_ref, GUriFlags flags, GError **error) { GUri *base_uri, *resolved_uri; gchar *resolved_uri_string; - g_return_val_if_fail (uri_string != NULL, NULL); + g_return_val_if_fail (uri_ref != NULL, NULL); g_return_val_if_fail (error == NULL || *error == NULL, NULL); flags |= G_URI_FLAGS_ENCODED; @@ -1307,7 +1307,7 @@ g_uri_resolve_relative (const gchar *base_uri_string, else base_uri = NULL; - resolved_uri = g_uri_parse_relative (base_uri, uri_string, flags, error); + resolved_uri = g_uri_parse_relative (base_uri, uri_ref, flags, error); if (base_uri) g_uri_unref (base_uri); if (!resolved_uri) diff --git a/glib/guri.h b/glib/guri.h index a9a4cd57e..a8396f4e6 100644 --- a/glib/guri.h +++ b/glib/guri.h @@ -78,7 +78,7 @@ typedef enum { } GUriFlags; GLIB_AVAILABLE_IN_2_66 -gboolean g_uri_split (const gchar *uri_string, +gboolean g_uri_split (const gchar *uri_ref, GUriFlags flags, gchar **scheme, gchar **userinfo, @@ -89,7 +89,7 @@ gboolean g_uri_split (const gchar *uri_string, gchar **fragment, GError **error); GLIB_AVAILABLE_IN_2_66 -gboolean g_uri_split_with_user (const gchar *uri_string, +gboolean g_uri_split_with_user (const gchar *uri_ref, GUriFlags flags, gchar **scheme, gchar **user, @@ -110,7 +110,7 @@ gboolean g_uri_split_network (const gchar *uri_string, GError **error); GLIB_AVAILABLE_IN_2_66 -gboolean g_uri_is_valid (const gchar *uri_string, +gboolean g_uri_is_valid (const gchar *uri_ref, GUriFlags flags, GError **error); @@ -141,13 +141,13 @@ GUri * g_uri_parse (const gchar *uri_string, GError **error); GLIB_AVAILABLE_IN_2_66 GUri * g_uri_parse_relative (GUri *base_uri, - const gchar *uri_string, + const gchar *uri_ref, GUriFlags flags, GError **error); GLIB_AVAILABLE_IN_2_66 gchar * g_uri_resolve_relative (const gchar *base_uri_string, - const gchar *uri_string, + const gchar *uri_ref, 
GUriFlags flags, GError **error); From 82ad7853baacdc10f57279429cecedc35f862a41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Tue, 28 Jul 2020 15:42:33 +0400 Subject: [PATCH 05/12] uri: change g_uri_is_valid() to check absolute URI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit g_uri_is_valid() should check the given URI is valid following RFC-3986, and reject relative references. Fixes: https://gitlab.gnome.org/GNOME/glib/-/issues/2169 Signed-off-by: Marc-André Lureau --- glib/guri.c | 19 ++++++++----------- glib/guri.h | 2 +- glib/tests/uri.c | 4 +++- 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/glib/guri.c b/glib/guri.c index a4481204e..d20b2eacc 100644 --- a/glib/guri.c +++ b/glib/guri.c @@ -1039,32 +1039,29 @@ g_uri_split_network (const gchar *uri_string, /** * g_uri_is_valid: - * @uri_ref: a string containing a relative or absolute URI - * @flags: flags for parsing @uri_ref + * @uri_string: a string containing an absolute URI + * @flags: flags for parsing @uri_string * @error: #GError for error reporting, or %NULL to ignore. * - * Parses @uri_ref (which can be an absolute or relative URI) - * according to @flags, to determine whether it is valid. + * Parses @uri_string according to @flags, to determine whether it is a valid + * absolute URI. * * See g_uri_split(), and the definition of #GUriFlags, for more * information on the effect of @flags. * - * Returns: %TRUE if @uri_ref parsed successfully, %FALSE on error. + * Returns: %TRUE if @uri_string parsed successfully, %FALSE on error.
* * Since: 2.66 */ gboolean -g_uri_is_valid (const gchar *uri_ref, +g_uri_is_valid (const gchar *uri_string, GUriFlags flags, GError **error) { - g_return_val_if_fail (uri_ref != NULL, FALSE); + g_return_val_if_fail (uri_string != NULL, FALSE); g_return_val_if_fail (error == NULL || *error == NULL, FALSE); - return g_uri_split_internal (uri_ref, flags, - NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, - error); + return g_uri_split_network (uri_string, flags, NULL, NULL, NULL, error); } diff --git a/glib/guri.h b/glib/guri.h index a8396f4e6..93d2ec59b 100644 --- a/glib/guri.h +++ b/glib/guri.h @@ -110,7 +110,7 @@ gboolean g_uri_split_network (const gchar *uri_string, GError **error); GLIB_AVAILABLE_IN_2_66 -gboolean g_uri_is_valid (const gchar *uri_ref, +gboolean g_uri_is_valid (const gchar *uri_string, GUriFlags flags, GError **error); diff --git a/glib/tests/uri.c b/glib/tests/uri.c index 70cee76e9..be70b5d6a 100644 --- a/glib/tests/uri.c +++ b/glib/tests/uri.c @@ -1267,7 +1267,9 @@ test_uri_is_valid (void) g_assert_true (g_uri_is_valid ("http://\xc3\x89XAMPLE.COM/", G_URI_FLAGS_NONE, NULL)); g_assert_true (g_uri_is_valid (" \r http\t://f oo \t\n ", G_URI_FLAGS_NONE, NULL)); - g_assert_true (g_uri_is_valid (" \r http\t://f oo \t\n ", G_URI_FLAGS_PARSE_STRICT, NULL)); + g_assert_false (g_uri_is_valid (" \r http\t://f oo \t\n ", G_URI_FLAGS_PARSE_STRICT, &error)); + g_assert_error (error, G_URI_ERROR, G_URI_ERROR_BAD_SCHEME); + g_clear_error (&error); g_assert_false (g_uri_is_valid ("http://[::192.9.5.5/ipng", G_URI_FLAGS_NONE, &error)); g_assert_error (error, G_URI_ERROR, G_URI_ERROR_BAD_HOST); From 034a4dcdc06ec0780be8c5be38f3955e1396e542 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Mon, 6 Jul 2020 23:34:27 +0400 Subject: [PATCH 06/12] gio: replace _g_uri_from_authority() with g_uri_join() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _g_uri_from_authority() is doing the same 
work as g_uri_join(): taking URI components and merging them in a legit URI string, with encoding. It turns out g_uri_from_authority was unnecessarily complex, since no caller used the userinfo field. Signed-off-by: Marc-André Lureau --- gio/gnetworkaddress.c | 52 +++++++--------------------------------- gio/gnetworkingprivate.h | 4 ---- gio/gnetworkservice.c | 12 ++++++---- gio/gsocketaddress.c | 2 +- 4 files changed, 17 insertions(+), 53 deletions(-) diff --git a/gio/gnetworkaddress.c b/gio/gnetworkaddress.c index f12f93585..9693fc125 100644 --- a/gio/gnetworkaddress.c +++ b/gio/gnetworkaddress.c @@ -764,46 +764,6 @@ error: return FALSE; } -gchar * -_g_uri_from_authority (const gchar *protocol, - const gchar *host, - guint port, - const gchar *userinfo) -{ - GString *uri; - - uri = g_string_new (protocol); - g_string_append (uri, "://"); - - if (userinfo) - { - g_string_append_uri_escaped (uri, userinfo, G_URI_RESERVED_CHARS_ALLOWED_IN_USERINFO, FALSE); - g_string_append_c (uri, '@'); - } - - if (g_hostname_is_non_ascii (host)) - { - gchar *ace_encoded = g_hostname_to_ascii (host); - - if (!ace_encoded) - { - g_string_free (uri, TRUE); - return NULL; - } - g_string_append (uri, ace_encoded); - g_free (ace_encoded); - } - else if (strchr (host, ':')) - g_string_append_printf (uri, "[%s]", host); - else - g_string_append (uri, host); - - if (port != 0) - g_string_append_printf (uri, ":%u", port); - - return g_string_free (uri, FALSE); -} - /** * g_network_address_parse_uri: * @uri: the hostname and optionally a port @@ -1459,10 +1419,14 @@ g_network_address_connectable_proxy_enumerate (GSocketConnectable *connectable) GSocketAddressEnumerator *proxy_enum; gchar *uri; - uri = _g_uri_from_authority (self->priv->scheme ? self->priv->scheme : "none", - self->priv->hostname, - self->priv->port, - NULL); + uri = g_uri_join (G_URI_FLAGS_NONE, + self->priv->scheme ? 
self->priv->scheme : "none", + NULL, + self->priv->hostname, + self->priv->port, + "", + NULL, + NULL); proxy_enum = g_object_new (G_TYPE_PROXY_ADDRESS_ENUMERATOR, "connectable", connectable, diff --git a/gio/gnetworkingprivate.h b/gio/gnetworkingprivate.h index ed0feb823..656379db3 100644 --- a/gio/gnetworkingprivate.h +++ b/gio/gnetworkingprivate.h @@ -28,10 +28,6 @@ gboolean _g_uri_parse_authority (const char *uri, guint16 *port, char **userinfo, GError **error); -gchar * _g_uri_from_authority (const gchar *protocol, - const gchar *host, - guint port, - const gchar *userinfo); guint64 g_resolver_get_serial (GResolver *resolver); diff --git a/gio/gnetworkservice.c b/gio/gnetworkservice.c index 92225f137..2b8571e9b 100644 --- a/gio/gnetworkservice.c +++ b/gio/gnetworkservice.c @@ -465,10 +465,14 @@ g_network_service_address_enumerator_next (GSocketAddressEnumerator *enumerator continue; } - uri = _g_uri_from_authority (g_network_service_get_scheme (srv_enum->srv), - hostname, - g_srv_target_get_port (target), - NULL); + uri = g_uri_join (G_URI_FLAGS_NONE, + g_network_service_get_scheme (srv_enum->srv), + NULL, + hostname, + g_srv_target_get_port (target), + "", + NULL, + NULL); g_free (hostname); addr = g_network_address_parse_uri (uri, diff --git a/gio/gsocketaddress.c b/gio/gsocketaddress.c index 848e37b0b..2b7e83ccf 100644 --- a/gio/gsocketaddress.c +++ b/gio/gsocketaddress.c @@ -398,7 +398,7 @@ g_socket_address_connectable_proxy_enumerate (GSocketConnectable *connectable) g_object_get (connectable, "address", &addr, "port", &port, NULL); ip = g_inet_address_to_string (addr); - uri = _g_uri_from_authority ("none", ip, port, NULL); + uri = g_uri_join (G_URI_FLAGS_NONE, "none", NULL, ip, port, "", NULL, NULL); addr_enum = g_object_new (G_TYPE_PROXY_ADDRESS_ENUMERATOR, "connectable", connectable, From a173a6f617d8bc058fb60f13eff08ed68c663926 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Mon, 6 Jul 2020 23:45:01 +0400 Subject: [PATCH 
07/12] gio: use g_uri_is_valid() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _g_uri_parse_authority() without output arguments is actually checking that the URI is valid, by checking that it parses successfully. We keep the existing error domain / code for compatibility reasons, instead of raising the underlying G_URI_ERROR. Signed-off-by: Marc-André Lureau --- gio/gproxyresolver.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/gio/gproxyresolver.c b/gio/gproxyresolver.c index bd9528dd0..f72ad2a72 100644 --- a/gio/gproxyresolver.c +++ b/gio/gproxyresolver.c @@ -29,6 +29,7 @@ #include "gcancellable.h" #include "gtask.h" #include "giomodule.h" +#include "gioerror.h" #include "giomodule-priv.h" #include "gnetworkingprivate.h" @@ -147,8 +148,12 @@ g_proxy_resolver_lookup (GProxyResolver *resolver, g_return_val_if_fail (G_IS_PROXY_RESOLVER (resolver), NULL); g_return_val_if_fail (uri != NULL, NULL); - if (!_g_uri_parse_authority (uri, NULL, NULL, NULL, error)) - return NULL; + if (!g_uri_is_valid (uri, G_URI_FLAGS_PARSE_STRICT, NULL)) + { + g_set_error (error, G_IO_ERROR, G_IO_ERROR_INVALID_ARGUMENT, + "Invalid URI ‘%s’", uri); + return NULL; + } iface = G_PROXY_RESOLVER_GET_IFACE (resolver); @@ -181,8 +186,10 @@ g_proxy_resolver_lookup_async (GProxyResolver *resolver, g_return_if_fail (G_IS_PROXY_RESOLVER (resolver)); g_return_if_fail (uri != NULL); - if (!_g_uri_parse_authority (uri, NULL, NULL, NULL, &error)) + if (!g_uri_is_valid (uri, G_URI_FLAGS_PARSE_STRICT, NULL)) { + g_set_error (&error, G_IO_ERROR, G_IO_ERROR_INVALID_ARGUMENT, + "Invalid URI ‘%s’", uri); g_task_report_error (resolver, callback, user_data, g_proxy_resolver_lookup_async, g_steal_pointer (&error)); From 6f419e7e2f1c00e297384688569e47ff09dc7de5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Tue, 7 Jul 2020 12:58:33 +0400 Subject: [PATCH 08/12] gio: use g_uri_split_network() in g_network_address_parse_uri()
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _g_uri_parse_authority() can be replaced with g_uri_split_network() & PARSE_STRICT. Keep the original error code, for compatibility reasons. Notice that GUri uses gint for the port, and value -1 if the port value is missing. However, GNetworkAddress::port is a guint. Signed-off-by: Marc-André Lureau --- gio/gnetworkaddress.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/gio/gnetworkaddress.c b/gio/gnetworkaddress.c index 9693fc125..2fa10733e 100644 --- a/gio/gnetworkaddress.c +++ b/gio/gnetworkaddress.c @@ -787,25 +787,27 @@ g_network_address_parse_uri (const gchar *uri, guint16 default_port, GError **error) { - GSocketConnectable *conn; - gchar *scheme; - gchar *hostname; - guint16 port; + GSocketConnectable *conn = NULL; + gchar *scheme = NULL; + gchar *hostname = NULL; + gint port; - if (!_g_uri_parse_authority (uri, &hostname, &port, NULL, error)) - return NULL; + if (!g_uri_split_network (uri, G_URI_FLAGS_PARSE_STRICT, + &scheme, &hostname, &port, NULL)) + { + g_set_error (error, G_IO_ERROR, G_IO_ERROR_INVALID_ARGUMENT, + "Invalid URI ‘%s’", uri); + return NULL; + } - if (port == 0) + if (port <= 0) port = default_port; - scheme = g_uri_parse_scheme (uri); - conn = g_object_new (G_TYPE_NETWORK_ADDRESS, "hostname", hostname, - "port", port, + "port", (guint) port, "scheme", scheme, NULL); - g_free (scheme); g_free (hostname); From 1b6a0535e8e3aad12febaa686dd53b867d86494e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Tue, 7 Jul 2020 13:02:50 +0400 Subject: [PATCH 09/12] gio: use g_uri_split_with_user() in save_userinfo() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Marc-André Lureau --- gio/gproxyaddressenumerator.c | 37 ++++++----------------------------- 1 file changed, 6 insertions(+), 31 deletions(-) diff --git 
a/gio/gproxyaddressenumerator.c b/gio/gproxyaddressenumerator.c index ecf04d110..d3de4940c 100644 --- a/gio/gproxyaddressenumerator.c +++ b/gio/gproxyaddressenumerator.c @@ -93,39 +93,14 @@ G_DEFINE_TYPE_WITH_PRIVATE (GProxyAddressEnumerator, g_proxy_address_enumerator, static void save_userinfo (GProxyAddressEnumeratorPrivate *priv, - const gchar *proxy) + const gchar *proxy) { - gchar *userinfo; + g_clear_pointer (&priv->proxy_username, g_free); + g_clear_pointer (&priv->proxy_password, g_free); - if (priv->proxy_username) - { - g_free (priv->proxy_username); - priv->proxy_username = NULL; - } - - if (priv->proxy_password) - { - g_free (priv->proxy_password); - priv->proxy_password = NULL; - } - - if (_g_uri_parse_authority (proxy, NULL, NULL, &userinfo, NULL)) - { - if (userinfo) - { - gchar **split = g_strsplit (userinfo, ":", 2); - - if (split[0] != NULL) - { - priv->proxy_username = g_uri_unescape_string (split[0], NULL); - if (split[1] != NULL) - priv->proxy_password = g_uri_unescape_string (split[1], NULL); - } - - g_strfreev (split); - g_free (userinfo); - } - } + g_uri_split_with_user (proxy, G_URI_FLAGS_HAS_PASSWORD, NULL, + &priv->proxy_username, &priv->proxy_password, + NULL, NULL, NULL, NULL, NULL, NULL, NULL); } static void From aa0b09ebeff911c9c9d2ca6c3efd06ff45a339f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Tue, 7 Jul 2020 13:03:44 +0400 Subject: [PATCH 10/12] gio: use g_uri_split_network() in g_simple_proxy_resolver_lookup() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Marc-André Lureau --- gio/gsimpleproxyresolver.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/gio/gsimpleproxyresolver.c b/gio/gsimpleproxyresolver.c index f33d49f8c..58368ea4e 100644 --- a/gio/gsimpleproxyresolver.c +++ b/gio/gsimpleproxyresolver.c @@ -327,10 +327,11 @@ g_simple_proxy_resolver_lookup (GProxyResolver *proxy_resolver, if (priv->ignore_ips || 
priv->ignore_domains) { gchar *host = NULL; - gushort port; + gint port; - if (_g_uri_parse_authority (uri, &host, &port, NULL, NULL) && - ignore_host (resolver, host, port)) + if (g_uri_split_network (uri, G_URI_FLAGS_PARSE_STRICT, NULL, + &host, &port, NULL) && + ignore_host (resolver, host, port > 0 ? port : 0)) proxy = "direct://"; g_free (host); From dd0fae13031cd61a866934fbc68e715ada3cf4ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Tue, 7 Jul 2020 13:04:02 +0400 Subject: [PATCH 11/12] gio: remove _g_uri_parse_authority() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is now unused and redundant with GUri. Fixes: #2156 Signed-off-by: Marc-André Lureau --- gio/gnetworkaddress.c | 246 --------------------------------------- gio/gnetworkingprivate.h | 6 - 2 files changed, 252 deletions(-) diff --git a/gio/gnetworkaddress.c b/gio/gnetworkaddress.c index 2fa10733e..a93bafa36 100644 --- a/gio/gnetworkaddress.c +++ b/gio/gnetworkaddress.c @@ -518,252 +518,6 @@ g_network_address_parse (const gchar *host_and_port, return connectable; } -/* Allowed characters outside alphanumeric for unreserved. */ -#define G_URI_OTHER_UNRESERVED "-._~" - -/* This or something equivalent will eventually go into glib/guri.h */ -gboolean -_g_uri_parse_authority (const char *uri, - char **host, - guint16 *port, - char **userinfo, - GError **error) -{ - char *ascii_uri, *tmp_str; - const char *start, *p, *at, *delim; - char c; - - g_return_val_if_fail (uri != NULL, FALSE); - - if (host) - *host = NULL; - - if (port) - *port = 0; - - if (userinfo) - *userinfo = NULL; - - /* Catch broken URIs early by trying to convert to ASCII. */ - ascii_uri = g_hostname_to_ascii (uri); - if (!ascii_uri) - goto error; - - /* From RFC 3986 Decodes: - * URI = scheme ":" hier-part [ "?" 
query ] [ "#" fragment ] - * hier-part = "//" authority path-abempty - * path-abempty = *( "/" segment ) - * authority = [ userinfo "@" ] host [ ":" port ] - */ - - /* Check we have a valid scheme */ - tmp_str = g_uri_parse_scheme (ascii_uri); - - if (tmp_str == NULL) - goto error; - - g_free (tmp_str); - - /* Decode hier-part: - * hier-part = "//" authority path-abempty - */ - p = ascii_uri; - start = strstr (p, "//"); - - if (start == NULL) - goto error; - - start += 2; - - /* check if the @ sign is part of the authority before attempting to - * decode the userinfo */ - delim = strpbrk (start, "/?#[]"); - at = strchr (start, '@'); - if (at && delim && at > delim) - at = NULL; - - if (at != NULL) - { - /* Decode userinfo: - * userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) - * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" - * pct-encoded = "%" HEXDIG HEXDIG - */ - p = start; - while (1) - { - c = *p++; - - if (c == '@') - break; - - /* pct-encoded */ - if (c == '%') - { - if (!(g_ascii_isxdigit (p[0]) || - g_ascii_isxdigit (p[1]))) - goto error; - - p++; - - continue; - } - - /* unreserved / sub-delims / : */ - if (!(g_ascii_isalnum (c) || - strchr (G_URI_OTHER_UNRESERVED, c) || - strchr (G_URI_RESERVED_CHARS_SUBCOMPONENT_DELIMITERS, c) || - c == ':')) - goto error; - } - - if (userinfo) - *userinfo = g_strndup (start, p - start - 1); - - start = p; - } - else - { - p = start; - } - - - /* decode host: - * host = IP-literal / IPv4address / reg-name - * reg-name = *( unreserved / pct-encoded / sub-delims ) - */ - - /* If IPv6 or IPvFuture */ - if (*p == '[') - { - gboolean has_scope_id = FALSE, has_bad_scope_id = FALSE; - - start++; - p++; - while (1) - { - c = *p++; - - if (c == ']') - break; - - if (c == '%' && !has_scope_id) - { - has_scope_id = TRUE; - if (p[0] != '2' || p[1] != '5') - has_bad_scope_id = TRUE; - continue; - } - - /* unreserved / sub-delims */ - if (!(g_ascii_isalnum (c) || - strchr (G_URI_OTHER_UNRESERVED, c) || - strchr 
(G_URI_RESERVED_CHARS_SUBCOMPONENT_DELIMITERS, c) || - c == ':' || - c == '.')) - goto error; - } - - if (host) - { - if (has_bad_scope_id) - *host = g_strndup (start, p - start - 1); - else - *host = g_uri_unescape_segment (start, p - 1, NULL); - } - - c = *p++; - } - else - { - while (1) - { - c = *p++; - - if (c == ':' || - c == '/' || - c == '?' || - c == '#' || - c == '\0') - break; - - /* pct-encoded */ - if (c == '%') - { - if (!(g_ascii_isxdigit (p[0]) || - g_ascii_isxdigit (p[1]))) - goto error; - - p++; - - continue; - } - - /* unreserved / sub-delims */ - if (!(g_ascii_isalnum (c) || - strchr (G_URI_OTHER_UNRESERVED, c) || - strchr (G_URI_RESERVED_CHARS_SUBCOMPONENT_DELIMITERS, c))) - goto error; - } - - if (host) - *host = g_uri_unescape_segment (start, p - 1, NULL); - } - - if (c == ':') - { - /* Decode port: - * port = *DIGIT - */ - guint tmp = 0; - - while (1) - { - c = *p++; - - if (c == '/' || - c == '?' || - c == '#' || - c == '\0') - break; - - if (!g_ascii_isdigit (c)) - goto error; - - tmp = (tmp * 10) + (c - '0'); - - if (tmp > 65535) - goto error; - } - if (port) - *port = (guint16) tmp; - } - - g_free (ascii_uri); - - return TRUE; - -error: - g_set_error (error, G_IO_ERROR, G_IO_ERROR_INVALID_ARGUMENT, - "Invalid URI ‘%s’", uri); - - if (host && *host) - { - g_free (*host); - *host = NULL; - } - - if (userinfo && *userinfo) - { - g_free (*userinfo); - *userinfo = NULL; - } - - g_free (ascii_uri); - - return FALSE; -} - /** * g_network_address_parse_uri: * @uri: the hostname and optionally a port diff --git a/gio/gnetworkingprivate.h b/gio/gnetworkingprivate.h index 656379db3..dd8a277a5 100644 --- a/gio/gnetworkingprivate.h +++ b/gio/gnetworkingprivate.h @@ -23,12 +23,6 @@ G_BEGIN_DECLS -gboolean _g_uri_parse_authority (const char *uri, - char **host, - guint16 *port, - char **userinfo, - GError **error); - guint64 g_resolver_get_serial (GResolver *resolver); gint g_socket (gint domain, From 0fea3d71e43ea624b7b44dc02c10d165358b3c68 Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Tue, 7 Jul 2020 20:23:36 +0400 Subject: [PATCH 12/12] guri: add some IPv6 scope-id tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a few IPv6 scope-id parsing corner-case tests. - check that an incorrectly scoped IPv6 address ending with only %25 isn't decoded. - check that a valid scoped IPv6 address passes g_uri_is_valid() As discussed in https://gitlab.gnome.org/GNOME/glib/-/merge_requests/1567#note_860499, for historical reasons, GUri accepts the % preceding the zone-id in the unescaped form as well. Signed-off-by: Marc-André Lureau --- glib/tests/uri.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/glib/tests/uri.c b/glib/tests/uri.c index be70b5d6a..dd6933603 100644 --- a/glib/tests/uri.c +++ b/glib/tests/uri.c @@ -703,6 +703,8 @@ static const UriAbsoluteTest absolute_tests[] = { { "http", NULL, "fe80::dead:beef%em1", -1, "/", NULL, NULL } }, { "http://[fe80::dead:beef%10]/", { "http", NULL, "fe80::dead:beef%10", -1, "/", NULL, NULL } }, + { "http://[fe80::dead:beef%25]/", + { "http", NULL, "fe80::dead:beef%25", -1, "/", NULL, NULL } }, }; static int num_absolute_tests = G_N_ELEMENTS (absolute_tests); @@ -1275,6 +1277,7 @@ test_uri_is_valid (void) g_assert_error (error, G_URI_ERROR, G_URI_ERROR_BAD_HOST); g_clear_error (&error); + g_assert_true (g_uri_is_valid ("http://[fe80::dead:beef%25wef]/", G_URI_FLAGS_NONE, NULL)); g_assert_false (g_uri_is_valid ("http://[fe80::dead:beef%wef%]/", G_URI_FLAGS_NONE, &error)); g_assert_error (error, G_URI_ERROR, G_URI_ERROR_BAD_HOST); g_clear_error (&error);