From d83d68d64c40021be432416f9912ff9e59a337ce Mon Sep 17 00:00:00 2001 From: Dan Winship Date: Thu, 17 Jan 2019 10:38:20 +0400 Subject: [PATCH] guri: new URI parsing and generating functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a set of new URI parsing and generating functions, including a new parsed-URI type GUri. Move all the code from gurifuncs.c into guri.c, reimplementing some of those functions (and g_string_append_uri_encoded()) in terms of the new code. Fixes: https://gitlab.gnome.org/GNOME/glib/issues/110 Signed-off-by: Marc-André Lureau --- .gitlab-ci.yml | 2 + docs/reference/glib/glib-docs.xml | 2 +- docs/reference/glib/glib-sections.txt | 48 +- docs/reference/gobject/gobject-sections.txt | 2 + glib/glib-autocleanups.h | 1 + glib/glib.h | 2 +- glib/gstring.c | 66 +- glib/guri.c | 2350 +++++++++++++++++++ glib/guri.h | 347 +++ glib/gurifuncs.c | 252 -- glib/gurifuncs.h | 83 - glib/guriprivate.h | 36 + glib/meson.build | 5 +- glib/tests/uri.c | 950 +++++++- gobject/gboxed.c | 1 + gobject/glib-types.h | 11 + 16 files changed, 3715 insertions(+), 443 deletions(-) create mode 100644 glib/guri.c create mode 100644 glib/guri.h delete mode 100644 glib/gurifuncs.c delete mode 100644 glib/gurifuncs.h create mode 100644 glib/guriprivate.h diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index fb0454e57..e25d57bd9 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -138,6 +138,8 @@ installed-tests: _build - ninja -C _build - sudo ninja -C _build install + # Remove old headers, possibly present in current installation + - sudo rm -f /usr/include/glib-2.0/glib/gurifuncs.h - sudo chown -R `id -un`:`id -gn` _build/ # FIXME Install newer xdg-desktop-portal with # GMemoryMonitor support, see: diff --git a/docs/reference/glib/glib-docs.xml b/docs/reference/glib/glib-docs.xml index 3ed125652..38228ca0a 100644 --- a/docs/reference/glib/glib-docs.xml +++ b/docs/reference/glib/glib-docs.xml @@ -81,7 +81,7 @@ - + diff --git 
a/docs/reference/glib/glib-sections.txt b/docs/reference/glib/glib-sections.txt index eed38b759..6f2722ade 100644 --- a/docs/reference/glib/glib-sections.txt +++ b/docs/reference/glib/glib-sections.txt @@ -3335,19 +3335,63 @@ g_base64_decode_inplace
URI Functions -gurifuncs +guri +GUri +g_uri_ref +g_uri_unref + +GUriFlags +g_uri_split +g_uri_split_with_user +g_uri_split_network +g_uri_is_valid +g_uri_join +g_uri_join_with_user +g_uri_parse +g_uri_parse_relative +g_uri_resolve_relative +g_uri_build +g_uri_build_with_user +g_uri_peek_scheme +g_uri_parse_scheme + +GUriHideFlags +g_uri_to_string +g_uri_to_string_partial + +g_uri_get_scheme +g_uri_get_userinfo +g_uri_get_user +g_uri_get_password +g_uri_get_auth_params +g_uri_get_host +g_uri_get_port +g_uri_get_path +g_uri_get_query +g_uri_get_fragment +g_uri_get_flags + +g_uri_parse_params + G_URI_RESERVED_CHARS_ALLOWED_IN_PATH G_URI_RESERVED_CHARS_ALLOWED_IN_PATH_ELEMENT G_URI_RESERVED_CHARS_ALLOWED_IN_USERINFO G_URI_RESERVED_CHARS_GENERIC_DELIMITERS G_URI_RESERVED_CHARS_SUBCOMPONENT_DELIMITERS -g_uri_parse_scheme g_uri_escape_string g_uri_unescape_string +g_uri_escape_bytes +g_uri_unescape_bytes g_uri_unescape_segment + g_uri_list_extract_uris g_filename_from_uri g_filename_to_uri + +G_URI_ERROR +GUriError + +g_uri_error_quark
diff --git a/docs/reference/gobject/gobject-sections.txt b/docs/reference/gobject/gobject-sections.txt index e5ffe5b23..70f28329d 100644 --- a/docs/reference/gobject/gobject-sections.txt +++ b/docs/reference/gobject/gobject-sections.txt @@ -408,6 +408,7 @@ G_TYPE_SOURCE G_TYPE_POLLFD G_TYPE_THREAD G_TYPE_OPTION_GROUP +G_TYPE_URI G_TYPE_IS_BOXED @@ -441,6 +442,7 @@ g_mapped_file_get_type g_markup_parse_context_get_type g_thread_get_type g_option_group_get_type +g_uri_get_type
diff --git a/glib/glib-autocleanups.h b/glib/glib-autocleanups.h index 529cb5de2..2fa4b9698 100644 --- a/glib/glib-autocleanups.h +++ b/glib/glib-autocleanups.h @@ -97,5 +97,6 @@ G_DEFINE_AUTO_CLEANUP_CLEAR_FUNC(GVariantDict, g_variant_dict_clear) G_DEFINE_AUTOPTR_CLEANUP_FUNC(GVariantType, g_variant_type_free) G_DEFINE_AUTO_CLEANUP_FREE_FUNC(GStrv, g_strfreev, NULL) G_DEFINE_AUTOPTR_CLEANUP_FUNC(GRefString, g_ref_string_release) +G_DEFINE_AUTOPTR_CLEANUP_FUNC(GUri, g_uri_unref) G_GNUC_END_IGNORE_DEPRECATIONS diff --git a/glib/glib.h b/glib/glib.h index e3b30558e..fe8ce88d5 100644 --- a/glib/glib.h +++ b/glib/glib.h @@ -91,7 +91,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/glib/gstring.c b/glib/gstring.c index be456ffb4..85294258b 100644 --- a/glib/gstring.c +++ b/glib/gstring.c @@ -35,7 +35,7 @@ #include #include "gstring.h" - +#include "guriprivate.h" #include "gprintf.h" @@ -506,34 +506,6 @@ g_string_insert_len (GString *string, return string; } -#define SUB_DELIM_CHARS "!$&'()*+,;=" - -static gboolean -is_valid (char c, - const char *reserved_chars_allowed) -{ - if (g_ascii_isalnum (c) || - c == '-' || - c == '.' || - c == '_' || - c == '~') - return TRUE; - - if (reserved_chars_allowed && - strchr (reserved_chars_allowed, c) != NULL) - return TRUE; - - return FALSE; -} - -static gboolean -gunichar_ok (gunichar c) -{ - return - (c != (gunichar) -2) && - (c != (gunichar) -1); -} - /** * g_string_append_uri_escaped: * @string: a #GString @@ -542,7 +514,7 @@ gunichar_ok (gunichar c) * to be used, or %NULL * @allow_utf8: set %TRUE if the escaped string may include UTF8 characters * - * Appends @unescaped to @string, escaped any characters that + * Appends @unescaped to @string, escaping any characters that * are reserved in URIs using URI-style escape sequences. 
* * Returns: (transfer none): @string @@ -555,38 +527,8 @@ g_string_append_uri_escaped (GString *string, const gchar *reserved_chars_allowed, gboolean allow_utf8) { - unsigned char c; - const gchar *end; - static const gchar hex[16] = "0123456789ABCDEF"; - - g_return_val_if_fail (string != NULL, NULL); - g_return_val_if_fail (unescaped != NULL, NULL); - - end = unescaped + strlen (unescaped); - - while ((c = *unescaped) != 0) - { - if (c >= 0x80 && allow_utf8 && - gunichar_ok (g_utf8_get_char_validated (unescaped, end - unescaped))) - { - int len = g_utf8_skip [c]; - g_string_append_len (string, unescaped, len); - unescaped += len; - } - else if (is_valid (c, reserved_chars_allowed)) - { - g_string_append_c (string, c); - unescaped++; - } - else - { - g_string_append_c (string, '%'); - g_string_append_c (string, hex[((guchar)c) >> 4]); - g_string_append_c (string, hex[((guchar)c) & 0xf]); - unescaped++; - } - } - + _uri_encoder (string, (const guchar *) unescaped, strlen (unescaped), + reserved_chars_allowed, allow_utf8); return string; } diff --git a/glib/guri.c b/glib/guri.c new file mode 100644 index 000000000..5b3b721a6 --- /dev/null +++ b/glib/guri.c @@ -0,0 +1,2350 @@ +/* GLIB - Library of useful routines for C programming + * Copyright © 2020 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General + * Public License along with this library; if not, see + * . 
+ */ + +#include "config.h" + +#include +#include + +#include "glib.h" +#include "glibintl.h" +#include "guriprivate.h" + +/** + * SECTION:guri + * @short_description: URI-handling utilities + * @include: glib.h + * + * The #GUri type and related functions can be used to parse URIs into + * their components, and build valid URIs from individual components. + * + * ## Parsing URIs + * + * The most minimalist APIs for parsing URIs are g_uri_split() and + * g_uri_split_with_user(). These split a URI into its component + * parts, and return the parts; the difference between the two is that + * g_uri_split() treats the "userinfo" component of the URI as a + * single element, while g_uri_split_with_user() can (depending on the + * #GUriFlags you pass) treat it as containing a username, password, + * and authentication parameters. Alternatively, g_uri_split_network() + * can be used when you are only interested in the components that are + * needed to initiate a network connection to the service (scheme, + * host, and port). + * + * g_uri_parse() is similar to g_uri_split(), but instead of returning + * individual strings, it returns a #GUri structure (and it requires + * that the URI be an absolute URI). + * + * g_uri_resolve_relative() and g_uri_parse_relative() allow you to + * resolve a relative URI relative to a base URI. + * g_uri_resolve_relative() takes two strings and returns a string, + * and g_uri_parse_relative() takes a #GUri and a string and returns a + * #GUri. + * + * All of the parsing functions take a #GUriFlags argument describing + * exactly how to parse the URI; see the documentation for that type + * for more details on the specific flags that you can pass. If you + * need to choose different flags based on the type of URI, you can + * use g_uri_peek_scheme() on the URI string to check the scheme + * first, and use that to decide what flags to parse it with. 
+ * + * ## Building URIs + * + * g_uri_join() and g_uri_join_with_user() can be used to construct + * valid URI strings from a set of component strings; they are the + * inverse of g_uri_split() and g_uri_split_with_user(). + * + * Similarly, g_uri_build() and g_uri_build_with_user() can be used to + * construct a #GUri from a set of component strings. + * + * As with the parsing functions, the building functions take a + * #GUriFlags argument; in particular, it is important to keep in mind + * whether the URI components you are using have `%`-encoded + * characters in them or not, and pass the appropriate flags + * accordingly. + * + * ## `file://` URIs + * + * Note that Windows and Unix both define special rules for parsing + * `file://` URIs (involving non-UTF-8 character sets on Unix, and the + * interpretation of path separators on Windows). #GUri does not + * implement these rules. Use g_filename_from_uri() and + * g_filename_to_uri() if you want to properly convert between + * `file://` URIs and local filenames. + * + * ## URI Equality + * + * Note that there is no `g_uri_equal ()` function, because comparing + * URIs usefully requires scheme-specific knowledge that #GUri does + * not have. For example, "`http://example.com/`" and + * "`http://EXAMPLE.COM:80`" have exactly the same meaning according + * to the HTTP specification, and "`data:,foo`" and + * "`data:;base64,Zm9v`" resolve to the same thing according to the + * `data:` URI specification. + * + * Since: 2.66 + */ + +/** + * GUri: + * + * A parsed absolute URI. + * + * Since #GUri only represents absolute URIs, all #GUris will have a + * URI scheme, so g_uri_get_scheme() will always return a non-%NULL + * answer. Likewise, by definition, all URIs have a path component, so + * g_uri_get_path() will always return non-%NULL (though it may return + * the empty string). 
+ * + * If the URI string has an "authority" component (that is, if the + * scheme is followed by "`://`" rather than just "`:`"), then the + * #GUri will contain a hostname, and possibly a port and "userinfo". + * Additionally, depending on how the #GUri was constructed/parsed, + * the userinfo may be split out into a username, password, and + * additional authorization-related parameters. + * + * Normally, the components of a #GUri will have all `%`-encoded + * characters decoded. However, if you construct/parse a #GUri with + * %G_URI_FLAGS_ENCODED, then the `%`-encoding will be preserved instead in + * the userinfo, path, and query fields (and in the host field if also + * created with %G_URI_FLAGS_NON_DNS). In particular, this is necessary if + * the URI may contain binary data or non-UTF-8 text, or if decoding + * the components might change the interpretation of the URI. + * + * For example, with the encoded flag: + * + * |[ + * GUri *uri = g_uri_parse ("http://host/path?query=http%3A%2F%2Fhost%2Fpath%3Fparam%3Dvalue", G_URI_FLAGS_ENCODED, &err); + * g_assert_cmpstr (g_uri_get_query (uri), ==, "query=http%3A%2F%2Fhost%2Fpath%3Fparam%3Dvalue"); + * ]| + * + * While the default `%`-decoding behaviour would give: + * + * |[ + * GUri *uri = g_uri_parse ("http://host/path?query=http%3A%2F%2Fhost%2Fpath%3Fparam%3Dvalue", G_URI_FLAGS_NONE, &err); + * g_assert_cmpstr (g_uri_get_query (uri), ==, "query=http://host/path?param=value"); + * ]| + * + * During decoding, if an invalid UTF-8 string is encountered, parsing will fail + * with an error indicating the bad string location: + * + * |[ + * GUri *uri = g_uri_parse ("http://host/path?query=http%3A%2F%2Fhost%2Fpath%3Fbad%3D%00alue", G_URI_FLAGS_NONE, &err); + * g_assert_error(err, G_URI_ERROR, G_URI_ERROR_BAD_QUERY); + * ]| + * + * (you should pass %G_URI_FLAGS_ENCODED if you need to handle that case manually). + * + * #GUri is immutable once constructed, and can safely be accessed from + * multiple threads. 
Its reference counting is atomic. + * + * Since: 2.66 + */ +struct _GUri { + gchar *scheme; + gchar *userinfo; + gchar *host; + gint port; + gchar *path; + gchar *query; + gchar *fragment; + + gchar *user; + gchar *password; + gchar *auth_params; + + GUriFlags flags; +}; + +/** + * g_uri_ref: (skip) + * @uri: a #GUri + * + * Increments the reference count of @uri by one. + * + * Returns: @uri + * + * Since: 2.66 + */ +GUri * +g_uri_ref (GUri *uri) +{ + g_return_val_if_fail (uri != NULL, NULL); + + return g_atomic_rc_box_acquire (uri); +} + +static void g_uri_clear(GUri *uri) +{ + g_free (uri->scheme); + g_free (uri->userinfo); + g_free (uri->host); + g_free (uri->path); + g_free (uri->query); + g_free (uri->fragment); + g_free (uri->user); + g_free (uri->password); + g_free (uri->auth_params); +} + +/** + * g_uri_unref: (skip) + * @uri: a #GUri + * + * Atomically decrements the reference count of @uri by one. + * + * When the reference count reaches zero, the resources allocated by + * @uri are freed + * + * Since: 2.66 + */ +void +g_uri_unref (GUri *uri) +{ + g_return_if_fail (uri != NULL); + + g_atomic_rc_box_release_full (uri, (GDestroyNotify)g_uri_clear); +} + +static gboolean +g_uri_char_is_unreserved (gchar ch) +{ + if (g_ascii_isalnum (ch)) + return TRUE; + return ch == '-' || ch == '.' || ch == '_' || ch == '~'; +} + +#define XDIGIT(c) ((c) <= '9' ? 
(c) - '0' : ((c) & 0x4F) - 'A' + 10) +#define HEXCHAR(s) ((XDIGIT (s[1]) << 4) + XDIGIT (s[2])) + +/* uri_decoder: + * Copies the @length bytes at @start into a newly allocated, + * NUL-terminated buffer, decoding "%XX" escapes on the way. If + * @just_normalize is set (honoured only together with + * %G_URI_FLAGS_ENCODED), only escapes of unreserved characters are + * decoded and all other escapes are left in place. Returns the length + * of the result, or -1 with @error set to @parse_error on failure. + */ +static gssize +uri_decoder (gchar **out, + const gchar *start, + gsize length, + gboolean just_normalize, + GUriFlags flags, + GUriError parse_error, + GError **error) +{ + gchar *decoded, *d, c; + const gchar *invalid, *s, *end; + gssize len; + + if (!(flags & G_URI_FLAGS_ENCODED)) + just_normalize = FALSE; + + decoded = g_malloc (length + 1); + for (s = start, end = s + length, d = decoded; s < end; s++) + { + if (*s == '%') + { + /* The input is length-delimited and need not be + * NUL-terminated, so make sure both hex digits lie inside + * [start, end) before looking at s[1] and s[2]. + */ + if (s + 2 >= end || + !g_ascii_isxdigit (s[1]) || + !g_ascii_isxdigit (s[2])) + { + /* % followed by non-hex or by the end of the input; this is an error */ + if (flags & G_URI_FLAGS_PARSE_STRICT) + { + g_set_error_literal (error, G_URI_ERROR, parse_error, + _("Invalid %-encoding in URI")); + g_free (decoded); + return -1; + } + + /* In non-strict mode, just let it through; we *don't* + * fix it to "%25", since that might change the way that + * the URI's owner would interpret it. + */ + *d++ = *s; + continue; + } + + c = HEXCHAR (s); + if (just_normalize && !g_uri_char_is_unreserved (c)) + { + /* Leave the % sequence there. 
*/ + *d++ = *s; + } + else + { + *d++ = c; + s += 2; + } + } + else + *d++ = *s; + } + *d = '\0'; + + len = d - decoded; + + if (!(flags & G_URI_FLAGS_ENCODED) && + !g_utf8_validate (decoded, len, &invalid)) + { + g_set_error_literal (error, G_URI_ERROR, parse_error, + _("Non-UTF-8 characters in URI")); + g_free (decoded); + return -1; + } + + if (out) + *out = g_steal_pointer (&decoded); + + g_free (decoded); + return len; +} + +static gboolean +uri_decode (gchar **out, + const gchar *start, + gsize length, + GUriFlags flags, + GUriError parse_error, + GError **error) +{ + return uri_decoder (out, start, length, FALSE, flags, + parse_error, error) != -1; +} + +static gboolean +uri_normalize (gchar **out, + const gchar *start, + gsize length, + GUriFlags flags, + GUriError parse_error, + GError **error) +{ + return uri_decoder (out, start, length, TRUE, flags, + parse_error, error) != -1; +} + +/* Returns TRUE if byte @c may be emitted without %-encoding: it is + * either an unreserved character or listed in @reserved_chars_allowed. + */ +static gboolean +is_valid (guchar c, + const gchar *reserved_chars_allowed) +{ + if (g_uri_char_is_unreserved (c)) + return TRUE; + + /* strchr() also matches the terminating NUL of its first argument, + * so a NUL byte has to be rejected explicitly; otherwise it would be + * appended unescaped. + */ + if (c != '\0' && reserved_chars_allowed && + strchr (reserved_chars_allowed, c)) + return TRUE; + + return FALSE; +} + +/* _uri_encoder: + * Appends the @length bytes at @start to @out, %-encoding every byte + * that is neither unreserved nor listed in @reserved_chars_allowed. + * If @allow_utf8 is set, valid multi-byte UTF-8 sequences are copied + * through unescaped; invalid or truncated sequences are %-encoded + * byte by byte. 
+ */ +void +_uri_encoder (GString *out, + const guchar *start, + gsize length, + const gchar *reserved_chars_allowed, + gboolean allow_utf8) +{ + static const gchar hex[16] = "0123456789ABCDEF"; + const guchar *p = start; + const guchar *end = p + length; + + while (p < end) + { + gunichar multibyte_utf8_char = 0; + + if (allow_utf8 && *p >= 0x80) + multibyte_utf8_char = g_utf8_get_char_validated ((gchar *)p, end - p); + + /* g_utf8_get_char_validated() reports failure as (gunichar) -1 or + * (gunichar) -2. gunichar is unsigned, so a plain "> 0" test would + * accept those values and copy g_utf8_skip bytes of invalid + * (possibly truncated) input, running past @end. + */ + if (multibyte_utf8_char > 0 && + multibyte_utf8_char != (gunichar) -1 && + multibyte_utf8_char != (gunichar) -2) + { + gint len = g_utf8_skip [*p]; + g_string_append_len (out, (gchar *)p, len); + p += len; + } + else if (is_valid (*p, reserved_chars_allowed)) + { + g_string_append_c (out, *p); + p++; + } + else + { + g_string_append_c (out, '%'); + g_string_append_c (out, hex[*p >> 4]); + g_string_append_c (out, hex[*p & 0xf]); + p++; + } + } +} + +static gboolean +parse_host (const gchar *start, + gsize length, + GUriFlags flags, + gchar **out, + GError **error) +{ + gchar *decoded, *host, 
*pct; + gchar *addr = NULL; + + if (*start == '[') + { + if (start[length - 1] != ']') + { + bad_ipv6_literal: + g_free (addr); + g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_HOST, + _("Invalid IPv6 address '%.*s' in URI"), + (gint)length, start); + return FALSE; + } + + addr = g_strndup (start + 1, length - 2); + + /* If there's an IPv6 scope id, ignore it for the moment. */ + pct = strchr (addr, '%'); + if (pct) + *pct = '\0'; + + /* addr must be an IPv6 address */ + if (!g_hostname_is_ip_address (addr) || !strchr (addr, ':')) + goto bad_ipv6_literal; + + if (pct) + { + *pct = '%'; + if (strchr (pct + 1, '%')) + goto bad_ipv6_literal; + /* If the '%' is encoded as '%25' (which it should be), decode it */ + if (pct[1] == '2' && pct[2] == '5' && pct[3]) + memmove (pct + 1, pct + 3, strlen (pct + 3) + 1); + } + + host = addr; + goto ok; + } + + if (g_ascii_isdigit (*start)) + { + addr = g_strndup (start, length); + if (g_hostname_is_ip_address (addr)) + { + host = addr; + goto ok; + } + g_free (addr); + } + + if (flags & G_URI_FLAGS_NON_DNS) + { + if (!uri_normalize (&decoded, start, length, flags, + G_URI_ERROR_BAD_HOST, error)) + return FALSE; + host = decoded; + goto ok; + } + + flags &= ~G_URI_FLAGS_ENCODED; + if (!uri_decode (&decoded, start, length, flags, + G_URI_ERROR_BAD_HOST, error)) + return FALSE; + + /* You're not allowed to %-encode an IP address, so if it wasn't + * one before, it better not be one now. 
+ */ + if (g_hostname_is_ip_address (decoded)) + { + g_free (decoded); + g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_HOST, + _("Illegal encoded IP address '%.*s' in URI"), + (gint)length, start); + return FALSE; + } + + if (g_hostname_is_non_ascii (decoded)) + { + host = g_hostname_to_ascii (decoded); + g_free (decoded); + } + else + host = decoded; + + ok: + if (out) + *out = host; + else + g_free (host); + return TRUE; +} + +static gboolean +parse_port (const gchar *start, + gsize length, + gint *out, + GError **error) +{ + gchar *end; + gulong parsed_port; + + /* strtoul() allows leading + or -, so we have to check this first. */ + if (!g_ascii_isdigit (*start)) + { + g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_PORT, + _("Could not parse port '%.*s' in URI"), + (gint)length, start); + return FALSE; + } + + /* We know that *(start + length) is either '\0' or a non-numeric + * character, so strtoul() won't scan beyond it. + */ + parsed_port = strtoul (start, &end, 10); + if (end != start + length) + { + g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_PORT, + _("Could not parse port '%.*s' in URI"), + (gint)length, start); + return FALSE; + } + else if (parsed_port > 65535) + { + g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_PORT, + _("Port '%.*s' in URI is out of range"), + (gint)length, start); + return FALSE; + } + + if (out) + *out = parsed_port; + return TRUE; +} + +static gboolean +parse_userinfo (const gchar *start, + gsize length, + GUriFlags flags, + gchar **user, + gchar **password, + gchar **auth_params, + GError **error) +{ + const gchar *user_end = NULL, *password_end = NULL, *auth_params_end; + + auth_params_end = start + length; + if (flags & G_URI_FLAGS_HAS_AUTH_PARAMS) + password_end = memchr (start, ';', auth_params_end - start); + if (!password_end) + password_end = auth_params_end; + if (flags & G_URI_FLAGS_HAS_PASSWORD) + user_end = memchr (start, ':', password_end - start); + if (!user_end) + user_end = password_end; + + if 
(!uri_normalize (user, start, user_end - start, flags, + G_URI_ERROR_BAD_USER, error)) + return FALSE; + + if (*user_end == ':') + { + start = user_end + 1; + if (!uri_normalize (password, start, password_end - start, flags, + G_URI_ERROR_BAD_PASSWORD, error)) + { + if (user) + g_clear_pointer (user, g_free); + return FALSE; + } + } + else if (password) + *password = NULL; + + if (*password_end == ';') + { + start = password_end + 1; + if (!uri_normalize (auth_params, start, auth_params_end - start, flags, + G_URI_ERROR_BAD_AUTH_PARAMS, error)) + { + if (user) + g_clear_pointer (user, g_free); + if (password) + g_clear_pointer (password, g_free); + return FALSE; + } + } + else if (auth_params) + *auth_params = NULL; + + return TRUE; +} + +static gchar * +uri_cleanup (const gchar *uri_string) +{ + GString *copy; + const gchar *end; + + /* Skip leading whitespace */ + while (g_ascii_isspace (*uri_string)) + uri_string++; + + /* Ignore trailing whitespace */ + end = uri_string + strlen (uri_string); + while (end > uri_string && g_ascii_isspace (*(end - 1))) + end--; + + /* Copy the rest, encoding unencoded spaces and stripping other whitespace */ + copy = g_string_sized_new (end - uri_string); + while (uri_string < end) + { + if (*uri_string == ' ') + g_string_append (copy, "%20"); + else if (g_ascii_isspace (*uri_string)) + ; + else + g_string_append_c (copy, *uri_string); + uri_string++; + } + + return g_string_free (copy, FALSE); +} + +/* g_uri_split_internal: + * Shared implementation behind the public g_uri_split*() and + * g_uri_is_valid() entry points. Every out parameter is optional; + * each non-%NULL one is reset up front so that a component that does + * not appear in @uri_string is reported as %NULL (or -1 for @port). 
+ */ +static gboolean +g_uri_split_internal (const gchar *uri_string, + GUriFlags flags, + gchar **scheme, + gchar **userinfo, + gchar **user, + gchar **password, + gchar **auth_params, + gchar **host, + gint *port, + gchar **path, + gchar **query, + gchar **fragment, + GError **error) +{ + const gchar *end, *colon, *at, *path_start, *semi, *question; + const gchar *p, *bracket, *hostend; + gchar *cleaned_uri_string = NULL; + + if (scheme) + *scheme = NULL; + if (userinfo) + *userinfo = NULL; + /* @user is only written when the URI has a userinfo part, so it + * must be reset here like the other out parameters; otherwise the + * caller is handed an uninitialized pointer. + */ + if (user) + *user = NULL; + if (password) + *password = NULL; + if 
(auth_params) + *auth_params = NULL; + if (host) + *host = NULL; + if (port) + *port = -1; + if (path) + *path = NULL; + if (query) + *query = NULL; + if (fragment) + *fragment = NULL; + + if (!(flags & G_URI_FLAGS_PARSE_STRICT) && strpbrk (uri_string, " \t\n\r")) + { + cleaned_uri_string = uri_cleanup (uri_string); + uri_string = cleaned_uri_string; + } + + /* Find scheme */ + p = uri_string; + while (*p && (g_ascii_isalpha (*p) || + (p > uri_string && (g_ascii_isdigit (*p) || + *p == '.' || *p == '+' || *p == '-')))) + p++; + + if (p > uri_string && *p == ':') + { + if (scheme) + *scheme = g_ascii_strdown (uri_string, p - uri_string); + p++; + } + else + { + if (scheme) + *scheme = NULL; + p = uri_string; + } + + /* Check for authority */ + if (strncmp (p, "//", 2) == 0) + { + p += 2; + + path_start = p + strcspn (p, "/?#"); + at = memchr (p, '@', path_start - p); + if (at) + { + if (!(flags & G_URI_FLAGS_PARSE_STRICT)) + { + gchar *next_at; + + /* Any "@"s in the userinfo must be %-encoded, but + * people get this wrong sometimes. Since "@"s in the + * hostname are unlikely (and also wrong anyway), assume + * that if there are extra "@"s, they belong in the + * userinfo. + */ + do + { + next_at = memchr (at + 1, '@', path_start - (at + 1)); + if (next_at) + at = next_at; + } + while (next_at); + } + + if (user || password || auth_params || + (flags & (G_URI_FLAGS_HAS_PASSWORD|G_URI_FLAGS_HAS_AUTH_PARAMS))) + { + if (!parse_userinfo (p, at - p, flags, + user, password, auth_params, + error)) + goto fail; + } + + if (!uri_normalize (userinfo, p, at - p, flags, + G_URI_ERROR_BAD_USER, error)) + goto fail; + + p = at + 1; + } + + if (!(flags & G_URI_FLAGS_PARSE_STRICT)) + { + semi = strchr (p, ';'); + if (semi && semi < path_start) + { + /* Technically, semicolons are allowed in the "host" + * production, but no one ever does this, and some + * schemes mistakenly use semicolon as a delimiter + * marking the start of the path. 
We have to check this + * after checking for userinfo though, because a + * semicolon before the "@" must be part of the + * userinfo. + */ + path_start = semi; + } + } + + /* Find host and port. The host may be a bracket-delimited IPv6 + * address, in which case the colon delimiting the port must come + * (immediately) after the close bracket. + */ + if (*p == '[') + { + bracket = memchr (p, ']', path_start - p); + if (bracket && *(bracket + 1) == ':') + colon = bracket + 1; + else + colon = NULL; + } + else + colon = memchr (p, ':', path_start - p); + + hostend = colon ? colon : path_start; + if (!parse_host (p, hostend - p, flags, host, error)) + goto fail; + + if (colon && colon != path_start - 1) + { + p = colon + 1; + if (!parse_port (p, path_start - p, port, error)) + goto fail; + } + + p = path_start; + } + + /* Find fragment. */ + end = p + strcspn (p, "#"); + if (*end == '#') + { + if (!uri_decode (fragment, end + 1, strlen (end + 1), flags, + G_URI_ERROR_BAD_FRAGMENT, error)) + goto fail; + } + + /* Find query */ + question = memchr (p, '?', end - p); + if (question) + { + if (!uri_normalize (query, question + 1, end - (question + 1), flags, + G_URI_ERROR_BAD_QUERY, error)) + goto fail; + end = question; + } + + if (!uri_normalize (path, p, end - p, flags, + G_URI_ERROR_BAD_PATH, error)) + goto fail; + + g_free (cleaned_uri_string); + return TRUE; + + fail: + if (scheme) + g_clear_pointer (scheme, g_free); + if (userinfo) + g_clear_pointer (userinfo, g_free); + if (host) + g_clear_pointer (host, g_free); + if (port) + *port = -1; + if (path) + g_clear_pointer (path, g_free); + if (query) + g_clear_pointer (query, g_free); + if (fragment) + g_clear_pointer (fragment, g_free); + + g_free (cleaned_uri_string); + return FALSE; +} + +/** + * g_uri_split: + * @uri_string: a string containing a relative or absolute URI + * @flags: flags for parsing @uri_string + * @scheme: (out) (nullable) (optional) (transfer full): on return, contains + * the scheme 
(converted to lowercase), or %NULL + * @userinfo: (out) (nullable) (optional) (transfer full): on return, contains + * the userinfo, or %NULL + * @host: (out) (nullable) (optional) (transfer full): on return, contains the + * host, or %NULL + * @port: (out) (nullable) (optional) (transfer full): on return, contains the + * port, or -1 + * @path: (out) (nullable) (optional) (transfer full): on return, contains the + * path + * @query: (out) (nullable) (optional) (transfer full): on return, contains the + * query, or %NULL + * @fragment: (out) (nullable) (optional) (transfer full): on return, contains + * the fragment, or %NULL + * @error: #GError for error reporting, or %NULL to ignore. + * + * Parses @uri_string (which can be an absolute or relative URI) + * according to @flags, and returns the pieces. Any component that + * doesn't appear in @uri_string will be returned as %NULL (but note + * that all URIs always have a path component, though it may be the + * empty string). + * + * If @flags contains %G_URI_FLAGS_ENCODED, then `%`-encoded characters in + * @uri_string will remain encoded in the output strings. (If not, + * then all such characters will be decoded.) Note that decoding will + * only work if the URI components are ASCII or UTF-8, so you will + * need to use %G_URI_FLAGS_ENCODED if they are not. + * + * Note that the %G_URI_FLAGS_HAS_PASSWORD and + * %G_URI_FLAGS_HAS_AUTH_PARAMS @flags are ignored by g_uri_split(), + * since it always returns only the full userinfo; use + * g_uri_split_with_user() if you want it split up. + * + * Returns: (skip): %TRUE if @uri_string parsed successfully, %FALSE + * on error. 
+ * + * Since: 2.66 + */ +gboolean +g_uri_split (const gchar *uri_string, + GUriFlags flags, + gchar **scheme, + gchar **userinfo, + gchar **host, + gint *port, + gchar **path, + gchar **query, + gchar **fragment, + GError **error) +{ + g_return_val_if_fail (uri_string != NULL, FALSE); + g_return_val_if_fail (error == NULL || *error == NULL, FALSE); + + return g_uri_split_internal (uri_string, flags, + scheme, userinfo, NULL, NULL, NULL, + host, port, path, query, fragment, + error); +} + +/** + * g_uri_split_with_user: + * @uri_string: a string containing a relative or absolute URI + * @flags: flags for parsing @uri_string + * @scheme: (out) (nullable) (optional) (transfer full): on return, contains + * the scheme (converted to lowercase), or %NULL + * @user: (out) (nullable) (optional) (transfer full): on return, contains + * the user, or %NULL + * @password: (out) (nullable) (optional) (transfer full): on return, contains + * the password, or %NULL + * @auth_params: (out) (nullable) (optional) (transfer full): on return, contains + * the auth_params, or %NULL + * @host: (out) (nullable) (optional) (transfer full): on return, contains the + * host, or %NULL + * @port: (out) (nullable) (optional) (transfer full): on return, contains the + * port, or -1 + * @path: (out) (nullable) (optional) (transfer full): on return, contains the + * path + * @query: (out) (nullable) (optional) (transfer full): on return, contains the + * query, or %NULL + * @fragment: (out) (nullable) (optional) (transfer full): on return, contains + * the fragment, or %NULL + * @error: #GError for error reporting, or %NULL to ignore. + * + * Parses @uri_string (which can be an absolute or relative URI) + * according to @flags, and returns the pieces. Any component that + * doesn't appear in @uri_string will be returned as %NULL (but note + * that all URIs always have a path component, though it may be the + * empty string). 
+ * + * See g_uri_split(), and the definition of #GUriFlags, for more + * information on the effect of @flags. Note that @password will only + * be parsed out if @flags contains %G_URI_FLAGS_HAS_PASSWORD, and + * @auth_params will only be parsed out if @flags contains + * %G_URI_FLAGS_HAS_AUTH_PARAMS. + * + * Returns: (skip): %TRUE if @uri_string parsed successfully, %FALSE + * on error. + * + * Since: 2.66 + */ +gboolean +g_uri_split_with_user (const gchar *uri_string, + GUriFlags flags, + gchar **scheme, + gchar **user, + gchar **password, + gchar **auth_params, + gchar **host, + gint *port, + gchar **path, + gchar **query, + gchar **fragment, + GError **error) +{ + g_return_val_if_fail (uri_string != NULL, FALSE); + g_return_val_if_fail (error == NULL || *error == NULL, FALSE); + + return g_uri_split_internal (uri_string, flags, + scheme, NULL, user, password, auth_params, + host, port, path, query, fragment, + error); +} + + +/** + * g_uri_split_network: + * @uri_string: a string containing a relative or absolute URI + * @flags: flags for parsing @uri_string + * @scheme: (out) (nullable) (optional) (transfer full): on return, contains + * the scheme (converted to lowercase), or %NULL + * @host: (out) (nullable) (optional) (transfer full): on return, contains the + * host, or %NULL + * @port: (out) (nullable) (optional) (transfer full): on return, contains the + * port, or -1 + * @error: #GError for error reporting, or %NULL to ignore. + * + * Parses @uri_string (which must be an absolute URI) according to + * @flags, and returns the pieces relevant to connecting to a host. + * See the documentation for g_uri_split() for more details; this is + * mostly a wrapper around that function with simpler arguments. + * However, it will return an error if @uri_string is a relative URI, + * or does not contain a hostname component. + * + * Returns: (skip): %TRUE if @uri_string parsed successfully, + * %FALSE on error. 
+ *
+ * Since: 2.66
+ */
+gboolean
+g_uri_split_network (const gchar  *uri_string,
+                     GUriFlags     flags,
+                     gchar       **scheme,
+                     gchar       **host,
+                     gint         *port,
+                     GError      **error)
+{
+  /* Start from NULL so that the checks and g_free() calls below never
+   * read an indeterminate pointer.
+   */
+  gchar *my_scheme = NULL, *my_host = NULL;
+
+  g_return_val_if_fail (uri_string != NULL, FALSE);
+  g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
+
+  /* Only the scheme, host and port are requested from the splitter;
+   * scheme and host go through locals because they are needed for the
+   * checks below even when the caller passed %NULL for them.
+   */
+  if (!g_uri_split_internal (uri_string, flags,
+                             &my_scheme, NULL, NULL, NULL, NULL,
+                             &my_host, port, NULL, NULL, NULL,
+                             error))
+    return FALSE;
+
+  if (!my_scheme || !my_host)
+    {
+      /* A missing scheme is reported in preference to a missing host. */
+      if (!my_scheme)
+        {
+          g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_SCHEME,
+                       _("URI '%s' is not an absolute URI"),
+                       uri_string);
+        }
+      else
+        {
+          g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_HOST,
+                       _("URI '%s' has no host component"),
+                       uri_string);
+        }
+      g_free (my_scheme);
+      g_free (my_host);
+
+      return FALSE;
+    }
+
+  /* Hand each string over to the caller if wanted, otherwise free it. */
+  if (scheme)
+    *scheme = my_scheme;
+  else
+    g_free (my_scheme);
+  if (host)
+    *host = my_host;
+  else
+    g_free (my_host);
+  return TRUE;
+}
+
+/**
+ * g_uri_is_valid:
+ * @uri_string: a string containing a relative or absolute URI
+ * @flags: flags for parsing @uri_string
+ * @error: #GError for error reporting, or %NULL to ignore.
+ *
+ * Parses @uri_string (which can be an absolute or relative URI)
+ * according to @flags, to determine whether it is valid.
+ *
+ * See g_uri_split(), and the definition of #GUriFlags, for more
+ * information on the effect of @flags.
+ *
+ * Returns: %TRUE if @uri_string parsed successfully, %FALSE on error.
+ *
+ * Since: 2.66
+ */
+gboolean
+g_uri_is_valid (const gchar  *uri_string,
+                GUriFlags     flags,
+                GError      **error)
+{
+  g_return_val_if_fail (uri_string != NULL, FALSE);
+  g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
+
+  /* No component is requested: only the success/failure result matters. */
+  return g_uri_split_internal (uri_string, flags,
+                               NULL, NULL, NULL, NULL, NULL,
+                               NULL, NULL, NULL, NULL, NULL,
+                               error);
+}
+
+
+/* This does the "Remove Dot Segments" algorithm from section 5.2.4 of
+ * RFC 3986, except that @path is modified in place.
+ */
+static void
+remove_dot_segments (gchar *path)
+{
+  gchar *p, *q;
+
+  /* Nothing to do for an empty path. */
+  if (!*path)
+    return;
+
+  /* Remove "./" where "." is a complete segment. */
+  for (p = path + 1; *p; )
+    {
+      if (*(p - 1) == '/' &&
+          *p == '.' && *(p + 1) == '/')
+        /* Shift the tail (including its NUL) over the "./"; p is not
+         * advanced, so the text that moved into place is checked too.
+         */
+        memmove (p, p + 2, strlen (p + 2) + 1);
+      else
+        p++;
+    }
+  /* Remove "." at end. (p points at the terminating NUL here.) */
+  if (p > path + 2 &&
+      *(p - 1) == '.' && *(p - 2) == '/')
+    *(p - 1) = '\0';
+
+  /* Remove "<segment>/../" where <segment> != ".." */
+  for (p = path + 1; *p; )
+    {
+      /* A ".." segment can't be cancelled by a following "..": skip it. */
+      if (!strncmp (p, "../", 3))
+        {
+          p += 3;
+          continue;
+        }
+      /* q is the '/' that ends the segment starting at p. */
+      q = strchr (p + 1, '/');
+      if (!q)
+        break;
+      if (strncmp (q, "/../", 4) != 0)
+        {
+          p = q + 1;
+          continue;
+        }
+      /* Drop "<segment>/../" and rescan from the start of the path. */
+      memmove (p, q + 4, strlen (q + 4) + 1);
+      p = path + 1;
+    }
+  /* Remove "<segment>/.." at end where <segment> != ".." */
+  q = strrchr (path, '/');
+  if (q && q != path && !strcmp (q, "/.."))
+    {
+      /* Walk back to the '/' (or the start of the path) before the
+       * segment that precedes the trailing "/..".
+       */
+      p = q - 1;
+      while (p > path && *p != '/')
+        p--;
+      if (strncmp (p, "/../", 4) != 0)
+        *(p + 1) = 0;
+    }
+
+  /* Remove extraneous initial "/.."s */
+  while (!strncmp (path, "/../", 4))
+    /* Moves everything after the leading "/.." plus the NUL:
+     * (strlen (path) - 3) + 1 bytes.
+     */
+    memmove (path, path + 3, strlen (path) - 2);
+  if (!strcmp (path, "/.."))
+    path[1] = '\0';
+}
+
+/**
+ * g_uri_parse:
+ * @uri_string: a string representing an absolute URI
+ * @flags: flags describing how to parse @uri_string
+ * @error: #GError for error reporting, or %NULL to ignore.
+ *
+ * Parses @uri_string according to @flags. If the result is not a
+ * valid absolute URI, it will be discarded, and an error returned.
+ *
+ * Return value: (transfer full): a new #GUri.
+ *
+ * Since: 2.66
+ */
+GUri *
+g_uri_parse (const gchar  *uri_string,
+             GUriFlags     flags,
+             GError      **error)
+{
+  g_return_val_if_fail (uri_string != NULL, NULL);
+  g_return_val_if_fail (error == NULL || *error == NULL, NULL);
+
+  /* Same as the relative parser, just without a base URI. */
+  return g_uri_parse_relative (NULL, uri_string, flags, error);
+}
+
+/**
+ * g_uri_parse_relative:
+ * @base_uri: (nullable): a base URI
+ * @uri_string: a string representing a relative or absolute URI
+ * @flags: flags describing how to parse @uri_string
+ * @error: #GError for error reporting, or %NULL to ignore.
+ *
+ * Parses @uri_string according to @flags and, if it is a relative
+ * URI, resolves it relative to @base_uri. If the result is not a
+ * valid absolute URI, it will be discarded, and an error returned.
+ *
+ * Return value: (transfer full): a new #GUri.
+ *
+ * Since: 2.66
+ */
+GUri *
+g_uri_parse_relative (GUri         *base_uri,
+                      const gchar  *uri_string,
+                      GUriFlags     flags,
+                      GError      **error)
+{
+  GUri *uri = NULL;
+
+  g_return_val_if_fail (uri_string != NULL, NULL);
+  g_return_val_if_fail (error == NULL || *error == NULL, NULL);
+  g_return_val_if_fail (base_uri == NULL || base_uri->scheme != NULL, NULL);
+
+  uri = g_atomic_rc_box_new0 (GUri);
+  uri->flags = flags;
+
+  /* Split straight into the fields of the new #GUri. */
+  if (!g_uri_split_internal (uri_string, flags,
+                             &uri->scheme, &uri->userinfo,
+                             &uri->user, &uri->password, &uri->auth_params,
+                             &uri->host, &uri->port,
+                             &uri->path, &uri->query, &uri->fragment,
+                             error))
+    goto fail;
+
+  /* A reference without a scheme can only be resolved against a base. */
+  if (!uri->scheme && !base_uri)
+    {
+      g_set_error_literal (error, G_URI_ERROR, G_URI_ERROR_MISC,
+                           _("URI is not absolute, and no base URI was provided"));
+      goto fail;
+    }
+
+  if (base_uri)
+    {
+      /* This is section 5.2.2 of RFC 3986, except that we're doing
+       * it in place in @uri rather than copying from R to T.
+       */
+      /* The reference has its own scheme: only normalize its path. */
+      if (uri->scheme)
+        remove_dot_segments (uri->path);
+      else
+        {
+          uri->scheme = g_strdup (base_uri->scheme);
+          /* The reference has its own host: keep its authority and
+           * path, taking only the scheme from the base.
+           */
+          if (uri->host)
+            remove_dot_segments (uri->path);
+          else
+            {
+              if (!*uri->path)
+                {
+                  /* Empty path: inherit the base path, and the base
+                   * query too unless the reference has its own.
+                   */
+                  g_free (uri->path);
+                  uri->path = g_strdup (base_uri->path);
+                  if (!uri->query)
+                    uri->query = g_strdup (base_uri->query);
+                }
+              else
+                {
+                  /* A path starting with '/' replaces the base path. */
+                  if (*uri->path == '/')
+                    remove_dot_segments (uri->path);
+                  else
+                    {
+                      gchar *newpath, *last;
+
+                      /* Merge: keep the base path up to (not including)
+                       * its last '/', then append "/" and the reference
+                       * path. Without any '/' in the base path the
+                       * result is "/" followed by the reference path.
+                       */
+                      last = strrchr (base_uri->path, '/');
+                      if (last)
+                        {
+                          newpath = g_strdup_printf ("%.*s/%s",
+                                                     (gint)(last - base_uri->path),
+                                                     base_uri->path,
+                                                     uri->path);
+                        }
+                      else
+                        newpath = g_strdup_printf ("/%s", uri->path);
+
+                      g_free (uri->path);
+                      uri->path = newpath;
+
+                      remove_dot_segments (uri->path);
+                    }
+                }
+
+              /* No host in the reference: the userinfo, host and port
+               * are all copied from the base.
+               */
+              uri->userinfo = g_strdup (base_uri->userinfo);
+              uri->user = g_strdup (base_uri->user);
+              uri->password = g_strdup (base_uri->password);
+              uri->auth_params = g_strdup (base_uri->auth_params);
+              uri->host = g_strdup (base_uri->host);
+              uri->port = base_uri->port;
+            }
+        }
+    }
+
+  return uri;
+
+ fail:
+  /* Drop the partially filled #GUri; the caller gets %NULL. */
+  if (uri)
+    g_uri_unref (uri);
+  return NULL;
+}
+
+/**
+ * g_uri_resolve_relative:
+ * @base_uri_string: (nullable): a string representing a base URI
+ * @uri_string: a string representing a relative or absolute URI
+ * @flags: flags describing how to parse @uri_string
+ * @error: #GError for error reporting, or %NULL to ignore.
+ *
+ * Parses @uri_string according to @flags and, if it is a relative
+ * URI, resolves it relative to @base_uri_string. If the result is not
+ * a valid absolute URI, it will be discarded, and an error returned.
+ *
+ * (If @base_uri_string is %NULL, this just returns @uri_string, or
+ * %NULL if @uri_string is invalid or not absolute.)
+ *
+ * Return value: the resolved URI string.
+ * + * Since: 2.66 + */ +gchar * +g_uri_resolve_relative (const gchar *base_uri_string, + const gchar *uri_string, + GUriFlags flags, + GError **error) +{ + GUri *base_uri, *resolved_uri; + gchar *resolved_uri_string; + + g_return_val_if_fail (uri_string != NULL, NULL); + g_return_val_if_fail (error == NULL || *error == NULL, NULL); + + flags |= G_URI_FLAGS_ENCODED; + + if (base_uri_string) + { + base_uri = g_uri_parse (base_uri_string, flags, error); + if (!base_uri) + return NULL; + } + else + base_uri = NULL; + + resolved_uri = g_uri_parse_relative (base_uri, uri_string, flags, error); + if (base_uri) + g_uri_unref (base_uri); + if (!resolved_uri) + return NULL; + + resolved_uri_string = g_uri_to_string (resolved_uri); + g_uri_unref (resolved_uri); + return resolved_uri_string; +} + +/* userinfo as a whole can contain sub-delims + ":", but split-out + * user can't contain ":" or ";", and split-out password can't contain + * ";". + */ +#define USERINFO_ALLOWED_CHARS G_URI_RESERVED_CHARS_ALLOWED_IN_USERINFO +#define USER_ALLOWED_CHARS "!$&'()*+,=" +#define PASSWORD_ALLOWED_CHARS "!$&'()*+,=:" +#define AUTH_PARAMS_ALLOWED_CHARS USERINFO_ALLOWED_CHARS +#define IP_ADDR_ALLOWED_CHARS ":" +#define HOST_ALLOWED_CHARS G_URI_RESERVED_CHARS_SUBCOMPONENT_DELIMITERS +#define PATH_ALLOWED_CHARS G_URI_RESERVED_CHARS_ALLOWED_IN_PATH +#define QUERY_ALLOWED_CHARS G_URI_RESERVED_CHARS_ALLOWED_IN_PATH "?" +#define FRAGMENT_ALLOWED_CHARS G_URI_RESERVED_CHARS_ALLOWED_IN_PATH "?" 
+
+/* Assembles a URI string from its components.
+ *
+ * @userinfo says how @user is to be interpreted: if %TRUE, @user holds
+ * the complete userinfo component, in which ':' is legal and must not
+ * be escaped; if %FALSE, @user is only the user name, and ':' and ';'
+ * inside it are escaped so they cannot be mistaken for the password
+ * and auth-params delimiters.
+ *
+ * With %G_URI_FLAGS_ENCODED in @flags every component is appended
+ * verbatim; otherwise each one is `%`-escaped with the character set
+ * allowed for that component.
+ *
+ * Returns a newly allocated string owned by the caller.
+ */
+static gchar *
+g_uri_join_internal (GUriFlags    flags,
+                     const gchar *scheme,
+                     gboolean     userinfo,
+                     const gchar *user,
+                     const gchar *password,
+                     const gchar *auth_params,
+                     const gchar *host,
+                     gint         port,
+                     const gchar *path,
+                     const gchar *query,
+                     const gchar *fragment)
+{
+  gboolean encoded = (flags & G_URI_FLAGS_ENCODED);
+  GString *str;
+
+  str = g_string_new (scheme);
+  g_string_append_c (str, ':');
+
+  /* The authority ("//userinfo@host:port") is only written when there
+   * is a host.
+   */
+  if (host)
+    {
+      g_string_append (str, "//");
+
+      if (user)
+        {
+          if (encoded)
+            g_string_append (str, user);
+          else
+            {
+              if (userinfo)
+                {
+                  /* @user is the whole userinfo: ':' stays as it is. */
+                  g_string_append_uri_escaped (str, user,
+                                               USERINFO_ALLOWED_CHARS, TRUE);
+                }
+              else
+                {
+                  /* Encode ':' and ';' regardless of whether we have a
+                   * password or auth params, since it may be parsed later
+                   * under the assumption that it does.
+                   */
+                  g_string_append_uri_escaped (str, user,
+                                               USER_ALLOWED_CHARS, TRUE);
+                }
+            }
+
+          if (password)
+            {
+              g_string_append_c (str, ':');
+              if (encoded)
+                g_string_append (str, password);
+              else
+                g_string_append_uri_escaped (str, password,
+                                             PASSWORD_ALLOWED_CHARS, TRUE);
+            }
+
+          if (auth_params)
+            {
+              g_string_append_c (str, ';');
+              if (encoded)
+                g_string_append (str, auth_params);
+              else
+                g_string_append_uri_escaped (str, auth_params,
+                                             AUTH_PARAMS_ALLOWED_CHARS, TRUE);
+            }
+
+          g_string_append_c (str, '@');
+        }
+
+      /* A host containing ':' is written in brackets, as an IP literal. */
+      if (strchr (host, ':'))
+        {
+          g_string_append_c (str, '[');
+          if (encoded)
+            g_string_append (str, host);
+          else
+            g_string_append_uri_escaped (str, host, IP_ADDR_ALLOWED_CHARS, TRUE);
+          g_string_append_c (str, ']');
+        }
+      else
+        {
+          if (encoded)
+            g_string_append (str, host);
+          else
+            g_string_append_uri_escaped (str, host, HOST_ALLOWED_CHARS, TRUE);
+        }
+
+      /* -1 means "no port". */
+      if (port != -1)
+        g_string_append_printf (str, ":%d", port);
+    }
+
+  if (encoded)
+    g_string_append (str, path);
+  else
+    g_string_append_uri_escaped (str, path, PATH_ALLOWED_CHARS, TRUE);
+
+  if (query)
+    {
+      g_string_append_c (str, '?');
+      if (encoded)
+        g_string_append (str, query);
+      else
+        g_string_append_uri_escaped (str, query, QUERY_ALLOWED_CHARS, TRUE);
+    }
+  if (fragment)
+    {
+      g_string_append_c (str, '#');
+      if (encoded)
+        g_string_append (str, fragment);
+      else
+        g_string_append_uri_escaped (str, fragment, FRAGMENT_ALLOWED_CHARS, TRUE);
+    }
+
+  return g_string_free (str, FALSE);
+}
+
+/**
+ * g_uri_join:
+ * @flags: flags describing how to build the URI string
+ * @scheme: the URI scheme
+ * @userinfo: (nullable): the userinfo component, or %NULL
+ * @host: (nullable): the host component, or %NULL
+ * @port: the port, or -1
+ * @path: the path component
+ * @query: (nullable): the query component, or %NULL
+ * @fragment: (nullable): the fragment, or %NULL
+ *
+ * Joins the given components together according to @flags to create
+ * a complete URI string. At least @scheme must be specified, and
+ * @path may not be %NULL (though it may be "").
+ *
+ * Unless @flags contains %G_URI_FLAGS_ENCODED, @userinfo is escaped as
+ * a whole userinfo component, so any ':' it contains is kept as is.
+ *
+ * See also g_uri_join_with_user(), which allows specifying the
+ * components of the "userinfo" separately.
+ *
+ * Return value: a URI string
+ *
+ * Since: 2.66
+ */
+gchar *
+g_uri_join (GUriFlags    flags,
+            const gchar *scheme,
+            const gchar *userinfo,
+            const gchar *host,
+            gint         port,
+            const gchar *path,
+            const gchar *query,
+            const gchar *fragment)
+{
+  g_return_val_if_fail (scheme != NULL, NULL);
+  g_return_val_if_fail (port >= -1 && port <= 65535, NULL);
+  g_return_val_if_fail (path != NULL, NULL);
+
+  /* TRUE: the "user" slot carries the complete userinfo. */
+  return g_uri_join_internal (flags,
+                              scheme,
+                              TRUE, userinfo, NULL, NULL,
+                              host,
+                              port,
+                              path,
+                              query,
+                              fragment);
+}
+
+/**
+ * g_uri_join_with_user:
+ * @flags: flags describing how to build the URI string
+ * @scheme: the URI scheme
+ * @user: (nullable): the user component of the userinfo, or %NULL
+ * @password: (nullable): the password component of the userinfo, or
+ *   %NULL
+ * @auth_params: (nullable): the auth params of the userinfo, or
+ *   %NULL
+ * @host: (nullable): the host component, or %NULL
+ * @port: the port, or -1
+ * @path: the path component
+ * @query: (nullable): the query component, or %NULL
+ * @fragment: (nullable): the fragment, or %NULL
+ *
+ * Joins the given components together according to @flags to create
+ * a complete URI string. At least @scheme must be specified, and
+ * @path may not be %NULL (though it may be "").
+ *
+ * In contrast to g_uri_join(), this allows specifying the components
+ * of the "userinfo" separately.
+ *
+ * Return value: a URI string
+ *
+ * Since: 2.66
+ */
+gchar *
+g_uri_join_with_user (GUriFlags    flags,
+                      const gchar *scheme,
+                      const gchar *user,
+                      const gchar *password,
+                      const gchar *auth_params,
+                      const gchar *host,
+                      gint         port,
+                      const gchar *path,
+                      const gchar *query,
+                      const gchar *fragment)
+{
+  g_return_val_if_fail (scheme != NULL, NULL);
+  g_return_val_if_fail (port >= -1 && port <= 65535, NULL);
+  g_return_val_if_fail (path != NULL, NULL);
+
+  /* FALSE: @user is only the user name, so ':' and ';' in it are escaped. */
+  return g_uri_join_internal (flags,
+                              scheme,
+                              FALSE, user, password, auth_params,
+                              host,
+                              port,
+                              path,
+                              query,
+                              fragment);
+}
+
+/**
+ * g_uri_build:
+ * @flags: flags describing how to build the #GUri
+ * @scheme: the URI scheme
+ * @userinfo: (nullable): the userinfo component, or %NULL
+ * @host: (nullable): the host component, or %NULL
+ * @port: the port, or -1
+ * @path: the path component
+ * @query: (nullable): the query component, or %NULL
+ * @fragment: (nullable): the fragment, or %NULL
+ *
+ * Creates a new #GUri from the given components according to @flags.
+ *
+ * See also g_uri_build_with_user(), which allows specifying the
+ * components of the "userinfo" separately.
+ *
+ * Return value: (transfer full): a new #GUri
+ *
+ * Since: 2.66
+ */
+GUri *
+g_uri_build (GUriFlags    flags,
+             const gchar *scheme,
+             const gchar *userinfo,
+             const gchar *host,
+             gint         port,
+             const gchar *path,
+             const gchar *query,
+             const gchar *fragment)
+{
+  GUri *uri;
+
+  g_return_val_if_fail (scheme != NULL, NULL);
+  g_return_val_if_fail (port >= -1 && port <= 65535, NULL);
+  g_return_val_if_fail (path != NULL, NULL);
+
+  /* Every string is copied; the scheme is stored in lowercase. Only
+   * the combined userinfo is stored: the separate user, password and
+   * auth_params fields are not assigned here.
+   */
+  uri = g_atomic_rc_box_new0 (GUri);
+  uri->flags = flags;
+  uri->scheme = g_ascii_strdown (scheme, -1);
+  uri->userinfo = g_strdup (userinfo);
+  uri->host = g_strdup (host);
+  uri->port = port;
+  uri->path = g_strdup (path);
+  uri->query = g_strdup (query);
+  uri->fragment = g_strdup (fragment);
+
+  return uri;
+}
+
+/**
+ * g_uri_build_with_user:
+ * @flags: flags describing how to build the #GUri
+ * @scheme: the URI scheme
+ * @user: (nullable): the user component of the userinfo, or %NULL
+ * @password: (nullable): the password component of the userinfo, or %NULL
+ * @auth_params: (nullable): the auth params of the userinfo, or %NULL
+ * @host: (nullable): the host component, or %NULL
+ * @port: the port, or -1
+ * @path: the path component
+ * @query: (nullable): the query component, or %NULL
+ * @fragment: (nullable): the fragment, or %NULL
+ *
+ * Creates a new #GUri from the given components according to @flags.
+ *
+ * In contrast to g_uri_build(), this allows specifying the components
+ * of the "userinfo" field separately. Note that @user must be non-%NULL
+ * if either @password or @auth_params is non-%NULL.
+ * + * Return value: (transfer full): a new #GUri + * + * Since: 2.66 + */ +GUri * +g_uri_build_with_user (GUriFlags flags, + const gchar *scheme, + const gchar *user, + const gchar *password, + const gchar *auth_params, + const gchar *host, + gint port, + const gchar *path, + const gchar *query, + const gchar *fragment) +{ + GUri *uri; + GString *userinfo; + + g_return_val_if_fail (scheme != NULL, NULL); + g_return_val_if_fail (password == NULL || user != NULL, NULL); + g_return_val_if_fail (auth_params == NULL || user != NULL, NULL); + g_return_val_if_fail (port >= -1 && port <= 65535, NULL); + g_return_val_if_fail (path != NULL, NULL); + + uri = g_atomic_rc_box_new0 (GUri); + uri->flags = flags; + uri->scheme = g_ascii_strdown (scheme, -1); + uri->user = g_strdup (user); + uri->password = g_strdup (password); + uri->auth_params = g_strdup (auth_params); + uri->host = g_strdup (host); + uri->port = port; + uri->path = g_strdup (path); + uri->query = g_strdup (query); + uri->fragment = g_strdup (fragment); + + if (user) + { + userinfo = g_string_new (NULL); + if (flags & G_URI_FLAGS_ENCODED) + g_string_append (userinfo, uri->user); + else + g_string_append_uri_escaped (userinfo, uri->user, USER_ALLOWED_CHARS, TRUE); + if (password) + { + g_string_append_c (userinfo, ':'); + if (flags & G_URI_FLAGS_ENCODED) + g_string_append (userinfo, uri->password); + else + g_string_append_uri_escaped (userinfo, uri->password, + PASSWORD_ALLOWED_CHARS, TRUE); + } + if (auth_params) + { + g_string_append_c (userinfo, ';'); + if (flags & G_URI_FLAGS_ENCODED) + g_string_append (userinfo, uri->auth_params); + else + g_string_append_uri_escaped (userinfo, + uri->auth_params, AUTH_PARAMS_ALLOWED_CHARS, TRUE); + } + uri->userinfo = g_string_free (userinfo, FALSE); + } + else + uri->userinfo = NULL; + + return uri; +} + +/** + * g_uri_to_string: + * @uri: a #GUri + * + * Returns a string representing @uri. 
+ *
+ * This is not guaranteed to return a string which is identical to the
+ * string that @uri was parsed from. However, if the source URI was
+ * syntactically correct (according to RFC 3986), and it was parsed
+ * with %G_URI_FLAGS_ENCODED, then g_uri_to_string() is guaranteed to return
+ * a string which is at least semantically equivalent to the source
+ * URI (according to RFC 3986).
+ *
+ * Return value: a string representing @uri, which the caller must
+ * free.
+ *
+ * Since: 2.66
+ */
+gchar *
+g_uri_to_string (GUri *uri)
+{
+  g_return_val_if_fail (uri != NULL, NULL);
+
+  /* 0: no hide flags, so nothing is left out. */
+  return g_uri_to_string_partial (uri, 0);
+}
+
+/**
+ * g_uri_to_string_partial:
+ * @uri: a #GUri
+ * @flags: flags describing what parts of @uri to hide
+ *
+ * Returns a string representing @uri, subject to the options in
+ * @flags. See g_uri_to_string() and #GUriHideFlags for more details.
+ *
+ * Return value: a string representing @uri, which the caller must
+ * free.
+ *
+ * Since: 2.66
+ */
+gchar *
+g_uri_to_string_partial (GUri          *uri,
+                         GUriHideFlags  flags)
+{
+  /* Hiding the whole userinfo also hides the password and auth params. */
+  gboolean hide_user = (flags & G_URI_HIDE_USERINFO);
+  gboolean hide_password = (flags & (G_URI_HIDE_USERINFO | G_URI_HIDE_PASSWORD));
+  gboolean hide_auth_params = (flags & (G_URI_HIDE_USERINFO | G_URI_HIDE_AUTH_PARAMS));
+  gboolean hide_fragment = (flags & G_URI_HIDE_FRAGMENT);
+
+  g_return_val_if_fail (uri != NULL, NULL);
+
+  /* If @uri was created with a flag that splits the userinfo, rebuild
+   * it from the separate user/password/auth_params fields so that each
+   * can be hidden on its own.
+   */
+  if (uri->flags & (G_URI_FLAGS_HAS_PASSWORD | G_URI_FLAGS_HAS_AUTH_PARAMS))
+    {
+      return g_uri_join_with_user (uri->flags,
+                                   uri->scheme,
+                                   hide_user ? NULL : uri->user,
+                                   hide_password ? NULL : uri->password,
+                                   hide_auth_params ? NULL : uri->auth_params,
+                                   uri->host,
+                                   uri->port,
+                                   uri->path,
+                                   uri->query,
+                                   hide_fragment ? NULL : uri->fragment);
+    }
+
+  /* Otherwise the userinfo is used as a single component. */
+  return g_uri_join (uri->flags,
+                     uri->scheme,
+                     hide_user ? NULL : uri->userinfo,
+                     uri->host,
+                     uri->port,
+                     uri->path,
+                     uri->query,
+                     hide_fragment ? NULL : uri->fragment);
+}
+
+/* This is just a copy of g_str_hash() with g_ascii_toupper() added */
+static guint
+str_ascii_case_hash (gconstpointer v)
+{
+  const signed char *p;
+  guint32 h = 5381;
+
+  /* h * 33 + c, with each character folded to upper case first, so
+   * strings differing only in ASCII case get the same hash.
+   */
+  for (p = v; *p != '\0'; p++)
+    h = (h << 5) + h + g_ascii_toupper (*p);
+
+  return h;
+}
+
+/* Equality function matching str_ascii_case_hash(): compares the two
+ * strings ignoring ASCII case.
+ */
+static gboolean
+str_ascii_case_equal (gconstpointer v1,
+                      gconstpointer v2)
+{
+  const gchar *string1 = v1;
+  const gchar *string2 = v2;
+
+  return g_ascii_strcasecmp (string1, string2) == 0;
+}
+
+/**
+ * g_uri_parse_params:
+ * @params: a `%`-encoded string containing "attribute=value"
+ *   parameters
+ * @length: the length of @params, or -1 if it is NUL-terminated
+ * @separator: the separator character between parameters.
+ *   (usually ';', but sometimes '&')
+ * @case_insensitive: whether parameter names are case insensitive
+ *
+ * Many URI schemes include one or more attribute/value pairs as part of the URI
+ * value. This method can be used to parse them into a hash table.
+ *
+ * The @params string is assumed to still be `%`-encoded, but the returned
+ * values will be fully decoded. (Thus it is possible that the returned values
+ * may contain '=' or @separator, if the value was encoded in the input.)
+ * Invalid `%`-encoding is treated as with the non-%G_URI_FLAGS_PARSE_STRICT
+ * rules for g_uri_parse(). (However, if @params is the path or query string
+ * from a #GUri that was parsed with %G_URI_FLAGS_PARSE_STRICT and
+ * %G_URI_FLAGS_ENCODED, then you already know that it does not contain any
+ * invalid encoding.)
+ *
+ * Return value: (transfer full) (element-type utf8 utf8): a hash table of
+ * attribute/value pairs. Both names and values will be fully-decoded. If
+ * @params cannot be parsed (eg, it contains two @separator characters in a
+ * row), then %NULL is returned.
+ * + * Since: 2.66 + */ +GHashTable * +g_uri_parse_params (const gchar *params, + gssize length, + gchar separator, + gboolean case_insensitive) +{ + GHashTable *hash; + const gchar *end, *attr, *attr_end, *value, *value_end; + gchar *decoded_attr, *decoded_value; + + g_return_val_if_fail (params != NULL, NULL); + g_return_val_if_fail (length >= -1, NULL); + + if (case_insensitive) + { + hash = g_hash_table_new_full (str_ascii_case_hash, + str_ascii_case_equal, + g_free, g_free); + } + else + { + hash = g_hash_table_new_full (g_str_hash, g_str_equal, + g_free, g_free); + } + + if (length == -1) + end = params + strlen (params); + else + end = params + length; + + attr = params; + while (attr < end) + { + value_end = memchr (attr, separator, end - attr); + if (!value_end) + value_end = end; + + attr_end = memchr (attr, '=', value_end - attr); + if (!attr_end) + { + g_hash_table_destroy (hash); + return NULL; + } + if (!uri_decode (&decoded_attr, attr, attr_end - attr, + 0, G_URI_ERROR_MISC, NULL)) + { + g_hash_table_destroy (hash); + return NULL; + } + + value = attr_end + 1; + if (!uri_decode (&decoded_value, value, value_end - value, + 0, G_URI_ERROR_MISC, NULL)) + { + g_free (decoded_attr); + g_hash_table_destroy (hash); + return NULL; + } + + g_hash_table_insert (hash, decoded_attr, decoded_value); + attr = value_end + 1; + } + + return hash; +} + +/** + * g_uri_get_scheme: + * @uri: a #GUri + * + * Gets @uri's scheme. Note that this will always be all-lowercase, + * regardless of the string or strings that @uri was created from. + * + * Return value: @uri's scheme. + * + * Since: 2.66 + */ +const gchar * +g_uri_get_scheme (GUri *uri) +{ + g_return_val_if_fail (uri != NULL, NULL); + + return uri->scheme; +} + +/** + * g_uri_get_userinfo: + * @uri: a #GUri + * + * Gets @uri's userinfo, which may contain `%`-encoding, depending on + * the flags with which @uri was created. + * + * Return value: @uri's userinfo. 
+ * + * Since: 2.66 + */ +const gchar * +g_uri_get_userinfo (GUri *uri) +{ + g_return_val_if_fail (uri != NULL, NULL); + + return uri->userinfo; +} + +/** + * g_uri_get_user: + * @uri: a #GUri + * + * Gets the "username" component of @uri's userinfo, which may contain + * `%`-encoding, depending on the flags with which @uri was created. + * If @uri was not created with %G_URI_FLAGS_HAS_PASSWORD or + * %G_URI_FLAGS_HAS_AUTH_PARAMS, this is the same as g_uri_get_userinfo(). + * + * Return value: @uri's user. + * + * Since: 2.66 + */ +const gchar * +g_uri_get_user (GUri *uri) +{ + g_return_val_if_fail (uri != NULL, NULL); + + return uri->user; +} + +/** + * g_uri_get_password: + * @uri: a #GUri + * + * Gets @uri's password, which may contain `%`-encoding, depending on + * the flags with which @uri was created. (If @uri was not created + * with %G_URI_FLAGS_HAS_PASSWORD then this will be %NULL.) + * + * Return value: @uri's password. + * + * Since: 2.66 + */ +const gchar * +g_uri_get_password (GUri *uri) +{ + g_return_val_if_fail (uri != NULL, NULL); + + return uri->password; +} + +/** + * g_uri_get_auth_params: + * @uri: a #GUri + * + * Gets @uri's authentication parameters, which may contain + * `%`-encoding, depending on the flags with which @uri was created. + * (If @uri was not created with %G_URI_FLAGS_HAS_AUTH_PARAMS then this will + * be %NULL.) + * + * Depending on the URI scheme, g_uri_parse_params() may be useful for + * further parsing this information. + * + * Return value: @uri's authentication parameters. + * + * Since: 2.66 + */ +const gchar * +g_uri_get_auth_params (GUri *uri) +{ + g_return_val_if_fail (uri != NULL, NULL); + + return uri->auth_params; +} + +/** + * g_uri_get_host: + * @uri: a #GUri + * + * Gets @uri's host. This will never have `%`-encoded characters, + * unless it is non-UTF-8 (which can only be the case if @uri was + * created with %G_URI_FLAGS_NON_DNS). 
+ * + * If @uri contained an IPv6 address literal, this value will be just + * that address, without the brackets around it that are necessary in + * the string form of the URI. Note that in this case there may also + * be a scope ID attached to the address. Eg, "`fe80::1234%``em1`" (or + * "`fe80::1234%``25em1" if the string is still encoded). + * + * Return value: @uri's host. + * + * Since: 2.66 + */ +const gchar * +g_uri_get_host (GUri *uri) +{ + g_return_val_if_fail (uri != NULL, NULL); + + return uri->host; +} + +/** + * g_uri_get_port: + * @uri: a #GUri + * + * Gets @uri's port. + * + * Return value: @uri's port, or -1 if no port was specified. + * + * Since: 2.66 + */ +gint +g_uri_get_port (GUri *uri) +{ + g_return_val_if_fail (uri != NULL, -1); + + return uri->port; +} + +/** + * g_uri_get_path: + * @uri: a #GUri + * + * Gets @uri's path, which may contain `%`-encoding, depending on the + * flags with which @uri was created. + * + * Return value: @uri's path. + * + * Since: 2.66 + */ +const gchar * +g_uri_get_path (GUri *uri) +{ + g_return_val_if_fail (uri != NULL, NULL); + + return uri->path; +} + +/** + * g_uri_get_query: + * @uri: a #GUri + * + * Gets @uri's query, which may contain `%`-encoding, depending on the + * flags with which @uri was created. + * + * For queries consisting of a series of "`name=value`" parameters, + * g_uri_parse_params() may be useful. + * + * Return value: @uri's query. + * + * Since: 2.66 + */ +const gchar * +g_uri_get_query (GUri *uri) +{ + g_return_val_if_fail (uri != NULL, NULL); + + return uri->query; +} + +/** + * g_uri_get_fragment: + * @uri: a #GUri + * + * Gets @uri's fragment, which may contain `%`-encoding, depending on + * the flags with which @uri was created. + * + * Return value: @uri's fragment. 
+ * + * Since: 2.66 + */ +const gchar * +g_uri_get_fragment (GUri *uri) +{ + g_return_val_if_fail (uri != NULL, NULL); + + return uri->fragment; +} + + +/** + * g_uri_get_flags: + * @uri: a #GUri + * + * Gets @uri's flags set upon construction. + * + * Return value: @uri's flags. + * + * Since: 2.66 + **/ +GUriFlags +g_uri_get_flags (GUri *uri) +{ + g_return_val_if_fail (uri != NULL, 0); + + return uri->flags; +} + +/** + * g_uri_unescape_segment: + * @escaped_string: (nullable): A string, may be %NULL + * @escaped_string_end: (nullable): Pointer to end of @escaped_string, + * may be %NULL + * @illegal_characters: (nullable): An optional string of illegal + * characters not to be allowed, may be %NULL + * + * Unescapes a segment of an escaped string. + * + * If any of the characters in @illegal_characters or the NUL + * character appears as an escaped character in @escaped_string, then + * that is an error and %NULL will be returned. This is useful if you + * want to avoid for instance having a slash being expanded in an + * escaped path element, which might confuse pathname handling. + * + * Returns: an unescaped version of @escaped_string or %NULL on error. + * The returned string should be freed when no longer needed. As a + * special case if %NULL is given for @escaped_string, this function + * will return %NULL. 
+ * + * Since: 2.16 + **/ +gchar * +g_uri_unescape_segment (const gchar *escaped_string, + const gchar *escaped_string_end, + const gchar *illegal_characters) +{ + gchar *unescaped, *p; + gsize length; + + if (!escaped_string) + return NULL; + + if (escaped_string_end) + length = escaped_string_end - escaped_string; + else + length = strlen (escaped_string); + + if (!uri_decode (&unescaped, + escaped_string, length, + G_URI_FLAGS_PARSE_STRICT, + 0, NULL)) + return NULL; + + if (illegal_characters) + { + for (p = unescaped; *p; p++) + { + if (strchr (illegal_characters, *p)) + { + g_free (unescaped); + return NULL; + } + } + } + + return unescaped; +} + +/** + * g_uri_unescape_string: + * @escaped_string: an escaped string to be unescaped. + * @illegal_characters: (nullable): a string of illegal characters + * not to be allowed, or %NULL. + * + * Unescapes a whole escaped string. + * + * If any of the characters in @illegal_characters or the NUL + * character appears as an escaped character in @escaped_string, then + * that is an error and %NULL will be returned. This is useful if you + * want to avoid for instance having a slash being expanded in an + * escaped path element, which might confuse pathname handling. + * + * Returns: an unescaped version of @escaped_string. The returned string + * should be freed when no longer needed. + * + * Since: 2.16 + **/ +gchar * +g_uri_unescape_string (const gchar *escaped_string, + const gchar *illegal_characters) +{ + return g_uri_unescape_segment (escaped_string, NULL, illegal_characters); +} + +/** + * g_uri_escape_string: + * @unescaped: the unescaped input string. + * @reserved_chars_allowed: (nullable): a string of reserved + * characters that are allowed to be used, or %NULL. + * @allow_utf8: %TRUE if the result can include UTF-8 characters. + * + * Escapes a string for use in a URI. + * + * Normally all characters that are not "unreserved" (i.e. 
ASCII + * alphanumerical characters plus dash, dot, underscore and tilde) are + * escaped. But if you specify characters in @reserved_chars_allowed + * they are not escaped. This is useful for the "reserved" characters + * in the URI specification, since those are allowed unescaped in some + * portions of a URI. + * + * Returns: an escaped version of @unescaped. The returned string + * should be freed when no longer needed. + * + * Since: 2.16 + **/ +gchar * +g_uri_escape_string (const gchar *unescaped, + const gchar *reserved_chars_allowed, + gboolean allow_utf8) +{ + GString *s; + + g_return_val_if_fail (unescaped != NULL, NULL); + + s = g_string_sized_new (strlen (unescaped) * 1.25); + + g_string_append_uri_escaped (s, unescaped, reserved_chars_allowed, allow_utf8); + + return g_string_free (s, FALSE); +} + +/** + * g_uri_unescape_bytes: + * @escaped_string: A URI-escaped string + * @length: the length of @escaped_string to escape, or -1 if it + * is NUL-terminated. + * + * Unescapes a segment of an escaped string as binary data. + * + * Note that in contrast to g_uri_unescape_string(), this does allow + * `NUL` bytes to appear in the output. + * + * Returns: (transfer full): an unescaped version of @escaped_string + * or %NULL on error. The returned #GBytes should be unreffed when no + * longer needed. + * + * Since: 2.66 + **/ +GBytes * +g_uri_unescape_bytes (const gchar *escaped_string, + gssize length) +{ + gchar *buf; + gssize unescaped_length; + + g_return_val_if_fail (escaped_string != NULL, NULL); + + if (length == -1) + length = strlen (escaped_string); + + unescaped_length = uri_decoder (&buf, + escaped_string, length, + FALSE, + G_URI_FLAGS_PARSE_STRICT|G_URI_FLAGS_ENCODED, + 0, NULL); + if (unescaped_length == -1) + return NULL; + + return g_bytes_new_take (buf, unescaped_length); +} + +/** + * g_uri_escape_bytes: + * @unescaped: (array length=length): the unescaped input data. 
+ * @length: the length of @unescaped + * @reserved_chars_allowed: (nullable): a string of reserved + * characters that are allowed to be used, or %NULL. + * + * Escapes arbitrary data for use in a URI. + * + * Normally all characters that are not "unreserved" (i.e. ASCII + * alphanumerical characters plus dash, dot, underscore and tilde) are + * escaped. But if you specify characters in @reserved_chars_allowed + * they are not escaped. This is useful for the "reserved" characters + * in the URI specification, since those are allowed unescaped in some + * portions of a URI. + * + * Though technically incorrect, this will also allow escaping "0" + * bytes as "`%``00`". + * + * Returns: an escaped version of @unescaped. The returned string + * should be freed when no longer needed. + * + * Since: 2.66 + */ +gchar * +g_uri_escape_bytes (const guchar *unescaped, + gsize length, + const gchar *reserved_chars_allowed) +{ + GString *string; + + g_return_val_if_fail (unescaped != NULL, NULL); + + string = g_string_sized_new (length * 1.25); + + _uri_encoder (string, unescaped, length, + reserved_chars_allowed, FALSE); + + return g_string_free (string, FALSE); +} + +static gint +g_uri_scheme_length (const gchar *uri) +{ + const gchar *p; + + p = uri; + if (!g_ascii_isalpha (*p)) + return -1; + p++; + while (g_ascii_isalnum (*p) || *p == '.' || *p == '+' || *p == '-') + p++; + + if (p > uri && *p == ':') + return p - uri; + + return -1; +} + +/** + * g_uri_parse_scheme: + * @uri: a valid URI. + * + * Gets the scheme portion of a URI string. RFC 3986 decodes the scheme as: + * |[ + * URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] + * ]| + * Common schemes include "file", "http", "svn+ssh", etc. + * + * Returns: The "scheme" component of the URI, or %NULL on error. + * The returned string should be freed when no longer needed. 
+ * + * Since: 2.16 + **/ +gchar * +g_uri_parse_scheme (const gchar *uri) +{ + gint len; + + g_return_val_if_fail (uri != NULL, NULL); + + len = g_uri_scheme_length (uri); + return len == -1 ? NULL : g_strndup (uri, len); +} + +/** + * g_uri_peek_scheme: + * @uri: a valid URI. + * + * Gets the scheme portion of a URI string. RFC 3986 decodes the scheme as: + * |[ + * URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] + * ]| + * Common schemes include "file", "http", "svn+ssh", etc. + * + * Returns: The "scheme" component of the URI, or %NULL on error. The + * returned string is normalized to all-lowercase, and interned via + * g_intern_string(), so it does not need to be freed. + * + * Since: 2.66 + **/ +const gchar * +g_uri_peek_scheme (const gchar *uri) +{ + gint len; + gchar *lower_scheme; + const gchar *scheme; + + g_return_val_if_fail (uri != NULL, NULL); + + len = g_uri_scheme_length (uri); + if (len == -1) + return NULL; + + lower_scheme = g_ascii_strdown (uri, len); + scheme = g_intern_string (lower_scheme); + g_free (lower_scheme); + + return scheme; +} + +G_DEFINE_QUARK (g-uri-quark, g_uri_error) diff --git a/glib/guri.h b/glib/guri.h new file mode 100644 index 000000000..caec641cf --- /dev/null +++ b/glib/guri.h @@ -0,0 +1,347 @@ +/* GLIB - Library of useful routines for C programming + * Copyright © 2020 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General + * Public License along with this library; if not, see + * <http://www.gnu.org/licenses/>. + */ + +#pragma once + +#if !defined (__GLIB_H_INSIDE__) && !defined (GLIB_COMPILATION) +#error "Only <glib.h> can be included directly." +#endif + +#include <glib/gtypes.h> + +G_BEGIN_DECLS + +G_GNUC_BEGIN_IGNORE_DEPRECATIONS + +typedef struct _GUri GUri; + +GLIB_AVAILABLE_IN_2_66 +GUri * g_uri_ref (GUri *uri); +GLIB_AVAILABLE_IN_2_66 +void g_uri_unref (GUri *uri); + +/** + * GUriFlags: + * @G_URI_FLAGS_PARSE_STRICT: Parse the URI strictly according to the RFC + * 3986 grammar, rather than fixing up or ignoring common mistakes. + * @G_URI_FLAGS_HAS_PASSWORD: The userinfo field may contain a password, + * which will be separated from the username by ':'. + * @G_URI_FLAGS_HAS_AUTH_PARAMS: The userinfo may contain additional + * authentication-related parameters, which will be separated from + * the username and/or password by ';'. + * @G_URI_FLAGS_NON_DNS: The host component should not be assumed to be a + * DNS hostname or IP address. (Eg, for `smb` URIs with NetBIOS + * hostnames). + * @G_URI_FLAGS_ENCODED: When parsing a URI, this indicates that `%`-encoded + * characters in the userinfo, path, query, and fragment fields + * should not be decoded. (And likewise the host field if + * %G_URI_FLAGS_NON_DNS is also set.) When building a URI, it indicates + * that you have already `%`-encoded the components, and so #GUri + * should not do any encoding itself. + * @G_URI_FLAGS_NONE: No flags set. + * + * Flags that describe a URI. + * + * When parsing a URI, if you need to choose different flags based on + * the type of URI, you can use g_uri_peek_scheme() on the URI string + * to check the scheme first, and use that to decide what flags to + * parse it with. 
+ * + * Since: 2.66 + */ +GLIB_AVAILABLE_TYPE_IN_2_66 +typedef enum { + G_URI_FLAGS_NONE = 0, + G_URI_FLAGS_PARSE_STRICT = 1 << 0, + G_URI_FLAGS_HAS_PASSWORD = 1 << 1, + G_URI_FLAGS_HAS_AUTH_PARAMS = 1 << 2, + G_URI_FLAGS_ENCODED = 1 << 3, + G_URI_FLAGS_NON_DNS = 1 << 4, +} GUriFlags; + +GLIB_AVAILABLE_IN_2_66 +gboolean g_uri_split (const gchar *uri_string, + GUriFlags flags, + gchar **scheme, + gchar **userinfo, + gchar **host, + gint *port, + gchar **path, + gchar **query, + gchar **fragment, + GError **error); +GLIB_AVAILABLE_IN_2_66 +gboolean g_uri_split_with_user (const gchar *uri_string, + GUriFlags flags, + gchar **scheme, + gchar **user, + gchar **password, + gchar **auth_params, + gchar **host, + gint *port, + gchar **path, + gchar **query, + gchar **fragment, + GError **error); +GLIB_AVAILABLE_IN_2_66 +gboolean g_uri_split_network (const gchar *uri_string, + GUriFlags flags, + gchar **scheme, + gchar **host, + gint *port, + GError **error); + +GLIB_AVAILABLE_IN_2_66 +gboolean g_uri_is_valid (const gchar *uri_string, + GUriFlags flags, + GError **error); + +GLIB_AVAILABLE_IN_2_66 +gchar * g_uri_join (GUriFlags flags, + const gchar *scheme, + const gchar *userinfo, + const gchar *host, + gint port, + const gchar *path, + const gchar *query, + const gchar *fragment); +GLIB_AVAILABLE_IN_2_66 +gchar * g_uri_join_with_user (GUriFlags flags, + const gchar *scheme, + const gchar *user, + const gchar *password, + const gchar *auth_params, + const gchar *host, + gint port, + const gchar *path, + const gchar *query, + const gchar *fragment); + +GLIB_AVAILABLE_IN_2_66 +GUri * g_uri_parse (const gchar *uri_string, + GUriFlags flags, + GError **error); +GLIB_AVAILABLE_IN_2_66 +GUri * g_uri_parse_relative (GUri *base_uri, + const gchar *uri_string, + GUriFlags flags, + GError **error); + +GLIB_AVAILABLE_IN_2_66 +gchar * g_uri_resolve_relative (const gchar *base_uri_string, + const gchar *uri_string, + GUriFlags flags, + GError **error); + +GLIB_AVAILABLE_IN_2_66 +GUri * 
g_uri_build (GUriFlags flags, + const gchar *scheme, + const gchar *userinfo, + const gchar *host, + gint port, + const gchar *path, + const gchar *query, + const gchar *fragment); +GLIB_AVAILABLE_IN_2_66 +GUri * g_uri_build_with_user (GUriFlags flags, + const gchar *scheme, + const gchar *user, + const gchar *password, + const gchar *auth_params, + const gchar *host, + gint port, + const gchar *path, + const gchar *query, + const gchar *fragment); + +/** + * GUriHideFlags: + * @G_URI_HIDE_USERINFO: Hide the userinfo. + * @G_URI_HIDE_PASSWORD: Hide the password. + * @G_URI_HIDE_AUTH_PARAMS: Hide the auth_params. + * @G_URI_HIDE_FRAGMENT: Hide the fragment. + * @G_URI_HIDE_NONE: No flags set. + * + * Flags describing what parts of the URI to hide in + * g_uri_to_string_partial(). Note that %G_URI_HIDE_PASSWORD and + * %G_URI_HIDE_AUTH_PARAMS will only work if the #GUri was parsed with + * the corresponding flags. + * + * Since: 2.66 + */ +GLIB_AVAILABLE_TYPE_IN_2_66 +typedef enum { + G_URI_HIDE_NONE = 0, + G_URI_HIDE_USERINFO = 1 << 0, + G_URI_HIDE_PASSWORD = 1 << 1, + G_URI_HIDE_AUTH_PARAMS = 1 << 2, + G_URI_HIDE_FRAGMENT = 1 << 3, +} GUriHideFlags; + +GLIB_AVAILABLE_IN_2_66 +char * g_uri_to_string (GUri *uri); +GLIB_AVAILABLE_IN_2_66 +char * g_uri_to_string_partial (GUri *uri, + GUriHideFlags flags); + +GLIB_AVAILABLE_IN_2_66 +const gchar *g_uri_get_scheme (GUri *uri); +GLIB_AVAILABLE_IN_2_66 +const gchar *g_uri_get_userinfo (GUri *uri); +GLIB_AVAILABLE_IN_2_66 +const gchar *g_uri_get_user (GUri *uri); +GLIB_AVAILABLE_IN_2_66 +const gchar *g_uri_get_password (GUri *uri); +GLIB_AVAILABLE_IN_2_66 +const gchar *g_uri_get_auth_params (GUri *uri); +GLIB_AVAILABLE_IN_2_66 +const gchar *g_uri_get_host (GUri *uri); +GLIB_AVAILABLE_IN_2_66 +gint g_uri_get_port (GUri *uri); +GLIB_AVAILABLE_IN_2_66 +const gchar *g_uri_get_path (GUri *uri); +GLIB_AVAILABLE_IN_2_66 +const gchar *g_uri_get_query (GUri *uri); +GLIB_AVAILABLE_IN_2_66 +const gchar *g_uri_get_fragment (GUri *uri); 
+GLIB_AVAILABLE_IN_2_66 +GUriFlags g_uri_get_flags (GUri *uri); + +GLIB_AVAILABLE_IN_2_66 +GHashTable * g_uri_parse_params (const gchar *params, + gssize length, + gchar separator, + gboolean case_insensitive); + +/** + * G_URI_ERROR: + * + * Error domain for URI methods. Errors in this domain will be from + * the #GUriError enumeration. See #GError for information on error + * domains. + * + * Since: 2.66 + */ +#define G_URI_ERROR (g_uri_error_quark ()) GLIB_AVAILABLE_MACRO_IN_2_66 +GLIB_AVAILABLE_IN_2_66 +GQuark g_uri_error_quark (void); + +/** + * GUriError: + * @G_URI_ERROR_MISC: miscellaneous error + * @G_URI_ERROR_BAD_SCHEME: the scheme of a URI could not be parsed. + * @G_URI_ERROR_BAD_USER: the user/userinfo of a URI could not be parsed. + * @G_URI_ERROR_BAD_PASSWORD: the password of a URI could not be parsed. + * @G_URI_ERROR_BAD_AUTH_PARAMS: the authentication parameters of a URI could not be parsed. + * @G_URI_ERROR_BAD_HOST: the host of a URI could not be parsed. + * @G_URI_ERROR_BAD_PORT: the port of a URI could not be parsed. + * @G_URI_ERROR_BAD_PATH: the path of a URI could not be parsed. + * @G_URI_ERROR_BAD_QUERY: the query of a URI could not be parsed. + * @G_URI_ERROR_BAD_FRAGMENT: the fragment of a URI could not be parsed. + * + * Error codes returned by #GUri methods. + * + * Since: 2.66 + */ +typedef enum { + G_URI_ERROR_MISC, + G_URI_ERROR_BAD_SCHEME, + G_URI_ERROR_BAD_USER, + G_URI_ERROR_BAD_PASSWORD, + G_URI_ERROR_BAD_AUTH_PARAMS, + G_URI_ERROR_BAD_HOST, + G_URI_ERROR_BAD_PORT, + G_URI_ERROR_BAD_PATH, + G_URI_ERROR_BAD_QUERY, + G_URI_ERROR_BAD_FRAGMENT, +} GUriError; + +/** + * G_URI_RESERVED_CHARS_GENERIC_DELIMITERS: + * + * Generic delimiters characters as defined in RFC 3986. Includes ":/?#[]@". + * + * Since: 2.16 + **/ +#define G_URI_RESERVED_CHARS_GENERIC_DELIMITERS ":/?#[]@" + +/** + * G_URI_RESERVED_CHARS_SUBCOMPONENT_DELIMITERS: + * + * Subcomponent delimiter characters as defined in RFC 3986. Includes "!$&'()*+,;=". 
+ * + * Since: 2.16 + **/ +#define G_URI_RESERVED_CHARS_SUBCOMPONENT_DELIMITERS "!$&'()*+,;=" + +/** + * G_URI_RESERVED_CHARS_ALLOWED_IN_PATH_ELEMENT: + * + * Allowed characters in path elements. Includes "!$&'()*+,;=:@". + * + * Since: 2.16 + **/ +#define G_URI_RESERVED_CHARS_ALLOWED_IN_PATH_ELEMENT G_URI_RESERVED_CHARS_SUBCOMPONENT_DELIMITERS ":@" + +/** + * G_URI_RESERVED_CHARS_ALLOWED_IN_PATH: + * + * Allowed characters in a path. Includes "!$&'()*+,;=:@/". + * + * Since: 2.16 + **/ +#define G_URI_RESERVED_CHARS_ALLOWED_IN_PATH G_URI_RESERVED_CHARS_ALLOWED_IN_PATH_ELEMENT "/" + +/** + * G_URI_RESERVED_CHARS_ALLOWED_IN_USERINFO: + * + * Allowed characters in userinfo as defined in RFC 3986. Includes "!$&'()*+,;=:". + * + * Since: 2.16 + **/ +#define G_URI_RESERVED_CHARS_ALLOWED_IN_USERINFO G_URI_RESERVED_CHARS_SUBCOMPONENT_DELIMITERS ":" + +GLIB_AVAILABLE_IN_ALL +char * g_uri_unescape_string (const char *escaped_string, + const char *illegal_characters); +GLIB_AVAILABLE_IN_ALL +char * g_uri_unescape_segment (const char *escaped_string, + const char *escaped_string_end, + const char *illegal_characters); + +GLIB_AVAILABLE_IN_ALL +char * g_uri_parse_scheme (const char *uri); +GLIB_AVAILABLE_IN_2_66 +const char *g_uri_peek_scheme (const char *uri); + +GLIB_AVAILABLE_IN_ALL +char * g_uri_escape_string (const char *unescaped, + const char *reserved_chars_allowed, + gboolean allow_utf8); + +GLIB_AVAILABLE_IN_2_66 +GBytes * g_uri_unescape_bytes (const char *escaped_string, + gssize length); +GLIB_AVAILABLE_IN_2_66 +char * g_uri_escape_bytes (const guchar *unescaped, + gsize length, + const char *reserved_chars_allowed); + +G_GNUC_END_IGNORE_DEPRECATIONS + +G_END_DECLS diff --git a/glib/gurifuncs.c b/glib/gurifuncs.c deleted file mode 100644 index a613c04b6..000000000 --- a/glib/gurifuncs.c +++ /dev/null @@ -1,252 +0,0 @@ -/* GIO - GLib Input, Output and Streaming Library - * - * Copyright (C) 2006-2007 Red Hat, Inc. 
- * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General - * Public License along with this library; if not, see . - * - * Author: Alexander Larsson - */ - -#include "config.h" - -#include "gurifuncs.h" - -#include -#include -#include -#include - -#include - -#include "config.h" - -/** - * SECTION:gurifuncs - * @title: URI Functions - * @short_description: manipulating URIs - * - * Functions for manipulating Universal Resource Identifiers (URIs) as - * defined by - * [RFC 3986](http://www.ietf.org/rfc/rfc3986.txt). - * It is highly recommended that you have read and - * understand RFC 3986 for understanding this API. - */ - -static int -unescape_character (const char *scanner) -{ - int first_digit; - int second_digit; - - first_digit = g_ascii_xdigit_value (*scanner++); - if (first_digit < 0) - return -1; - - second_digit = g_ascii_xdigit_value (*scanner++); - if (second_digit < 0) - return -1; - - return (first_digit << 4) | second_digit; -} - -/** - * g_uri_unescape_segment: - * @escaped_string: (nullable): A string, may be %NULL - * @escaped_string_end: (nullable): Pointer to end of @escaped_string, may be %NULL - * @illegal_characters: (nullable): An optional string of illegal characters not to be allowed, may be %NULL - * - * Unescapes a segment of an escaped string. 
- * - * If any of the characters in @illegal_characters or the character zero appears - * as an escaped character in @escaped_string then that is an error and %NULL - * will be returned. This is useful it you want to avoid for instance having a - * slash being expanded in an escaped path element, which might confuse pathname - * handling. - * - * Returns: an unescaped version of @escaped_string or %NULL on error. - * The returned string should be freed when no longer needed. As a - * special case if %NULL is given for @escaped_string, this function - * will return %NULL. - * - * Since: 2.16 - **/ -char * -g_uri_unescape_segment (const char *escaped_string, - const char *escaped_string_end, - const char *illegal_characters) -{ - const char *in; - char *out, *result; - gint character; - - if (escaped_string == NULL) - return NULL; - - if (escaped_string_end == NULL) - escaped_string_end = escaped_string + strlen (escaped_string); - - result = g_malloc (escaped_string_end - escaped_string + 1); - - out = result; - for (in = escaped_string; in < escaped_string_end; in++) - { - character = *in; - - if (*in == '%') - { - in++; - - if (escaped_string_end - in < 2) - { - /* Invalid escaped char (to short) */ - g_free (result); - return NULL; - } - - character = unescape_character (in); - - /* Check for an illegal character. We consider '\0' illegal here. */ - if (character <= 0 || - (illegal_characters != NULL && - strchr (illegal_characters, (char)character) != NULL)) - { - g_free (result); - return NULL; - } - - in++; /* The other char will be eaten in the loop header */ - } - *out++ = (char)character; - } - - *out = '\0'; - - return result; -} - -/** - * g_uri_unescape_string: - * @escaped_string: an escaped string to be unescaped. - * @illegal_characters: (nullable): a string of illegal characters not to be - * allowed, or %NULL. - * - * Unescapes a whole escaped string. 
- * - * If any of the characters in @illegal_characters or the character zero appears - * as an escaped character in @escaped_string then that is an error and %NULL - * will be returned. This is useful it you want to avoid for instance having a - * slash being expanded in an escaped path element, which might confuse pathname - * handling. - * - * Returns: an unescaped version of @escaped_string. The returned string - * should be freed when no longer needed. - * - * Since: 2.16 - **/ -char * -g_uri_unescape_string (const char *escaped_string, - const char *illegal_characters) -{ - return g_uri_unescape_segment (escaped_string, NULL, illegal_characters); -} - -/** - * g_uri_parse_scheme: - * @uri: a valid URI. - * - * Gets the scheme portion of a URI string. RFC 3986 decodes the scheme as: - * |[ - * URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] - * ]| - * Common schemes include "file", "http", "svn+ssh", etc. - * - * Returns: The "Scheme" component of the URI, or %NULL on error. - * The returned string should be freed when no longer needed. - * - * Since: 2.16 - **/ -char * -g_uri_parse_scheme (const char *uri) -{ - const char *p; - char c; - - g_return_val_if_fail (uri != NULL, NULL); - - /* From RFC 3986 Decodes: - * URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] - */ - - p = uri; - - /* Decode scheme: - scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) - */ - - if (!g_ascii_isalpha (*p)) - return NULL; - - while (1) - { - c = *p++; - - if (c == ':') - break; - - if (!(g_ascii_isalnum(c) || - c == '+' || - c == '-' || - c == '.')) - return NULL; - } - - return g_strndup (uri, p - uri - 1); -} - -/** - * g_uri_escape_string: - * @unescaped: the unescaped input string. - * @reserved_chars_allowed: (nullable): a string of reserved characters that - * are allowed to be used, or %NULL. - * @allow_utf8: %TRUE if the result can include UTF-8 characters. - * - * Escapes a string for use in a URI. 
- * - * Normally all characters that are not "unreserved" (i.e. ASCII alphanumerical - * characters plus dash, dot, underscore and tilde) are escaped. - * But if you specify characters in @reserved_chars_allowed they are not - * escaped. This is useful for the "reserved" characters in the URI - * specification, since those are allowed unescaped in some portions of - * a URI. - * - * Returns: an escaped version of @unescaped. The returned string should be - * freed when no longer needed. - * - * Since: 2.16 - **/ -char * -g_uri_escape_string (const char *unescaped, - const char *reserved_chars_allowed, - gboolean allow_utf8) -{ - GString *s; - - g_return_val_if_fail (unescaped != NULL, NULL); - - s = g_string_sized_new (strlen (unescaped) + 10); - - g_string_append_uri_escaped (s, unescaped, reserved_chars_allowed, allow_utf8); - - return g_string_free (s, FALSE); -} diff --git a/glib/gurifuncs.h b/glib/gurifuncs.h deleted file mode 100644 index e59a43c87..000000000 --- a/glib/gurifuncs.h +++ /dev/null @@ -1,83 +0,0 @@ -/* GIO - GLib Input, Output and Streaming Library - * - * Copyright (C) 2006-2007 Red Hat, Inc. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General - * Public License along with this library; if not, see . - * - * Author: Alexander Larsson - */ - -#ifndef __G_URI_FUNCS_H__ -#define __G_URI_FUNCS_H__ - -#if !defined (__GLIB_H_INSIDE__) && !defined (GLIB_COMPILATION) -#error "Only can be included directly." 
-#endif - -#include - -G_BEGIN_DECLS - -/** - * G_URI_RESERVED_CHARS_GENERIC_DELIMITERS: - * - * Generic delimiters characters as defined in RFC 3986. Includes ":/?#[]@". - **/ -#define G_URI_RESERVED_CHARS_GENERIC_DELIMITERS ":/?#[]@" - -/** - * G_URI_RESERVED_CHARS_SUBCOMPONENT_DELIMITERS: - * - * Subcomponent delimiter characters as defined in RFC 3986. Includes "!$&'()*+,;=". - **/ -#define G_URI_RESERVED_CHARS_SUBCOMPONENT_DELIMITERS "!$&'()*+,;=" - -/** - * G_URI_RESERVED_CHARS_ALLOWED_IN_PATH_ELEMENT: - * - * Allowed characters in path elements. Includes "!$&'()*+,;=:@". - **/ -#define G_URI_RESERVED_CHARS_ALLOWED_IN_PATH_ELEMENT G_URI_RESERVED_CHARS_SUBCOMPONENT_DELIMITERS ":@" - -/** - * G_URI_RESERVED_CHARS_ALLOWED_IN_PATH: - * - * Allowed characters in a path. Includes "!$&'()*+,;=:@/". - **/ -#define G_URI_RESERVED_CHARS_ALLOWED_IN_PATH G_URI_RESERVED_CHARS_ALLOWED_IN_PATH_ELEMENT "/" - -/** - * G_URI_RESERVED_CHARS_ALLOWED_IN_USERINFO: - * - * Allowed characters in userinfo as defined in RFC 3986. Includes "!$&'()*+,;=:". - **/ -#define G_URI_RESERVED_CHARS_ALLOWED_IN_USERINFO G_URI_RESERVED_CHARS_SUBCOMPONENT_DELIMITERS ":" - -GLIB_AVAILABLE_IN_ALL -char * g_uri_unescape_string (const char *escaped_string, - const char *illegal_characters); -GLIB_AVAILABLE_IN_ALL -char * g_uri_unescape_segment (const char *escaped_string, - const char *escaped_string_end, - const char *illegal_characters); -GLIB_AVAILABLE_IN_ALL -char * g_uri_parse_scheme (const char *uri); -GLIB_AVAILABLE_IN_ALL -char * g_uri_escape_string (const char *unescaped, - const char *reserved_chars_allowed, - gboolean allow_utf8); - -G_END_DECLS - -#endif /* __G_URI_FUNCS_H__ */ diff --git a/glib/guriprivate.h b/glib/guriprivate.h new file mode 100644 index 000000000..c1ade30f2 --- /dev/null +++ b/glib/guriprivate.h @@ -0,0 +1,36 @@ +/* + * Copyright © 2020 Red Hat, Inc. 
+ * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + * Author: Marc-André Lureau + */ + +#ifndef __G_URI_PRIVATE_H__ +#define __G_URI_PRIVATE_H__ + +#include "gtypes.h" + +G_BEGIN_DECLS + +void +_uri_encoder (GString *out, + const guchar *start, + gsize length, + const gchar *reserved_chars_allowed, + gboolean allow_utf8); + +G_END_DECLS + +#endif /* __G_URI_PRIVATE_H__ */ diff --git a/glib/meson.build b/glib/meson.build index 3f24579b0..149d62f01 100644 --- a/glib/meson.build +++ b/glib/meson.build @@ -194,7 +194,7 @@ glib_sub_headers = files( 'gtypes.h', 'guuid.h', 'gunicode.h', - 'gurifuncs.h', + 'guri.h', 'gutils.h', 'gvarianttype.h', 'gvariant.h', @@ -283,7 +283,8 @@ glib_sources = files( 'gunibreak.c', 'gunicollate.c', 'gunidecomp.c', - 'gurifuncs.c', + 'guri.c', + 'guriprivate.h', 'gutils.c', 'gutilsprivate.h', 'guuid.c', diff --git a/glib/tests/uri.c b/glib/tests/uri.c index d292f33bf..dca99a28d 100644 --- a/glib/tests/uri.c +++ b/glib/tests/uri.c @@ -33,10 +33,10 @@ typedef struct char *hostname; char *expected_result; GConvertError expected_error; /* If failed */ -} ToUriTest; +} FileToUriTest; -ToUriTest -to_uri_tests[] = { +FileToUriTest +file_to_uri_tests[] = { { "/etc", NULL, "file:///etc"}, { "/etc", "", "file:///etc"}, { "/etc", "otherhost", "file://otherhost/etc"}, @@ -81,7 +81,7 @@ to_uri_tests[] = { * and would not occur in real life. 
*/ { "/:", NULL, "file:///:"}, - { "/?&=", NULL, "file:///%3F&="}, + { "/?&=", NULL, "file:///%3F&="}, { "/", "0123456789-", NULL, G_CONVERT_ERROR_ILLEGAL_SEQUENCE}, { "/", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "file://ABCDEFGHIJKLMNOPQRSTUVWXYZ/"}, { "/", "abcdefghijklmnopqrstuvwxyz", "file://abcdefghijklmnopqrstuvwxyz/"}, @@ -101,10 +101,10 @@ typedef struct char *expected_filename; char *expected_hostname; GConvertError expected_error; /* If failed */ -} FromUriTest; +} FileFromUriTest; -FromUriTest -from_uri_tests[] = { +FileFromUriTest +file_from_uri_tests[] = { { "file:///etc", "/etc"}, { "file:/etc", "/etc"}, #ifdef G_OS_WIN32 @@ -163,23 +163,23 @@ from_uri_tests[] = { }; static void -run_to_uri_tests (void) +run_file_to_uri_tests (void) { int i; gchar *res; GError *error; - for (i = 0; i < G_N_ELEMENTS (to_uri_tests); i++) + for (i = 0; i < G_N_ELEMENTS (file_to_uri_tests); i++) { error = NULL; - res = g_filename_to_uri (to_uri_tests[i].filename, - to_uri_tests[i].hostname, + res = g_filename_to_uri (file_to_uri_tests[i].filename, + file_to_uri_tests[i].hostname, &error); if (res) - g_assert_cmpstr (res, ==, to_uri_tests[i].expected_result); + g_assert_cmpstr (res, ==, file_to_uri_tests[i].expected_result); else - g_assert_error (error, G_CONVERT_ERROR, to_uri_tests[i].expected_error); + g_assert_error (error, G_CONVERT_ERROR, file_to_uri_tests[i].expected_error); g_free (res); g_clear_error (&error); @@ -187,25 +187,26 @@ run_to_uri_tests (void) } static void -run_from_uri_tests (void) +run_file_from_uri_tests (void) { int i; gchar *res; gchar *hostname; GError *error; - for (i = 0; i < G_N_ELEMENTS (from_uri_tests); i++) + for (i = 0; i < G_N_ELEMENTS (file_from_uri_tests); i++) { error = NULL; - res = g_filename_from_uri (from_uri_tests[i].uri, + res = g_filename_from_uri (file_from_uri_tests[i].uri, &hostname, &error); #ifdef G_OS_WIN32 - if (from_uri_tests[i].expected_filename) + if (file_from_uri_tests[i].expected_filename) { gchar *p, *slash; - p = 
from_uri_tests[i].expected_filename = g_strdup (from_uri_tests[i].expected_filename); + p = file_from_uri_tests[i].expected_filename = + g_strdup (file_from_uri_tests[i].expected_filename); while ((slash = strchr (p, '/')) != NULL) { *slash = '\\'; @@ -214,10 +215,10 @@ run_from_uri_tests (void) } #endif if (res) - g_assert_cmpstr (res, ==, from_uri_tests[i].expected_filename); + g_assert_cmpstr (res, ==, file_from_uri_tests[i].expected_filename); else - g_assert_error (error, G_CONVERT_ERROR, from_uri_tests[i].expected_error); - g_assert_cmpstr (hostname, ==, from_uri_tests[i].expected_hostname); + g_assert_error (error, G_CONVERT_ERROR, file_from_uri_tests[i].expected_error); + g_assert_cmpstr (hostname, ==, file_from_uri_tests[i].expected_hostname); g_free (res); g_free (hostname); @@ -266,20 +267,20 @@ safe_strcmp_hostname (const gchar *a, const gchar *b) } static void -run_roundtrip_tests (void) +run_file_roundtrip_tests (void) { int i; gchar *uri, *hostname, *res; GError *error; - for (i = 0; i < G_N_ELEMENTS (to_uri_tests); i++) + for (i = 0; i < G_N_ELEMENTS (file_to_uri_tests); i++) { - if (to_uri_tests[i].expected_error != 0) + if (file_to_uri_tests[i].expected_error != 0) continue; error = NULL; - uri = g_filename_to_uri (to_uri_tests[i].filename, - to_uri_tests[i].hostname, + uri = g_filename_to_uri (file_to_uri_tests[i].filename, + file_to_uri_tests[i].hostname, &error); g_assert_no_error (error); @@ -287,8 +288,8 @@ run_roundtrip_tests (void) res = g_filename_from_uri (uri, &hostname, &error); g_assert_no_error (error); - g_assert (safe_strcmp_filename (to_uri_tests[i].filename, res) == 0); - g_assert (safe_strcmp_hostname (to_uri_tests[i].hostname, hostname) == 0); + g_assert_cmpint (safe_strcmp_filename (file_to_uri_tests[i].filename, res), ==, 0); + g_assert_cmpint (safe_strcmp_hostname (file_to_uri_tests[i].hostname, hostname), ==, 0); g_free (res); g_free (uri); g_free (hostname); @@ -329,17 +330,33 @@ run_uri_list_tests (void) static void 
test_uri_unescape (void) { + GBytes *bytes; gchar *s; + const gchar *data; + const gchar *escaped_segment = "%2Babc %4F---"; - s = g_uri_unescape_string ("%2Babc %4F", NULL); + s = g_uri_unescape_string ("%2Babc %4F", NULL); g_assert_cmpstr (s, ==, "+abc O"); g_free (s); - g_assert_cmpstr (g_uri_unescape_string ("%2Babc %4F", "+"), ==, NULL); - g_assert_cmpstr (g_uri_unescape_string ("%00abc %4F", "+/"), ==, NULL); - g_assert_cmpstr (g_uri_unescape_string ("%0", NULL), ==, NULL); - g_assert_cmpstr (g_uri_unescape_string ("%ra", NULL), ==, NULL); - g_assert_cmpstr (g_uri_unescape_string ("%2r", NULL), ==, NULL); - g_assert_cmpstr (g_uri_unescape_string (NULL, NULL), ==, NULL); + s = g_uri_unescape_segment (escaped_segment, escaped_segment + 10, NULL); + g_assert_cmpstr (s, ==, "+abc O"); + g_free (s); + g_assert_cmpstr (g_uri_unescape_string ("%2Babc %4F", "+"), ==, NULL); + g_assert_cmpstr (g_uri_unescape_string ("%00abc %4F", "+/"), ==, NULL); + g_assert_cmpstr (g_uri_unescape_string ("%0", NULL), ==, NULL); + g_assert_cmpstr (g_uri_unescape_string ("%ra", NULL), ==, NULL); + g_assert_cmpstr (g_uri_unescape_string ("%2r", NULL), ==, NULL); + g_assert_cmpstr (g_uri_unescape_string (NULL, NULL), ==, NULL); + + bytes = g_uri_unescape_bytes ("%00%00", -1); + g_assert_cmpint (g_bytes_get_size (bytes), ==, 2); + data = g_bytes_get_data (bytes, NULL); + g_assert_cmpint (data[0], ==, 0); + g_assert_cmpint (data[1], ==, 0); + g_bytes_unref (bytes); + + bytes = g_uri_unescape_bytes ("%%", -1); + g_assert_null (bytes); } static void @@ -359,20 +376,865 @@ test_uri_escape (void) s = g_uri_escape_string ("a+b:c\303\234", "+", TRUE); g_assert_cmpstr (s, ==, "a+b%3Ac\303\234"); g_free (s); + + s = g_uri_escape_bytes ((guchar*)"\0\0", 2, NULL); + g_assert_cmpstr (s, ==, "%00%00"); + g_free (s); } static void test_uri_scheme (void) { + const gchar *s1, *s2; gchar *s; s = g_uri_parse_scheme ("ftp://ftp.gtk.org"); g_assert_cmpstr (s, ==, "ftp"); g_free (s); + s = g_uri_parse_scheme 
("1bad:"); - g_assert (s == NULL); + g_assert_null (s); s = g_uri_parse_scheme ("bad"); - g_assert (s == NULL); + g_assert_null (s); + s = g_uri_parse_scheme ("99http://host/path"); + g_assert_null (s); + s = g_uri_parse_scheme (".http://host/path"); + g_assert_null (s); + s = g_uri_parse_scheme ("+http://host/path"); + g_assert_null (s); + + s1 = g_uri_peek_scheme ("ftp://ftp.gtk.org"); + g_assert_cmpstr (s1, ==, "ftp"); + s2 = g_uri_peek_scheme ("FTP://ftp.gtk.org"); + g_assert_cmpstr (s2, ==, "ftp"); + g_assert_true (s1 == s2); + s1 = g_uri_peek_scheme ("1bad:"); + g_assert_null (s1); + s1 = g_uri_peek_scheme ("bad"); + g_assert_null (s1); +} + +typedef struct { + const gchar *scheme; + const gchar *userinfo; + const gchar *host; + gint port; + const gchar *path; + const gchar *query; + const gchar *fragment; +} UriParts; + +typedef struct { + const gchar *orig; + const UriParts parts; +} UriAbsoluteTest; + +static const UriAbsoluteTest absolute_tests[] = { + { "foo:", + { "foo", NULL, NULL, -1, "", NULL, NULL } + }, + { "file:/dev/null", + { "file", NULL, NULL, -1, "/dev/null", NULL, NULL } + }, + { "file:///dev/null", + { "file", NULL, "", -1, "/dev/null", NULL, NULL } + }, + { "ftp://user@host/path", + { "ftp", "user", "host", -1, "/path", NULL, NULL } + }, + { "ftp://user@host:9999/path", + { "ftp", "user", "host", 9999, "/path", NULL, NULL } + }, + { "ftp://user:password@host/path", + { "ftp", "user:password", "host", -1, "/path", NULL, NULL } + }, + { "ftp://user:password@host:9999/path", + { "ftp", "user:password", "host", 9999, "/path", NULL, NULL } + }, + { "ftp://user:password@host", + { "ftp", "user:password", "host", -1, "", NULL, NULL } + }, + { "http://us%65r@host", + { "http", "user", "host", -1, "", NULL, NULL } + }, + { "http://us%40r@host", + { "http", "us@r", "host", -1, "", NULL, NULL } + }, + { "http://us%3ar@host", + { "http", "us:r", "host", -1, "", NULL, NULL } + }, + { "http://us%2fr@host", + { "http", "us/r", "host", -1, "", NULL, NULL 
} + }, + { "http://us%3fr@host", + { "http", "us?r", "host", -1, "", NULL, NULL } + }, + { "http://host?query", + { "http", NULL, "host", -1, "", "query", NULL } + }, + { "http://host/path?query=http%3A%2F%2Fhost%2Fpath%3Fchildparam%3Dchildvalue&param=value", + { "http", NULL, "host", -1, "/path", "query=http://host/path?childparam=childvalue&param=value", NULL } + }, + { "http://control-chars/%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F%7F", + { "http", NULL, "control-chars", -1, "/\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x7F", NULL, NULL } + }, + { "http://space/%20", + { "http", NULL, "space", -1, "/ ", NULL, NULL } + }, + { "http://delims/%3C%3E%23%25%22", + { "http", NULL, "delims", -1, "/<>#%\"", NULL, NULL } + }, + { "http://unwise-chars/%7B%7D%7C%5C%5E%5B%5D%60", + { "http", NULL, "unwise-chars", -1, "/{}|\\^[]`", NULL, NULL } + }, + + /* From RFC 2732 */ + { "http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80/index.html", + { "http", NULL, "FEDC:BA98:7654:3210:FEDC:BA98:7654:3210", 80, "/index.html", NULL, NULL } + }, + { "http://[1080:0:0:0:8:800:200C:417A]/index.html", + { "http", NULL, "1080:0:0:0:8:800:200C:417A", -1, "/index.html", NULL, NULL } + }, + { "http://[3ffe:2a00:100:7031::1]", + { "http", NULL, "3ffe:2a00:100:7031::1", -1, "", NULL, NULL } + }, + { "http://[1080::8:800:200C:417A]/foo", + { "http", NULL, "1080::8:800:200C:417A", -1, "/foo", NULL, NULL } + }, + { "http://[::192.9.5.5]/ipng", + { "http", NULL, "::192.9.5.5", -1, "/ipng", NULL, NULL } + }, + { "http://[::FFFF:129.144.52.38]:80/index.html", + { "http", NULL, "::FFFF:129.144.52.38", 80, "/index.html", NULL, NULL } + }, + { "http://[2010:836B:4179::836B:4179]", + { "http", NULL, "2010:836B:4179::836B:4179", -1, "", NULL, NULL } + }, + + /* some problematic URIs that are handled differently in libsoup */ + { "http://host/path with spaces", + { "http", NULL, 
"host", -1, "/path with spaces", NULL, NULL } + }, + { " http://host/path", + { "http", NULL, "host", -1, "/path", NULL, NULL } + }, + { "http://host/path ", + { "http", NULL, "host", -1, "/path", NULL, NULL } + }, + { "http://host ", + { "http", NULL, "host", -1, "", NULL, NULL } + }, + { "http://host:999 ", + { "http", NULL, "host", 999, "", NULL, NULL } + }, + { "http://host/pa\nth", + { "http", NULL, "host", -1, "/path", NULL, NULL } + }, + { "http:\r\n//host/path", + { "http", NULL, "host", -1, "/path", NULL, NULL } + }, + { "http://\thost/path", + { "http", NULL, "host", -1, "/path", NULL, NULL } + }, + + /* Bug 594405; 0-length is different from not-present */ + { "http://host/path?", + { "http", NULL, "host", -1, "/path", "", NULL } + }, + { "http://host/path#", + { "http", NULL, "host", -1, "/path", NULL, "" }, + }, + + /* Bug 590524; ignore bad %-encoding */ + { "http://host/path%", + { "http", NULL, "host", -1, "/path%", NULL, NULL } + }, + { "http://h%ost/path", + { "http", NULL, "h%ost", -1, "/path", NULL, NULL } + }, + { "http://host/path%%", + { "http", NULL, "host", -1, "/path%%", NULL, NULL } + }, + { "http://host/path%%%", + { "http", NULL, "host", -1, "/path%%%", NULL, NULL } + }, + { "http://host/path%/x/", + { "http", NULL, "host", -1, "/path%/x/", NULL, NULL } + }, + { "http://host/path%0x/", + { "http", NULL, "host", -1, "/path%0x/", NULL, NULL } + }, + { "http://host/path%ax", + { "http", NULL, "host", -1, "/path%ax", NULL, NULL } + }, + + /* GUri doesn't %-encode non-ASCII characters */ + { "http://host/p\xc3\xa4th/", + { "http", NULL, "host", -1, "/p\xc3\xa4th/", NULL, NULL } + }, + + { "HTTP:////////////////", + { "http", NULL, "", -1, "//////////////", NULL, NULL } + }, + + { "http://@host", + { "http", "", "host", -1, "", NULL, NULL } + }, + { "http://:@host", + { "http", ":", "host", -1, "", NULL, NULL } + }, + + /* IPv6 scope ID parsing (both correct and incorrect) */ + { "http://[fe80::dead:beef%em1]/", + { "http", NULL, 
"fe80::dead:beef%em1", -1, "/", NULL, NULL } + }, + { "http://[fe80::dead:beef%25em1]/", + { "http", NULL, "fe80::dead:beef%em1", -1, "/", NULL, NULL } + }, + { "http://[fe80::dead:beef%10]/", + { "http", NULL, "fe80::dead:beef%10", -1, "/", NULL, NULL } + }, + + /* ".." past top */ + { "http://example.com/..", + { "http", NULL, "example.com", -1, "/..", NULL, NULL } + }, + + /* scheme parsing */ + { "foo0://host/path", + { "foo0", NULL, "host", -1, "/path", NULL, NULL } }, + { "f0.o://host/path", + { "f0.o", NULL, "host", -1, "/path", NULL, NULL } }, + { "http++://host/path", + { "http++", NULL, "host", -1, "/path", NULL, NULL } }, + { "http-ish://host/path", + { "http-ish", NULL, "host", -1, "/path", NULL, NULL } }, + + /* IPv6 scope ID parsing (both correct and incorrect) */ + { "http://[fe80::dead:beef%em1]/", + { "http", NULL, "fe80::dead:beef%em1", -1, "/", NULL, NULL } }, + { "http://[fe80::dead:beef%25em1]/", + { "http", NULL, "fe80::dead:beef%em1", -1, "/", NULL, NULL } }, + { "http://[fe80::dead:beef%10]/", + { "http", NULL, "fe80::dead:beef%10", -1, "/", NULL, NULL } }, +}; +static int num_absolute_tests = G_N_ELEMENTS (absolute_tests); + +static void +test_uri_parsing_absolute (void) +{ + int i; + + for (i = 0; i < num_absolute_tests; i++) + { + const UriAbsoluteTest *test = &absolute_tests[i]; + GError *error = NULL; + GUri *uri; + + uri = g_uri_parse (test->orig, G_URI_FLAGS_NONE, &error); + g_assert_no_error (error); + + g_assert_cmpstr (g_uri_get_scheme (uri), ==, test->parts.scheme); + g_assert_cmpstr (g_uri_get_userinfo (uri), ==, test->parts.userinfo); + g_assert_cmpstr (g_uri_get_host (uri), ==, test->parts.host); + g_assert_cmpint (g_uri_get_port (uri), ==, test->parts.port); + g_assert_cmpstr (g_uri_get_path (uri), ==, test->parts.path); + g_assert_cmpstr (g_uri_get_query (uri), ==, test->parts.query); + g_assert_cmpstr (g_uri_get_fragment (uri), ==, test->parts.fragment); + + g_uri_unref (uri); + } +} + +typedef struct { + const gchar *orig, 
*resolved; + UriParts parts; +} UriRelativeTest; + +/* This all comes from RFC 3986 */ +static const char *relative_test_base = "http://a/b/c/d;p?q"; +static const UriRelativeTest relative_tests[] = { + { "g:h", "g:h", + { "g", NULL, NULL, -1, "h", NULL, NULL } }, + { "g", "http://a/b/c/g", + { "http", NULL, "a", -1, "/b/c/g", NULL, NULL } }, + { "./g", "http://a/b/c/g", + { "http", NULL, "a", -1, "/b/c/g", NULL, NULL } }, + { "g/", "http://a/b/c/g/", + { "http", NULL, "a", -1, "/b/c/g/", NULL, NULL } }, + { "/g", "http://a/g", + { "http", NULL, "a", -1, "/g", NULL, NULL } }, + { "//g", "http://g", + { "http", NULL, "g", -1, "", NULL, NULL } }, + { "?y", "http://a/b/c/d;p?y", + { "http", NULL, "a", -1, "/b/c/d;p", "y", NULL } }, + { "g?y", "http://a/b/c/g?y", + { "http", NULL, "a", -1, "/b/c/g", "y", NULL } }, + { "#s", "http://a/b/c/d;p?q#s", + { "http", NULL, "a", -1, "/b/c/d;p", "q", "s" } }, + { "g#s", "http://a/b/c/g#s", + { "http", NULL, "a", -1, "/b/c/g", NULL, "s" } }, + { "g?y#s", "http://a/b/c/g?y#s", + { "http", NULL, "a", -1, "/b/c/g", "y", "s" } }, + { ";x", "http://a/b/c/;x", + { "http", NULL, "a", -1, "/b/c/;x", NULL, NULL } }, + { "g;x", "http://a/b/c/g;x", + { "http", NULL, "a", -1, "/b/c/g;x", NULL, NULL } }, + { "g;x?y#s", "http://a/b/c/g;x?y#s", + { "http", NULL, "a", -1, "/b/c/g;x", "y", "s" } }, + { ".", "http://a/b/c/", + { "http", NULL, "a", -1, "/b/c/", NULL, NULL } }, + { "./", "http://a/b/c/", + { "http", NULL, "a", -1, "/b/c/", NULL, NULL } }, + { "..", "http://a/b/", + { "http", NULL, "a", -1, "/b/", NULL, NULL } }, + { "../", "http://a/b/", + { "http", NULL, "a", -1, "/b/", NULL, NULL } }, + { "../g", "http://a/b/g", + { "http", NULL, "a", -1, "/b/g", NULL, NULL } }, + { "../..", "http://a/", + { "http", NULL, "a", -1, "/", NULL, NULL } }, + { "../../", "http://a/", + { "http", NULL, "a", -1, "/", NULL, NULL } }, + { "../../g", "http://a/g", + { "http", NULL, "a", -1, "/g", NULL, NULL } }, + { "", "http://a/b/c/d;p?q", + { "http", 
NULL, "a", -1, "/b/c/d;p", "q", NULL } }, + { "../../../g", "http://a/g", + { "http", NULL, "a", -1, "/g", NULL, NULL } }, + { "../../../../g", "http://a/g", + { "http", NULL, "a", -1, "/g", NULL, NULL } }, + { "/./g", "http://a/g", + { "http", NULL, "a", -1, "/g", NULL, NULL } }, + { "/../g", "http://a/g", + { "http", NULL, "a", -1, "/g", NULL, NULL } }, + { "g.", "http://a/b/c/g.", + { "http", NULL, "a", -1, "/b/c/g.", NULL, NULL } }, + { ".g", "http://a/b/c/.g", + { "http", NULL, "a", -1, "/b/c/.g", NULL, NULL } }, + { "g..", "http://a/b/c/g..", + { "http", NULL, "a", -1, "/b/c/g..", NULL, NULL } }, + { "..g", "http://a/b/c/..g", + { "http", NULL, "a", -1, "/b/c/..g", NULL, NULL } }, + { "./../g", "http://a/b/g", + { "http", NULL, "a", -1, "/b/g", NULL, NULL } }, + { "./g/.", "http://a/b/c/g/", + { "http", NULL, "a", -1, "/b/c/g/", NULL, NULL } }, + { "g/./h", "http://a/b/c/g/h", + { "http", NULL, "a", -1, "/b/c/g/h", NULL, NULL } }, + { "g/../h", "http://a/b/c/h", + { "http", NULL, "a", -1, "/b/c/h", NULL, NULL } }, + { "g;x=1/./y", "http://a/b/c/g;x=1/y", + { "http", NULL, "a", -1, "/b/c/g;x=1/y", NULL, NULL } }, + { "g;x=1/../y", "http://a/b/c/y", + { "http", NULL, "a", -1, "/b/c/y", NULL, NULL } }, + { "g?y/./x", "http://a/b/c/g?y/./x", + { "http", NULL, "a", -1, "/b/c/g", "y/./x", NULL } }, + { "g?y/../x", "http://a/b/c/g?y/../x", + { "http", NULL, "a", -1, "/b/c/g", "y/../x", NULL } }, + { "g#s/./x", "http://a/b/c/g#s/./x", + { "http", NULL, "a", -1, "/b/c/g", NULL, "s/./x" } }, + { "g#s/../x", "http://a/b/c/g#s/../x", + { "http", NULL, "a", -1, "/b/c/g", NULL, "s/../x" } }, + { "http:g", "http:g", + { "http", NULL, NULL, -1, "g", NULL, NULL } }, + { "http://a/../..", "http://a/", + { "http", NULL, "a", -1, "/", NULL, NULL } } +}; +static int num_relative_tests = G_N_ELEMENTS (relative_tests); + +static void +test_uri_parsing_relative (void) +{ + int i; + GUri *base, *uri; + GError *error = NULL; + gchar *resolved; + + base = g_uri_parse 
(relative_test_base, G_URI_FLAGS_NONE, &error); + g_assert_no_error (error); + + for (i = 0; i < num_relative_tests; i++) + { + const UriRelativeTest *test = &relative_tests[i]; + gchar *tostring; + + uri = g_uri_parse_relative (base, test->orig, G_URI_FLAGS_NONE, &error); + g_assert_no_error (error); + + g_assert_cmpstr (g_uri_get_scheme (uri), ==, test->parts.scheme); + g_assert_cmpstr (g_uri_get_userinfo (uri), ==, test->parts.userinfo); + g_assert_cmpstr (g_uri_get_host (uri), ==, test->parts.host); + g_assert_cmpint (g_uri_get_port (uri), ==, test->parts.port); + g_assert_cmpstr (g_uri_get_path (uri), ==, test->parts.path); + g_assert_cmpstr (g_uri_get_query (uri), ==, test->parts.query); + g_assert_cmpstr (g_uri_get_fragment (uri), ==, test->parts.fragment); + + tostring = g_uri_to_string (uri); + g_assert_cmpstr (tostring, ==, test->resolved); + g_free (tostring); + + g_uri_unref (uri); + + resolved = g_uri_resolve_relative (relative_test_base, test->orig, G_URI_FLAGS_NONE, &error); + g_assert_no_error (error); + g_assert_cmpstr (resolved, ==, test->resolved); + g_free (resolved); + } + uri = g_uri_parse_relative (base, "%%", G_URI_FLAGS_PARSE_STRICT, &error); + g_assert_null (uri); + g_assert_error (error, G_URI_ERROR, G_URI_ERROR_BAD_PATH); + g_clear_error (&error); + + g_uri_unref (base); + + resolved = g_uri_resolve_relative (NULL, "http://a", G_URI_FLAGS_NONE, &error); + g_assert_no_error (error); + g_assert_cmpstr (resolved, ==, "http://a"); + g_free (resolved); + + resolved = g_uri_resolve_relative ("http://a", "b", G_URI_FLAGS_NONE, &error); + g_assert_no_error (error); + g_assert_cmpstr (resolved, ==, "http://a/b"); + g_free (resolved); + + resolved = g_uri_resolve_relative (NULL, "a", G_URI_FLAGS_NONE, &error); + g_assert_null (resolved); + g_assert_error (error, G_URI_ERROR, G_URI_ERROR_MISC); + g_clear_error (&error); + + resolved = g_uri_resolve_relative ("../b", "a", G_URI_FLAGS_NONE, &error); + g_assert_null (resolved); + g_assert_error 
(error, G_URI_ERROR, G_URI_ERROR_MISC); + g_clear_error (&error); + + resolved = g_uri_resolve_relative ("%%", "a", G_URI_FLAGS_NONE, &error); + g_assert_null (resolved); + g_assert_error (error, G_URI_ERROR, G_URI_ERROR_MISC); + g_clear_error (&error); +} + +static void +test_uri_to_string (void) +{ + GUri *uri; + gchar *tostring; + + uri = g_uri_build (G_URI_FLAGS_NONE, "scheme", "userinfo", "host", 1234, + "/path", "query", "fragment"); + + tostring = g_uri_to_string (uri); + g_assert_cmpstr (tostring, ==, "scheme://userinfo@host:1234/path?query#fragment"); + g_free (tostring); + + g_uri_unref (uri); + + uri = g_uri_build_with_user (G_URI_FLAGS_NONE, "scheme", "user", "pass", "auth", "host", 1234, + "/path", "query", "fragment"); + tostring = g_uri_to_string (uri); + g_assert_cmpstr (tostring, ==, "scheme://user%3Apass%3Bauth@host:1234/path?query#fragment"); + g_free (tostring); + tostring = g_uri_to_string_partial (uri, G_URI_HIDE_USERINFO); + g_assert_cmpstr (tostring, ==, "scheme://host:1234/path?query#fragment"); + g_free (tostring); + tostring = g_uri_to_string_partial (uri, G_URI_HIDE_FRAGMENT); + g_assert_cmpstr (tostring, ==, "scheme://user%3Apass%3Bauth@host:1234/path?query"); + g_free (tostring); + g_uri_unref (uri); + + uri = g_uri_build_with_user (G_URI_FLAGS_HAS_PASSWORD|G_URI_FLAGS_HAS_AUTH_PARAMS, + "scheme", "user", "pass", "auth", "host", 1234, + "/path", "query", "fragment"); + tostring = g_uri_to_string (uri); + g_assert_cmpstr (tostring, ==, "scheme://user:pass;auth@host:1234/path?query#fragment"); + g_free (tostring); + tostring = g_uri_to_string_partial (uri, G_URI_HIDE_PASSWORD); + g_assert_cmpstr (tostring, ==, "scheme://user;auth@host:1234/path?query#fragment"); + g_free (tostring); + tostring = g_uri_to_string_partial (uri, G_URI_HIDE_AUTH_PARAMS); + g_assert_cmpstr (tostring, ==, "scheme://user:pass@host:1234/path?query#fragment"); + g_free (tostring); + g_uri_unref (uri); +} + +static void +test_uri_build (void) +{ + GUri *uri; + + 
uri = g_uri_build (G_URI_FLAGS_NON_DNS, "scheme", "userinfo", "host", 1234, + "/path", "query", "fragment"); + + /* check ref/unref */ + g_uri_ref (uri); + g_uri_unref (uri); + + g_assert_cmpint (g_uri_get_flags (uri), ==, G_URI_FLAGS_NON_DNS); + g_assert_cmpstr (g_uri_get_scheme (uri), ==, "scheme"); + g_assert_cmpstr (g_uri_get_userinfo (uri), ==, "userinfo"); + g_assert_cmpstr (g_uri_get_host (uri), ==, "host"); + g_assert_cmpint (g_uri_get_port (uri), ==, 1234); + g_assert_cmpstr (g_uri_get_path (uri), ==, "/path"); + g_assert_cmpstr (g_uri_get_query (uri), ==, "query"); + g_assert_cmpstr (g_uri_get_fragment (uri), ==, "fragment"); + g_assert_cmpstr (g_uri_get_user (uri), ==, NULL); + g_assert_cmpstr (g_uri_get_password (uri), ==, NULL); + g_uri_unref (uri); + + uri = g_uri_build_with_user (G_URI_FLAGS_NON_DNS, "scheme", "user", "password", + "authparams", "host", 1234, + "/path", "query", "fragment"); + + g_assert_cmpint (g_uri_get_flags (uri), ==, G_URI_FLAGS_NON_DNS); + g_assert_cmpstr (g_uri_get_scheme (uri), ==, "scheme"); + g_assert_cmpstr (g_uri_get_userinfo (uri), ==, "user:password;authparams"); + g_assert_cmpstr (g_uri_get_host (uri), ==, "host"); + g_assert_cmpint (g_uri_get_port (uri), ==, 1234); + g_assert_cmpstr (g_uri_get_path (uri), ==, "/path"); + g_assert_cmpstr (g_uri_get_query (uri), ==, "query"); + g_assert_cmpstr (g_uri_get_fragment (uri), ==, "fragment"); + g_assert_cmpstr (g_uri_get_user (uri), ==, "user"); + g_assert_cmpstr (g_uri_get_password (uri), ==, "password"); + g_assert_cmpstr (g_uri_get_auth_params (uri), ==, "authparams"); + g_uri_unref (uri); + + uri = g_uri_build_with_user (G_URI_FLAGS_ENCODED, "scheme", "user%01", "password%02", + "authparams%03", "host", 1234, + "/path", "query", "fragment"); + g_assert_cmpstr (g_uri_get_userinfo (uri), ==, "user%01:password%02;authparams%03"); + g_uri_unref (uri); + + uri = g_uri_build_with_user (G_URI_FLAGS_ENCODED, "scheme", NULL, NULL, + NULL, "host", 1234, + "/path", "query", 
"fragment"); + g_assert_null (g_uri_get_userinfo (uri)); + g_uri_unref (uri); +} + +static void +test_uri_split (void) +{ + gchar *scheme = NULL; + gchar *userinfo = NULL; + gchar *user = NULL; + gchar *pass = NULL; + gchar *authparams = NULL; + gchar *host = NULL; + gchar *path = NULL; + gchar *query = NULL; + gchar *fragment = NULL; + GError *error = NULL; + gint port; + + g_uri_split ("scheme://user%3Apass%3Bauth@host:1234/path?query#fragment", + G_URI_FLAGS_NONE, + &scheme, + &userinfo, + &host, + &port, + &path, + &query, + &fragment, + &error); + g_assert_no_error (error); + g_assert_cmpstr (scheme, ==, "scheme"); + g_assert_cmpstr (userinfo, ==, "user:pass;auth"); + g_assert_cmpstr (host, ==, "host"); + g_assert_cmpint (port, ==, 1234); + g_assert_cmpstr (path, ==, "/path"); + g_assert_cmpstr (query, ==, "query"); + g_assert_cmpstr (fragment, ==, "fragment"); + g_free (scheme); + g_free (userinfo); + g_free (host); + g_free (path); + g_free (query); + g_free (fragment); + + g_uri_split ("scheme://user%3Apass%3Bauth@h%01st:1234/path?query#fragment", + G_URI_FLAGS_ENCODED, + NULL, + NULL, + &host, + NULL, + NULL, + NULL, + NULL, + &error); + g_assert_no_error (error); + g_assert_cmpstr (host, ==, "h\001st"); + g_free (host); + + g_uri_split ("scheme://@@@host:1234/path?query#fragment", + G_URI_FLAGS_ENCODED, + NULL, + &userinfo, + NULL, + NULL, + NULL, + NULL, + NULL, + &error); + g_assert_no_error (error); + g_assert_cmpstr (userinfo, ==, "@@"); + g_free (userinfo); + + + g_uri_split ("http://f;oo/", + G_URI_FLAGS_NONE, + NULL, + NULL, + NULL, + NULL, + &path, + NULL, + NULL, + &error); + g_assert_no_error (error); + g_assert_cmpstr (path, ==, ";oo/"); + g_free (path); + + g_uri_split_with_user ("scheme://user:pass;auth@host:1234/path?query#fragment", + G_URI_FLAGS_HAS_AUTH_PARAMS|G_URI_FLAGS_HAS_PASSWORD, + NULL, + &user, + &pass, + &authparams, + NULL, + NULL, + NULL, + NULL, + NULL, + &error); + g_assert_no_error (error); + g_assert_cmpstr (user, ==, 
"user"); + g_assert_cmpstr (pass, ==, "pass"); + g_assert_cmpstr (authparams, ==, "auth"); + g_free (user); + g_free (pass); + g_free (authparams); + + g_uri_split_network ("scheme://user:pass;auth@host:1234/path?query#fragment", + G_URI_FLAGS_NONE, + NULL, + NULL, + NULL, + &error); + g_assert_no_error (error); + + g_uri_split_network ("scheme://user:pass;auth@host:1234/path?query#fragment", + G_URI_FLAGS_NONE, + &scheme, + &host, + &port, + &error); + g_assert_no_error (error); + g_assert_cmpstr (scheme, ==, "scheme"); + g_assert_cmpstr (host, ==, "host"); + g_assert_cmpint (port, ==, 1234); + g_free (scheme); + g_free (host); + + g_uri_split_network ("%00", + G_URI_FLAGS_NONE, NULL, NULL, NULL, &error); + g_assert_error (error, G_URI_ERROR, G_URI_ERROR_BAD_PATH); + g_clear_error (&error); + + g_uri_split_network ("/a", + G_URI_FLAGS_NONE, + &scheme, + &host, + &port, + &error); + g_assert_error (error, G_URI_ERROR, G_URI_ERROR_BAD_SCHEME); + g_clear_error (&error); + + g_uri_split_network ("schme:#", + G_URI_FLAGS_NONE, + &scheme, + &host, + &port, + &error); + g_assert_error (error, G_URI_ERROR, G_URI_ERROR_BAD_HOST); + g_clear_error (&error); + + g_uri_split_network ("scheme://[]/a", + G_URI_FLAGS_NONE, NULL, NULL, NULL, &error); + g_assert_error (error, G_URI_ERROR, G_URI_ERROR_BAD_HOST); + g_clear_error (&error); + + g_uri_split_network ("scheme://user%00:pass;auth@host", + G_URI_FLAGS_HAS_PASSWORD|G_URI_FLAGS_HAS_AUTH_PARAMS, + NULL, NULL, NULL, &error); + g_assert_error (error, G_URI_ERROR, G_URI_ERROR_BAD_USER); + g_clear_error (&error); + + g_uri_split_network ("scheme://user:pass%00;auth@host", + G_URI_FLAGS_HAS_PASSWORD|G_URI_FLAGS_HAS_AUTH_PARAMS, + NULL, NULL, NULL, &error); + g_assert_error (error, G_URI_ERROR, G_URI_ERROR_BAD_PASSWORD); + g_clear_error (&error); + + g_uri_split_network ("scheme://user:pass;auth@host:1234/path?quer%00y#fragment", + G_URI_FLAGS_NONE, + NULL, NULL, NULL, &error); + g_assert_error (error, G_URI_ERROR, 
G_URI_ERROR_BAD_QUERY); + g_clear_error (&error); + + g_uri_split_network ("scheme://use%00r:pass;auth@host:1234/path", + G_URI_FLAGS_NONE, + NULL, NULL, NULL, &error); + g_assert_error (error, G_URI_ERROR, G_URI_ERROR_BAD_USER); + g_clear_error (&error); + + g_uri_split ("scheme://user:pass;auth@host:1234/path?query#fragm%00ent", + G_URI_FLAGS_NONE, + &scheme, + &userinfo, + &host, + &port, + &path, + &query, + &fragment, + &error); + g_assert_error (error, G_URI_ERROR, G_URI_ERROR_BAD_FRAGMENT); + g_clear_error (&error); + + g_uri_split_with_user ("scheme://user:pa%x0s;auth@host:1234/path?query#fragment", + G_URI_FLAGS_HAS_PASSWORD|G_URI_FLAGS_PARSE_STRICT, + &scheme, + &user, + &pass, + &authparams, + &host, + &port, + &path, + &query, + &fragment, + &error); + g_assert_error (error, G_URI_ERROR, G_URI_ERROR_BAD_PASSWORD); + g_clear_error (&error); + + g_uri_split_with_user ("scheme://user:pass;auth%00@host", + G_URI_FLAGS_HAS_PASSWORD|G_URI_FLAGS_HAS_AUTH_PARAMS, + &scheme, + &user, + &pass, + &authparams, + &host, + &port, + &path, + &query, + &fragment, + &error); + g_assert_error (error, G_URI_ERROR, G_URI_ERROR_BAD_AUTH_PARAMS); + g_clear_error (&error); + + g_uri_split_network ("scheme://user:pass%00;auth@host", + G_URI_FLAGS_HAS_PASSWORD|G_URI_FLAGS_HAS_AUTH_PARAMS, + NULL, NULL, NULL, &error); + g_assert_error (error, G_URI_ERROR, G_URI_ERROR_BAD_PASSWORD); + g_clear_error (&error); + +} + +static void +test_uri_is_valid (void) +{ + GError *error = NULL; + + g_assert_true (g_uri_is_valid ("http://[::192.9.5.5]/ipng", G_URI_FLAGS_NONE, NULL)); + g_assert_true (g_uri_is_valid ("http://127.127.127.127/", G_URI_FLAGS_NONE, NULL)); + g_assert_true (g_uri_is_valid ("http://127.127.127.b/", G_URI_FLAGS_NONE, NULL)); + g_assert_true (g_uri_is_valid ("http://\xc3\x89XAMPLE.COM/", G_URI_FLAGS_NONE, NULL)); + + g_assert_true (g_uri_is_valid (" \r http\t://f oo \t\n ", G_URI_FLAGS_NONE, NULL)); + g_assert_true (g_uri_is_valid (" \r http\t://f oo \t\n ", 
G_URI_FLAGS_PARSE_STRICT, NULL)); + + g_assert_false (g_uri_is_valid ("http://[::192.9.5.5/ipng", G_URI_FLAGS_NONE, &error)); + g_assert_error (error, G_URI_ERROR, G_URI_ERROR_BAD_HOST); + g_clear_error (&error); + + g_assert_false (g_uri_is_valid ("http://[fe80::dead:beef%wef%]/", G_URI_FLAGS_NONE, &error)); + g_assert_error (error, G_URI_ERROR, G_URI_ERROR_BAD_HOST); + g_clear_error (&error); + + g_assert_false (g_uri_is_valid ("http://%00/", G_URI_FLAGS_NON_DNS, &error)); + g_assert_error (error, G_URI_ERROR, G_URI_ERROR_BAD_HOST); + g_clear_error (&error); + + g_assert_true (g_uri_is_valid ("http://foo/", G_URI_FLAGS_NON_DNS, &error)); + + g_assert_false (g_uri_is_valid ("http://%00/", G_URI_FLAGS_NONE, &error)); + g_assert_error (error, G_URI_ERROR, G_URI_ERROR_BAD_HOST); + g_clear_error (&error); + + g_assert_false (g_uri_is_valid ("http://%30.%30.%30.%30/", G_URI_FLAGS_NONE, &error)); + g_assert_error (error, G_URI_ERROR, G_URI_ERROR_BAD_HOST); + g_clear_error (&error); + + g_assert_false (g_uri_is_valid ("http://host:port", G_URI_FLAGS_NONE, &error)); + g_assert_error (error, G_URI_ERROR, G_URI_ERROR_BAD_PORT); + g_clear_error (&error); + + g_assert_false (g_uri_is_valid ("http://host:65536", G_URI_FLAGS_NONE, &error)); + g_assert_error (error, G_URI_ERROR, G_URI_ERROR_BAD_PORT); + g_clear_error (&error); + + g_assert_false (g_uri_is_valid ("http://host:6553l", G_URI_FLAGS_NONE, &error)); + g_assert_error (error, G_URI_ERROR, G_URI_ERROR_BAD_PORT); + g_clear_error (&error); +} + +static void +test_uri_parse_params (void) +{ + GHashTable *params; + + params = g_uri_parse_params ("", G_URI_FLAGS_NONE, '&', FALSE); + g_assert_cmpint (g_hash_table_size (params), ==, 0); + g_hash_table_unref (params); + + params = g_uri_parse_params ("p1=foo&p2=bar", -1, '&', FALSE); + g_assert_cmpint (g_hash_table_size (params), ==, 2); + g_assert_cmpstr (g_hash_table_lookup (params, "p1"), ==, "foo"); + g_assert_cmpstr (g_hash_table_lookup (params, "p2"), ==, "bar"); + 
g_hash_table_unref (params); + + params = g_uri_parse_params ("p1=foo&&P1=bar", -1, '&', FALSE); + g_assert_null (params); + params = g_uri_parse_params ("%00=foo", -1, '&', FALSE); + g_assert_null (params); + params = g_uri_parse_params ("p1=%00", -1, '&', FALSE); + g_assert_null (params); + + params = g_uri_parse_params ("p1=foo&P1=bar", -1, '&', TRUE); + g_assert_cmpint (g_hash_table_size (params), ==, 1); + g_assert_cmpstr (g_hash_table_lookup (params, "p1"), ==, "bar"); + g_hash_table_unref (params); +} + +static void +test_uri_join (void) +{ + gchar *uri = NULL; + + uri = g_uri_join_with_user (G_URI_FLAGS_NONE, "scheme", "user\001", "pass\002", "authparams\003", + "host", 9876, "/path", "query", "fragment"); + g_assert_cmpstr (uri, ==, "scheme://user%01:pass%02;authparams%03@host:9876/path?query#fragment"); + g_free (uri); + + uri = g_uri_join_with_user (G_URI_FLAGS_NONE, "scheme", "user\001", "pass\002", "authparams\003", + "::192.9.5.5", 9876, "/path", "query", "fragment"); + g_assert_cmpstr (uri, ==, "scheme://user%01:pass%02;authparams%03@[::192.9.5.5]:9876/path?query#fragment"); + g_free (uri); + + uri = g_uri_join_with_user (G_URI_FLAGS_ENCODED, + "scheme", "user%01", "pass%02", "authparams%03", + "::192.9.5.5", 9876, "/path", "query", "fragment"); + g_assert_cmpstr (uri, ==, + "scheme://user%01:pass%02;authparams%03@[::192.9.5.5]:9876/path?query#fragment"); + g_free (uri); } int @@ -381,13 +1243,21 @@ main (int argc, { g_test_init (&argc, &argv, NULL); - g_test_add_func ("/uri/to-uri", run_to_uri_tests); - g_test_add_func ("/uri/from-uri", run_from_uri_tests); - g_test_add_func ("/uri/roundtrip", run_roundtrip_tests); + g_test_add_func ("/uri/file-to-uri", run_file_to_uri_tests); + g_test_add_func ("/uri/file-from-uri", run_file_from_uri_tests); + g_test_add_func ("/uri/file-roundtrip", run_file_roundtrip_tests); g_test_add_func ("/uri/list", run_uri_list_tests); g_test_add_func ("/uri/unescape", test_uri_unescape); g_test_add_func ("/uri/escape", 
test_uri_escape); g_test_add_func ("/uri/scheme", test_uri_scheme); + g_test_add_func ("/uri/parsing/absolute", test_uri_parsing_absolute); + g_test_add_func ("/uri/parsing/relative", test_uri_parsing_relative); + g_test_add_func ("/uri/build", test_uri_build); + g_test_add_func ("/uri/split", test_uri_split); + g_test_add_func ("/uri/is_valid", test_uri_is_valid); + g_test_add_func ("/uri/to-string", test_uri_to_string); + g_test_add_func ("/uri/join", test_uri_join); + g_test_add_func ("/uri/parse-params", test_uri_parse_params); return g_test_run (); } diff --git a/gobject/gboxed.c b/gobject/gboxed.c index a4c2c3805..6f4a5d49d 100644 --- a/gobject/gboxed.c +++ b/gobject/gboxed.c @@ -169,6 +169,7 @@ G_DEFINE_BOXED_TYPE (GMarkupParseContext, g_markup_parse_context, g_markup_parse G_DEFINE_BOXED_TYPE (GThread, g_thread, g_thread_ref, g_thread_unref) G_DEFINE_BOXED_TYPE (GChecksum, g_checksum, g_checksum_copy, g_checksum_free) +G_DEFINE_BOXED_TYPE (GUri, g_uri, g_uri_ref, g_uri_unref) G_DEFINE_BOXED_TYPE (GOptionGroup, g_option_group, g_option_group_ref, g_option_group_unref) diff --git a/gobject/glib-types.h b/gobject/glib-types.h index 78a199d1d..b24e76bf1 100644 --- a/gobject/glib-types.h +++ b/gobject/glib-types.h @@ -297,6 +297,15 @@ typedef gsize GType; */ #define G_TYPE_OPTION_GROUP (g_option_group_get_type ()) +/** + * G_TYPE_URI: + * + * The #GType for a boxed type holding a #GUri. + * + * Since: 2.66 + */ +#define G_TYPE_URI (g_uri_get_type ()) + GLIB_AVAILABLE_IN_ALL GType g_date_get_type (void) G_GNUC_CONST; GLIB_AVAILABLE_IN_ALL @@ -353,6 +362,8 @@ GLIB_AVAILABLE_IN_2_40 GType g_mapped_file_get_type (void) G_GNUC_CONST; GLIB_AVAILABLE_IN_2_44 GType g_option_group_get_type (void) G_GNUC_CONST; +GLIB_AVAILABLE_IN_2_66 +GType g_uri_get_type (void) G_GNUC_CONST; GLIB_DEPRECATED_FOR('G_TYPE_VARIANT') GType g_variant_get_gtype (void) G_GNUC_CONST;