diff --git a/glib/guri.c b/glib/guri.c
index 23b8e6dd8..19fe4ac71 100644
--- a/glib/guri.c
+++ b/glib/guri.c
@@ -794,6 +794,26 @@ normalize_port (const char *scheme,
   return port;
 }
 
+/* Returns the default port of @scheme for the schemes covered here
+ * (http/ws: 80, https/wss: 443, ftp: 21), or -1 for any other scheme.
+ * The comparison is case-sensitive, so @scheme must already be
+ * lowercase to match.
+ */
+static int
+default_scheme_port (const char *scheme)
+{
+  if (strcmp (scheme, "http") == 0 || strcmp (scheme, "ws") == 0)
+    return 80;
+
+  if (strcmp (scheme, "https") == 0 || strcmp (scheme, "wss") == 0)
+    return 443;
+
+  if (strcmp (scheme, "ftp") == 0)
+    return 21;
+
+  return -1;
+}
+
 static gboolean
 g_uri_split_internal (const gchar *uri_string,
                       GUriFlags flags,
@@ -989,8 +1009,8 @@ g_uri_split_internal (const gchar *uri_string,
           *path = g_strdup ("/");
         }
 
-      if (port && *port != -1)
-        *port = normalize_port (scheme_str, *port);
+      if (port && *port == -1)
+        *port = default_scheme_port (scheme_str);
     }
 
   g_free (normalized_scheme);
@@ -1568,6 +1588,7 @@ g_uri_join_internal (GUriFlags flags,
 {
   gboolean encoded = (flags & G_URI_FLAGS_ENCODED);
   GString *str;
+  char *normalized_scheme = NULL;
 
   /* Restrictions on path prefixes. See:
    * https://tools.ietf.org/html/rfc3986#section-3
@@ -1580,6 +1601,9 @@ g_uri_join_internal (GUriFlags flags,
   if (scheme)
     g_string_append_c (str, ':');
 
+  if ((flags & G_URI_FLAGS_SCHEME_NORMALIZE) && scheme && ((host && port != -1) || path[0] == '\0'))
+    normalized_scheme = g_ascii_strdown (scheme, -1);
+
   if (host)
     {
       g_string_append (str, "//");
@@ -1640,15 +1664,19 @@ g_uri_join_internal (GUriFlags flags,
             g_string_append_uri_escaped (str, host, HOST_ALLOWED_CHARS, TRUE);
         }
 
-      if (port != -1)
+      if (port != -1 && (!normalized_scheme || normalize_port (normalized_scheme, port) != -1))
         g_string_append_printf (str, ":%d", port);
     }
 
-  if (encoded || flags & G_URI_FLAGS_ENCODED_PATH)
+  if (path[0] == '\0' && normalized_scheme && should_normalize_empty_path (normalized_scheme))
+    g_string_append (str, "/");
+  else if (encoded || flags & G_URI_FLAGS_ENCODED_PATH)
     g_string_append (str, path);
   else
     g_string_append_uri_escaped (str, path, PATH_ALLOWED_CHARS, TRUE);
 
+  g_free (normalized_scheme);
+
   if (query)
     {
       g_string_append_c (str, '?');
@@ -2432,6 +2460,9 @@ g_uri_get_port (GUri *uri)
 {
   g_return_val_if_fail (uri != NULL, -1);
 
+  if (uri->port == -1 && (uri->flags & G_URI_FLAGS_SCHEME_NORMALIZE))
+    return default_scheme_port (uri->scheme);
+
   return uri->port;
 }
 
diff --git a/glib/guri.h b/glib/guri.h
index fecbfed8e..b6a4fd033 100644
--- a/glib/guri.h
+++ b/glib/guri.h
@@ -62,10 +62,11 @@ void g_uri_unref (GUri *uri);
  * @G_URI_FLAGS_ENCODED_PATH: Same as %G_URI_FLAGS_ENCODED, for the path only.
  * @G_URI_FLAGS_ENCODED_FRAGMENT: Same as %G_URI_FLAGS_ENCODED, for the
  *     fragment only.
- * @G_URI_FLAGS_SCHEME_NORMALIZE: Applies scheme-based normalization to the
- *     parsed URI. For example when parsing an HTTP URI changing empty paths
- *     to `/` and changing port `80` to `-1`. This only supports a subset
- *     of known schemes. (Since: 2.68)
+ * @G_URI_FLAGS_SCHEME_NORMALIZE: A scheme-based normalization will be applied.
+ *     For example, when parsing an HTTP URI, an omitted path becomes `/` and
+ *     an omitted port becomes `80`; and when building a URI, an empty path
+ *     becomes `/` and the default port `80` is omitted. This only supports a
+ *     subset of known schemes. (Since: 2.68)
  *
  * Flags that describe a URI.
  *
diff --git a/glib/tests/uri.c b/glib/tests/uri.c
index c666fc5af..6beead94f 100644
--- a/glib/tests/uri.c
+++ b/glib/tests/uri.c
@@ -1715,38 +1715,146 @@ static const struct
   const gchar *uri;
   GUriFlags flags;
   /* Outputs */
+  const gchar *uri_string;
   const gchar *path;
   int port;
-} normalize_tests[] =
+} normalize_parse_tests[] =
   {
     { NULL, "http://foo/path with spaces", G_URI_FLAGS_ENCODED,
-      "/path%20with%20spaces", -1 },
+      "http://foo/path%20with%20spaces", "/path%20with%20spaces", -1 },
     { NULL, "http://foo/path with spaces 2", G_URI_FLAGS_ENCODED_PATH,
-      "/path%20with%20spaces%202", -1 },
+      "http://foo/path%20with%20spaces%202", "/path%20with%20spaces%202", -1 },
     { NULL, "http://foo/%aa", G_URI_FLAGS_ENCODED,
-      "/%AA", -1 },
+      "http://foo/%AA", "/%AA", -1 },
     { NULL, "http://foo/p\xc3\xa4th/", G_URI_FLAGS_ENCODED | G_URI_FLAGS_PARSE_RELAXED,
-      "/p%C3%A4th/", -1 },
+      "http://foo/p%C3%A4th/", "/p%C3%A4th/", -1 },
+    { NULL, "http://foo", G_URI_FLAGS_NONE,
+      "http://foo", "", -1 },
     { NULL, "http://foo", G_URI_FLAGS_SCHEME_NORMALIZE,
-      "/", -1 },
+      "http://foo/", "/", 80 },
     { NULL, "nothttp://foo", G_URI_FLAGS_SCHEME_NORMALIZE,
-      "", -1 },
+      "nothttp://foo", "", -1 },
+    { NULL, "http://foo:80", G_URI_FLAGS_NONE,
+      "http://foo:80", "", 80 },
     { NULL, "http://foo:80", G_URI_FLAGS_SCHEME_NORMALIZE,
-      "/", -1 },
+      "http://foo/", "/", 80 },
+    { NULL, "http://foo:8080", G_URI_FLAGS_SCHEME_NORMALIZE,
+      "http://foo:8080/", "/", 8080 },
     { NULL, "https://foo:443", G_URI_FLAGS_SCHEME_NORMALIZE,
-      "/", -1 },
+      "https://foo/", "/", 443 },
+    { NULL, "https://foo:943", G_URI_FLAGS_SCHEME_NORMALIZE,
+      "https://foo:943/", "/", 943 },
+    { NULL, "ws://foo", G_URI_FLAGS_SCHEME_NORMALIZE,
+      "ws://foo/", "/", 80 },
+    { NULL, "wss://foo:443", G_URI_FLAGS_SCHEME_NORMALIZE,
+      "wss://foo/", "/", 443 },
+    { NULL, "ftp://foo", G_URI_FLAGS_NONE,
+      "ftp://foo", "", -1 },
+    { NULL, "ftp://foo", G_URI_FLAGS_SCHEME_NORMALIZE,
+      "ftp://foo", "", 21 },
     { NULL, "ftp://foo:21", G_URI_FLAGS_SCHEME_NORMALIZE,
-      "", -1 },
+      "ftp://foo", "", 21 },
+    { NULL, "ftp://foo:2100", G_URI_FLAGS_SCHEME_NORMALIZE,
+      "ftp://foo:2100", "", 2100 },
     { NULL, "nothttp://foo:80", G_URI_FLAGS_SCHEME_NORMALIZE,
-      "", 80 },
+      "nothttp://foo:80", "", 80 },
     { "http://foo", "//bar", G_URI_FLAGS_SCHEME_NORMALIZE,
-      "/", -1 },
+      "http://bar/", "/", 80 },
     { "http://foo", "//bar:80", G_URI_FLAGS_SCHEME_NORMALIZE,
-      "/", -1 },
+      "http://bar/", "/", 80 },
     { "nothttp://foo", "//bar:80", G_URI_FLAGS_SCHEME_NORMALIZE,
-      "", 80 },
-    { "http://foo", "//bar", 0,
-      "", -1 },
+      "nothttp://bar:80", "", 80 },
+    { "http://foo", "//bar", G_URI_FLAGS_NONE,
+      "http://bar", "", -1 },
+  };
+
+static const struct
+{
+  /* Inputs */
+  const gchar *uri;
+  GUriFlags flags;
+  /* Outputs */
+  const char *scheme;
+  const gchar *path;
+  int port;
+} normalize_split_tests[] =
+  {
+    { "HTTP://foo", G_URI_FLAGS_ENCODED,
+      "http", "", -1 },
+    { "HTTP://foo", G_URI_FLAGS_SCHEME_NORMALIZE,
+      "http", "/", 80 },
+    { "http://foo:80/", G_URI_FLAGS_SCHEME_NORMALIZE,
+      "http", "/", 80 },
+    { "http://foo:8080/bar", G_URI_FLAGS_SCHEME_NORMALIZE,
+      "http", "/bar", 8080 },
+    { "ws://foo", G_URI_FLAGS_SCHEME_NORMALIZE,
+      "ws", "/", 80 },
+    { "https://foo", G_URI_FLAGS_ENCODED,
+      "https", "", -1 },
+    { "https://foo", G_URI_FLAGS_SCHEME_NORMALIZE,
+      "https", "/", 443 },
+    { "https://foo:443/", G_URI_FLAGS_SCHEME_NORMALIZE,
+      "https", "/", 443 },
+    { "wss://foo", G_URI_FLAGS_SCHEME_NORMALIZE,
+      "wss", "/", 443 },
+    { "ftp://foo", G_URI_FLAGS_ENCODED,
+      "ftp", "", -1 },
+    { "ftp://foo", G_URI_FLAGS_SCHEME_NORMALIZE,
+      "ftp", "", 21 },
+    { "ftp://foo:21", G_URI_FLAGS_SCHEME_NORMALIZE,
+      "ftp", "", 21 },
+    { "scheme://foo", G_URI_FLAGS_SCHEME_NORMALIZE,
+      "scheme", "", -1 },
+  };
+
+static const struct
+{
+  /* Inputs */
+  GUriFlags flags;
+  const gchar *scheme;
+  const gchar *host;
+  int port;
+  const gchar *path;
+  /* Outputs */
+  const gchar *uri;
+} normalize_join_tests[] =
+  {
+    { G_URI_FLAGS_NONE, "http", "foo", -1, "",
+      "http://foo" },
+    { G_URI_FLAGS_SCHEME_NORMALIZE, "http", "foo", -1, "",
+      "http://foo/" },
+    { G_URI_FLAGS_SCHEME_NORMALIZE, "http", "foo", 80, "",
+      "http://foo/" },
+    { G_URI_FLAGS_SCHEME_NORMALIZE, "http", "foo", 8080, "",
+      "http://foo:8080/" },
+    { G_URI_FLAGS_NONE, "http", "foo", 80, "",
+      "http://foo:80" },
+    { G_URI_FLAGS_SCHEME_NORMALIZE, "ws", "foo", 80, "",
+      "ws://foo/" },
+    { G_URI_FLAGS_NONE, "https", "foo", -1, "",
+      "https://foo" },
+    { G_URI_FLAGS_SCHEME_NORMALIZE, "https", "foo", -1, "",
+      "https://foo/" },
+    { G_URI_FLAGS_SCHEME_NORMALIZE, "https", "foo", 443, "",
+      "https://foo/" },
+    { G_URI_FLAGS_SCHEME_NORMALIZE, "https", "foo", 943, "",
+      "https://foo:943/" },
+    { G_URI_FLAGS_NONE, "https", "foo", 443, "",
+      "https://foo:443" },
+    { G_URI_FLAGS_SCHEME_NORMALIZE, "wss", "foo", 443, "",
+      "wss://foo/" },
+    { G_URI_FLAGS_NONE, "ftp", "foo", -1, "",
+      "ftp://foo" },
+    { G_URI_FLAGS_SCHEME_NORMALIZE, "ftp", "foo", -1, "",
+      "ftp://foo" },
+    { G_URI_FLAGS_SCHEME_NORMALIZE, "ftp", "foo", 21, "",
+      "ftp://foo" },
+    { G_URI_FLAGS_SCHEME_NORMALIZE, "ftp", "foo", 2020, "",
+      "ftp://foo:2020" },
+    { G_URI_FLAGS_NONE, "ftp", "foo", 21, "",
+      "ftp://foo:21" },
+    { G_URI_FLAGS_SCHEME_NORMALIZE, "scheme", "foo", 80, "",
+      "scheme://foo:80" },
   };
 
 static void
@@ -1754,31 +1862,61 @@ test_uri_normalize (void)
 {
   gsize i;
   int port;
+  char *path;
+  char *uri_string;
 
-  for (i = 0; i < G_N_ELEMENTS (normalize_tests); ++i)
+  for (i = 0; i < G_N_ELEMENTS (normalize_parse_tests); ++i)
     {
       GUri *uri, *base = NULL;
-      if (normalize_tests[i].base)
-        base = g_uri_parse (normalize_tests[i].base, normalize_tests[i].flags, NULL);
+
+      if (normalize_parse_tests[i].base)
+        base = g_uri_parse (normalize_parse_tests[i].base, normalize_parse_tests[i].flags, NULL);
 
       uri = g_uri_parse_relative (base,
-                                  normalize_tests[i].uri,
-                                  normalize_tests[i].flags,
+                                  normalize_parse_tests[i].uri,
+                                  normalize_parse_tests[i].flags,
                                   NULL);
 
       g_assert_nonnull (uri);
-      g_assert_cmpstr (g_uri_get_path (uri), ==, normalize_tests[i].path);
-      g_assert_cmpint (g_uri_get_port (uri), ==, normalize_tests[i].port);
+      uri_string = g_uri_to_string (uri);
+      g_assert_cmpstr (g_uri_get_path (uri), ==, normalize_parse_tests[i].path);
+      g_assert_cmpint (g_uri_get_port (uri), ==, normalize_parse_tests[i].port);
+      g_assert_cmpstr (uri_string, ==, normalize_parse_tests[i].uri_string);
 
+      g_free (uri_string);
       g_uri_unref (uri);
       if (base)
         g_uri_unref (base);
     }
 
-  /* One off testing a codepath where scheme is NULL but internally we still normalize it. */
-  g_assert_true (g_uri_split ("HTTP://foo:80", G_URI_FLAGS_SCHEME_NORMALIZE,
-                              NULL, NULL, NULL, &port, NULL, NULL, NULL, NULL));
-  g_assert_cmpint (port, ==, -1);
+  for (i = 0; i < G_N_ELEMENTS (normalize_split_tests); ++i)
+    {
+      char *scheme;
+
+      /* Testing a codepath where scheme is NULL but internally we still normalize it. */
+      g_assert_true (g_uri_split (normalize_split_tests[i].uri, normalize_split_tests[i].flags,
+                                  NULL, NULL, NULL, &port, &path, NULL, NULL, NULL));
+      g_assert_cmpstr (path, ==, normalize_split_tests[i].path);
+      g_assert_cmpint (port, ==, normalize_split_tests[i].port);
+      g_free (path);
+
+      g_assert_true (g_uri_split (normalize_split_tests[i].uri, normalize_split_tests[i].flags,
+                                  &scheme, NULL, NULL, &port, &path, NULL, NULL, NULL));
+      g_assert_cmpstr (scheme, ==, normalize_split_tests[i].scheme);
+      g_assert_cmpstr (path, ==, normalize_split_tests[i].path);
+      g_assert_cmpint (port, ==, normalize_split_tests[i].port);
+      g_free (scheme);
+      g_free (path);
+    }
+
+  for (i = 0; i < G_N_ELEMENTS (normalize_join_tests); ++i)
+    {
+      uri_string = g_uri_join (normalize_join_tests[i].flags, normalize_join_tests[i].scheme, NULL,
+                               normalize_join_tests[i].host, normalize_join_tests[i].port,
+                               normalize_join_tests[i].path, NULL, NULL);
+      g_assert_cmpstr (uri_string, ==, normalize_join_tests[i].uri);
+      g_free (uri_string);
+    }
 }
 
 int