mirror of
https://gitlab.gnome.org/GNOME/glib.git
synced 2025-01-12 07:26:15 +01:00
Merge branch 'wip/tingping/guri-normalize' into 'master'
guri: Normalize uri segments if they are encoded and add a flag to do scheme-based normalization See merge request GNOME/glib!1716
This commit is contained in:
commit
b8927cc6ad
132
glib/guri.c
132
glib/guri.c
@ -132,11 +132,12 @@
|
|||||||
*
|
*
|
||||||
* Note that there is no `g_uri_equal ()` function, because comparing
|
* Note that there is no `g_uri_equal ()` function, because comparing
|
||||||
* URIs usefully requires scheme-specific knowledge that #GUri does
|
* URIs usefully requires scheme-specific knowledge that #GUri does
|
||||||
* not have. For example, `http://example.com/` and
|
* not have. #GUri can help with normalization if you use the various
|
||||||
* `http://EXAMPLE.COM:80` have exactly the same meaning according
|
* encoded #GUriFlags as well as %G_URI_FLAGS_SCHEME_NORMALIZE; however,
|
||||||
* to the HTTP specification, and `data:,foo` and
|
* it is not comprehensive.
|
||||||
* `data:;base64,Zm9v` resolve to the same thing according to the
|
* For example, `data:,foo` and `data:;base64,Zm9v` resolve to the same
|
||||||
* `data:` URI specification.
|
* thing according to the `data:` URI specification, which GLib does not
|
||||||
|
* handle.
|
||||||
*
|
*
|
||||||
* Since: 2.66
|
* Since: 2.66
|
||||||
*/
|
*/
|
||||||
@ -289,15 +290,16 @@ uri_decoder (gchar **out,
|
|||||||
GUriError parse_error,
|
GUriError parse_error,
|
||||||
GError **error)
|
GError **error)
|
||||||
{
|
{
|
||||||
gchar *decoded, *d, c;
|
gchar c;
|
||||||
|
GString *decoded;
|
||||||
const gchar *invalid, *s, *end;
|
const gchar *invalid, *s, *end;
|
||||||
gssize len;
|
gssize len;
|
||||||
|
|
||||||
if (!(flags & G_URI_FLAGS_ENCODED))
|
if (!(flags & G_URI_FLAGS_ENCODED))
|
||||||
just_normalize = FALSE;
|
just_normalize = FALSE;
|
||||||
|
|
||||||
decoded = g_malloc (length + 1);
|
decoded = g_string_sized_new (length + 1);
|
||||||
for (s = start, end = s + length, d = decoded; s < end; s++)
|
for (s = start, end = s + length; s < end; s++)
|
||||||
{
|
{
|
||||||
if (*s == '%')
|
if (*s == '%')
|
||||||
{
|
{
|
||||||
@ -311,7 +313,7 @@ uri_decoder (gchar **out,
|
|||||||
g_set_error_literal (error, G_URI_ERROR, parse_error,
|
g_set_error_literal (error, G_URI_ERROR, parse_error,
|
||||||
/* xgettext: no-c-format */
|
/* xgettext: no-c-format */
|
||||||
_("Invalid %-encoding in URI"));
|
_("Invalid %-encoding in URI"));
|
||||||
g_free (decoded);
|
g_string_free (decoded, TRUE);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -319,7 +321,7 @@ uri_decoder (gchar **out,
|
|||||||
* fix it to "%25", since that might change the way that
|
* fix it to "%25", since that might change the way that
|
||||||
* the URI's owner would interpret it.
|
* the URI's owner would interpret it.
|
||||||
*/
|
*/
|
||||||
*d++ = *s;
|
g_string_append_c (decoded, *s);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -328,43 +330,49 @@ uri_decoder (gchar **out,
|
|||||||
{
|
{
|
||||||
g_set_error_literal (error, G_URI_ERROR, parse_error,
|
g_set_error_literal (error, G_URI_ERROR, parse_error,
|
||||||
_("Illegal character in URI"));
|
_("Illegal character in URI"));
|
||||||
g_free (decoded);
|
g_string_free (decoded, TRUE);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
if (just_normalize && !g_uri_char_is_unreserved (c))
|
if (just_normalize && !g_uri_char_is_unreserved (c))
|
||||||
{
|
{
|
||||||
/* Leave the % sequence there. */
|
/* Leave the % sequence there but normalize it. */
|
||||||
*d++ = *s;
|
g_string_append_c (decoded, *s);
|
||||||
|
g_string_append_c (decoded, g_ascii_toupper (s[1]));
|
||||||
|
g_string_append_c (decoded, g_ascii_toupper (s[2]));
|
||||||
|
s += 2;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
*d++ = c;
|
g_string_append_c (decoded, c);
|
||||||
s += 2;
|
s += 2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (www_form && *s == '+')
|
else if (www_form && *s == '+')
|
||||||
*d++ = ' ';
|
g_string_append_c (decoded, ' ');
|
||||||
|
/* Normalize any illegal characters. */
|
||||||
|
else if (just_normalize && (!g_ascii_isgraph (*s)))
|
||||||
|
g_string_append_printf (decoded, "%%%02X", (guchar)*s);
|
||||||
else
|
else
|
||||||
*d++ = *s;
|
g_string_append_c (decoded, *s);
|
||||||
}
|
}
|
||||||
*d = '\0';
|
|
||||||
|
|
||||||
len = d - decoded;
|
len = decoded->len;
|
||||||
g_assert (len >= 0);
|
g_assert (len >= 0);
|
||||||
|
|
||||||
if (!(flags & G_URI_FLAGS_ENCODED) &&
|
if (!(flags & G_URI_FLAGS_ENCODED) &&
|
||||||
!g_utf8_validate (decoded, len, &invalid))
|
!g_utf8_validate (decoded->str, len, &invalid))
|
||||||
{
|
{
|
||||||
g_set_error_literal (error, G_URI_ERROR, parse_error,
|
g_set_error_literal (error, G_URI_ERROR, parse_error,
|
||||||
_("Non-UTF-8 characters in URI"));
|
_("Non-UTF-8 characters in URI"));
|
||||||
g_free (decoded);
|
g_string_free (decoded, TRUE);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (out)
|
if (out)
|
||||||
*out = g_steal_pointer (&decoded);
|
*out = g_string_free (decoded, FALSE);
|
||||||
|
else
|
||||||
|
g_string_free (decoded, TRUE);
|
||||||
|
|
||||||
g_free (decoded);
|
|
||||||
return len;
|
return len;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -740,6 +748,52 @@ uri_cleanup (const gchar *uri_string)
|
|||||||
return g_string_free (copy, FALSE);
|
return g_string_free (copy, FALSE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static gboolean
|
||||||
|
should_normalize_empty_path (const char *scheme)
|
||||||
|
{
|
||||||
|
const char * const schemes[] = { "https", "http", "wss", "ws" };
|
||||||
|
int i;
|
||||||
|
for (i = 0; i < G_N_ELEMENTS (schemes); ++i)
|
||||||
|
{
|
||||||
|
if (!strcmp (schemes[i], scheme))
|
||||||
|
return TRUE;
|
||||||
|
}
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * normalize_port:
 * @scheme: scheme name, compared case-sensitively with strcmp(); it is only
 *   inspected when @port is 21, 80 or 443, and must not be %NULL in that case
 * @port: port number to normalize
 *
 * Collapses a port to -1 when it is the default port listed here for
 * @scheme: 21 for "ftp", 80 for "http" and "ws", 443 for "https" and "wss".
 *
 * Returns: -1 if @port is the listed default for @scheme, otherwise @port
 *   unchanged
 */
static int
normalize_port (const char *scheme,
                int         port)
{
  /* Up to two schemes share each default port; unused slots stay NULL. */
  const char *primary = NULL;
  const char *secondary = NULL;

  if (port == 21)
    {
      primary = "ftp";
    }
  else if (port == 80)
    {
      primary = "http";
      secondary = "ws";
    }
  else if (port == 443)
    {
      primary = "https";
      secondary = "wss";
    }

  if (primary != NULL && strcmp (scheme, primary) == 0)
    return -1;

  if (secondary != NULL && strcmp (scheme, secondary) == 0)
    return -1;

  return port;
}
|
||||||
|
|
||||||
static gboolean
|
static gboolean
|
||||||
g_uri_split_internal (const gchar *uri_string,
|
g_uri_split_internal (const gchar *uri_string,
|
||||||
GUriFlags flags,
|
GUriFlags flags,
|
||||||
@ -758,6 +812,7 @@ g_uri_split_internal (const gchar *uri_string,
|
|||||||
const gchar *end, *colon, *at, *path_start, *semi, *question;
|
const gchar *end, *colon, *at, *path_start, *semi, *question;
|
||||||
const gchar *p, *bracket, *hostend;
|
const gchar *p, *bracket, *hostend;
|
||||||
gchar *cleaned_uri_string = NULL;
|
gchar *cleaned_uri_string = NULL;
|
||||||
|
gchar *normalized_scheme = NULL;
|
||||||
|
|
||||||
if (scheme)
|
if (scheme)
|
||||||
*scheme = NULL;
|
*scheme = NULL;
|
||||||
@ -795,8 +850,9 @@ g_uri_split_internal (const gchar *uri_string,
|
|||||||
|
|
||||||
if (p > uri_string && *p == ':')
|
if (p > uri_string && *p == ':')
|
||||||
{
|
{
|
||||||
|
normalized_scheme = g_ascii_strdown (uri_string, p - uri_string);
|
||||||
if (scheme)
|
if (scheme)
|
||||||
*scheme = g_ascii_strdown (uri_string, p - uri_string);
|
*scheme = g_steal_pointer (&normalized_scheme);
|
||||||
p++;
|
p++;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -922,6 +978,22 @@ g_uri_split_internal (const gchar *uri_string,
|
|||||||
G_URI_ERROR_BAD_PATH, error))
|
G_URI_ERROR_BAD_PATH, error))
|
||||||
goto fail;
|
goto fail;
|
||||||
|
|
||||||
|
/* Scheme-based normalization */
|
||||||
|
if (flags & G_URI_FLAGS_SCHEME_NORMALIZE && ((scheme && *scheme) || normalized_scheme))
|
||||||
|
{
|
||||||
|
const char *scheme_str = scheme && *scheme ? *scheme : normalized_scheme;
|
||||||
|
|
||||||
|
if (should_normalize_empty_path (scheme_str) && path && !**path)
|
||||||
|
{
|
||||||
|
g_free (*path);
|
||||||
|
*path = g_strdup ("/");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (port && *port != -1)
|
||||||
|
*port = normalize_port (scheme_str, *port);
|
||||||
|
}
|
||||||
|
|
||||||
|
g_free (normalized_scheme);
|
||||||
g_free (cleaned_uri_string);
|
g_free (cleaned_uri_string);
|
||||||
return TRUE;
|
return TRUE;
|
||||||
|
|
||||||
@ -941,6 +1013,7 @@ g_uri_split_internal (const gchar *uri_string,
|
|||||||
if (fragment)
|
if (fragment)
|
||||||
g_clear_pointer (fragment, g_free);
|
g_clear_pointer (fragment, g_free);
|
||||||
|
|
||||||
|
g_free (normalized_scheme);
|
||||||
g_free (cleaned_uri_string);
|
g_free (cleaned_uri_string);
|
||||||
return FALSE;
|
return FALSE;
|
||||||
}
|
}
|
||||||
@ -1394,6 +1467,19 @@ g_uri_parse_relative (GUri *base_uri,
|
|||||||
uri->port = base_uri->port;
|
uri->port = base_uri->port;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Scheme normalization couldn't have been done earlier
|
||||||
|
* as the relative URI may not have had a scheme */
|
||||||
|
if (flags & G_URI_FLAGS_SCHEME_NORMALIZE)
|
||||||
|
{
|
||||||
|
if (should_normalize_empty_path (uri->scheme) && !*uri->path)
|
||||||
|
{
|
||||||
|
g_free (uri->path);
|
||||||
|
uri->path = g_strdup ("/");
|
||||||
|
}
|
||||||
|
|
||||||
|
uri->port = normalize_port (uri->scheme, uri->port);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return g_steal_pointer (&uri);
|
return g_steal_pointer (&uri);
|
||||||
|
@ -62,6 +62,10 @@ void g_uri_unref (GUri *uri);
|
|||||||
* @G_URI_FLAGS_ENCODED_PATH: Same as %G_URI_FLAGS_ENCODED, for the path only.
|
* @G_URI_FLAGS_ENCODED_PATH: Same as %G_URI_FLAGS_ENCODED, for the path only.
|
||||||
* @G_URI_FLAGS_ENCODED_FRAGMENT: Same as %G_URI_FLAGS_ENCODED, for the
|
* @G_URI_FLAGS_ENCODED_FRAGMENT: Same as %G_URI_FLAGS_ENCODED, for the
|
||||||
* fragment only.
|
* fragment only.
|
||||||
|
* @G_URI_FLAGS_SCHEME_NORMALIZE: Applies scheme-based normalization to the
|
||||||
|
* parsed URI. For example, when parsing an HTTP URI, empty paths are changed
|
||||||
|
* to `/` and port `80` is changed to `-1`. This only supports a subset
|
||||||
|
* of known schemes. (Since: 2.68)
|
||||||
*
|
*
|
||||||
* Flags that describe a URI.
|
* Flags that describe a URI.
|
||||||
*
|
*
|
||||||
@ -83,6 +87,7 @@ typedef enum {
|
|||||||
G_URI_FLAGS_ENCODED_QUERY = 1 << 5,
|
G_URI_FLAGS_ENCODED_QUERY = 1 << 5,
|
||||||
G_URI_FLAGS_ENCODED_PATH = 1 << 6,
|
G_URI_FLAGS_ENCODED_PATH = 1 << 6,
|
||||||
G_URI_FLAGS_ENCODED_FRAGMENT = 1 << 7,
|
G_URI_FLAGS_ENCODED_FRAGMENT = 1 << 7,
|
||||||
|
G_URI_FLAGS_SCHEME_NORMALIZE = 1 << 8,
|
||||||
} GUriFlags;
|
} GUriFlags;
|
||||||
|
|
||||||
GLIB_AVAILABLE_IN_2_66
|
GLIB_AVAILABLE_IN_2_66
|
||||||
|
@ -1708,6 +1708,79 @@ test_uri_join_split_round_trip (void)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static const struct
|
||||||
|
{
|
||||||
|
/* Inputs */
|
||||||
|
const gchar *base;
|
||||||
|
const gchar *uri;
|
||||||
|
GUriFlags flags;
|
||||||
|
/* Outputs */
|
||||||
|
const gchar *path;
|
||||||
|
int port;
|
||||||
|
} normalize_tests[] =
|
||||||
|
{
|
||||||
|
{ NULL, "http://foo/path with spaces", G_URI_FLAGS_ENCODED,
|
||||||
|
"/path%20with%20spaces", -1 },
|
||||||
|
{ NULL, "http://foo/path with spaces 2", G_URI_FLAGS_ENCODED_PATH,
|
||||||
|
"/path%20with%20spaces%202", -1 },
|
||||||
|
{ NULL, "http://foo/%aa", G_URI_FLAGS_ENCODED,
|
||||||
|
"/%AA", -1 },
|
||||||
|
{ NULL, "http://foo/p\xc3\xa4th/", G_URI_FLAGS_ENCODED | G_URI_FLAGS_PARSE_RELAXED,
|
||||||
|
"/p%C3%A4th/", -1 },
|
||||||
|
{ NULL, "http://foo", G_URI_FLAGS_SCHEME_NORMALIZE,
|
||||||
|
"/", -1 },
|
||||||
|
{ NULL, "nothttp://foo", G_URI_FLAGS_SCHEME_NORMALIZE,
|
||||||
|
"", -1 },
|
||||||
|
{ NULL, "http://foo:80", G_URI_FLAGS_SCHEME_NORMALIZE,
|
||||||
|
"/", -1 },
|
||||||
|
{ NULL, "https://foo:443", G_URI_FLAGS_SCHEME_NORMALIZE,
|
||||||
|
"/", -1 },
|
||||||
|
{ NULL, "ftp://foo:21", G_URI_FLAGS_SCHEME_NORMALIZE,
|
||||||
|
"", -1 },
|
||||||
|
{ NULL, "nothttp://foo:80", G_URI_FLAGS_SCHEME_NORMALIZE,
|
||||||
|
"", 80 },
|
||||||
|
{ "http://foo", "//bar", G_URI_FLAGS_SCHEME_NORMALIZE,
|
||||||
|
"/", -1 },
|
||||||
|
{ "http://foo", "//bar:80", G_URI_FLAGS_SCHEME_NORMALIZE,
|
||||||
|
"/", -1 },
|
||||||
|
{ "nothttp://foo", "//bar:80", G_URI_FLAGS_SCHEME_NORMALIZE,
|
||||||
|
"", 80 },
|
||||||
|
{ "http://foo", "//bar", 0,
|
||||||
|
"", -1 },
|
||||||
|
};
|
||||||
|
|
||||||
|
static void
|
||||||
|
test_uri_normalize (void)
|
||||||
|
{
|
||||||
|
gsize i;
|
||||||
|
int port;
|
||||||
|
|
||||||
|
for (i = 0; i < G_N_ELEMENTS (normalize_tests); ++i)
|
||||||
|
{
|
||||||
|
GUri *uri, *base = NULL;
|
||||||
|
if (normalize_tests[i].base)
|
||||||
|
base = g_uri_parse (normalize_tests[i].base, normalize_tests[i].flags, NULL);
|
||||||
|
|
||||||
|
uri = g_uri_parse_relative (base,
|
||||||
|
normalize_tests[i].uri,
|
||||||
|
normalize_tests[i].flags,
|
||||||
|
NULL);
|
||||||
|
|
||||||
|
g_assert_nonnull (uri);
|
||||||
|
g_assert_cmpstr (g_uri_get_path (uri), ==, normalize_tests[i].path);
|
||||||
|
g_assert_cmpint (g_uri_get_port (uri), ==, normalize_tests[i].port);
|
||||||
|
|
||||||
|
g_uri_unref (uri);
|
||||||
|
if (base)
|
||||||
|
g_uri_unref (base);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* One off testing a codepath where scheme is NULL but internally we still normalize it. */
|
||||||
|
g_assert_true (g_uri_split ("HTTP://foo:80", G_URI_FLAGS_SCHEME_NORMALIZE,
|
||||||
|
NULL, NULL, NULL, &port, NULL, NULL, NULL, NULL));
|
||||||
|
g_assert_cmpint (port, ==, -1);
|
||||||
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
main (int argc,
|
main (int argc,
|
||||||
char *argv[])
|
char *argv[])
|
||||||
@ -1733,6 +1806,7 @@ main (int argc,
|
|||||||
g_test_add_func ("/uri/to-string", test_uri_to_string);
|
g_test_add_func ("/uri/to-string", test_uri_to_string);
|
||||||
g_test_add_func ("/uri/join", test_uri_join);
|
g_test_add_func ("/uri/join", test_uri_join);
|
||||||
g_test_add_func ("/uri/join-split-round-trip", test_uri_join_split_round_trip);
|
g_test_add_func ("/uri/join-split-round-trip", test_uri_join_split_round_trip);
|
||||||
|
g_test_add_func ("/uri/normalize", test_uri_normalize);
|
||||||
g_test_add_data_func ("/uri/iter-params/nul-terminated", GINT_TO_POINTER (TRUE), test_uri_iter_params);
|
g_test_add_data_func ("/uri/iter-params/nul-terminated", GINT_TO_POINTER (TRUE), test_uri_iter_params);
|
||||||
g_test_add_data_func ("/uri/iter-params/length", GINT_TO_POINTER (FALSE), test_uri_iter_params);
|
g_test_add_data_func ("/uri/iter-params/length", GINT_TO_POINTER (FALSE), test_uri_iter_params);
|
||||||
g_test_add_data_func ("/uri/parse-params/nul-terminated", GINT_TO_POINTER (TRUE), test_uri_parse_params);
|
g_test_add_data_func ("/uri/parse-params/nul-terminated", GINT_TO_POINTER (TRUE), test_uri_parse_params);
|
||||||
|
Loading…
Reference in New Issue
Block a user