guri: Add G_URI_FLAGS_SCHEME_NORMALIZE

This flag enables optional scheme-defined normalization
during parsing of a URI.
This commit is contained in:
Patrick Griffis 2020-10-23 14:36:54 -05:00
parent 482e10d3bb
commit 64f478dca3
3 changed files with 135 additions and 15 deletions

View File

@ -348,9 +348,8 @@ uri_decoder (gchar **out,
}
else if (www_form && *s == '+')
g_string_append_c (decoded, ' ');
/* Normalize any illegal characters */
else if (just_normalize && (!g_ascii_isgraph (*s) ||
(illegal_chars && strchr (illegal_chars, *s))))
/* Normalize any illegal characters. */
else if (just_normalize && (!g_ascii_isgraph (*s)))
g_string_append_printf (decoded, "%%%02X", (guchar)*s);
else
g_string_append_c (decoded, *s);
@ -748,6 +747,52 @@ uri_cleanup (const gchar *uri_string)
return g_string_free (copy, FALSE);
}
/*
 * should_normalize_empty_path:
 * @scheme: a URI scheme; must not be %NULL, as it is passed to strcmp()
 *
 * Checks whether @scheme is one of the schemes for which callers
 * rewrite an empty path to "/" during scheme-based normalization.
 * The comparison is an exact, case-sensitive match.
 *
 * Returns: %TRUE if @scheme is "https", "http", "wss" or "ws";
 *   %FALSE otherwise
 */
static gboolean
should_normalize_empty_path (const char *scheme)
{
  const char * const schemes[] = { "https", "http", "wss", "ws" };
  size_t i;  /* unsigned, so the comparison with G_N_ELEMENTS() is not mixed-sign */

  for (i = 0; i < G_N_ELEMENTS (schemes); ++i)
    {
      if (!strcmp (schemes[i], scheme))
        return TRUE;
    }

  return FALSE;
}
/*
 * normalize_port:
 * @scheme: a URI scheme; only compared when @port is 21, 80 or 443
 * @port: the port number to normalize
 *
 * Maps a port that is the known default for @scheme to -1:
 * 21 for "ftp", 80 for "http" and "ws", 443 for "https" and "wss".
 * The scheme comparison is an exact, case-sensitive match.
 *
 * Returns: -1 if @port is the default port of @scheme, otherwise
 *   @port unchanged
 */
static int
normalize_port (const char *scheme,
                int         port)
{
  /* Every (default port, scheme) pairing that gets normalized away. */
  static const struct
  {
    int         number;
    const char *scheme_name;
  } known_defaults[] = {
    { 21,  "ftp"   },
    { 80,  "http"  },
    { 80,  "ws"    },
    { 443, "https" },
    { 443, "wss"   },
  };
  const unsigned int n_known = sizeof known_defaults / sizeof known_defaults[0];
  unsigned int idx;

  for (idx = 0; idx < n_known; idx++)
    {
      /* Test the port first so @scheme is only inspected for known ports. */
      if (known_defaults[idx].number != port)
        continue;

      if (strcmp (scheme, known_defaults[idx].scheme_name) == 0)
        return -1;
    }

  return port;
}
static gboolean
g_uri_split_internal (const gchar *uri_string,
GUriFlags flags,
@ -766,6 +811,7 @@ g_uri_split_internal (const gchar *uri_string,
const gchar *end, *colon, *at, *path_start, *semi, *question;
const gchar *p, *bracket, *hostend;
gchar *cleaned_uri_string = NULL;
gchar *normalized_scheme = NULL;
if (scheme)
*scheme = NULL;
@ -803,8 +849,9 @@ g_uri_split_internal (const gchar *uri_string,
if (p > uri_string && *p == ':')
{
normalized_scheme = g_ascii_strdown (uri_string, p - uri_string);
if (scheme)
*scheme = g_ascii_strdown (uri_string, p - uri_string);
*scheme = g_steal_pointer (&normalized_scheme);
p++;
}
else
@ -930,6 +977,22 @@ g_uri_split_internal (const gchar *uri_string,
G_URI_ERROR_BAD_PATH, error))
goto fail;
/* Scheme-based normalization */
if (flags & G_URI_FLAGS_SCHEME_NORMALIZE && ((scheme && *scheme) || normalized_scheme))
{
const char *scheme_str = scheme && *scheme ? *scheme : normalized_scheme;
if (should_normalize_empty_path (scheme_str) && path && !**path)
{
g_free (*path);
*path = g_strdup ("/");
}
if (port && *port != -1)
*port = normalize_port (scheme_str, *port);
}
g_free (normalized_scheme);
g_free (cleaned_uri_string);
return TRUE;
@ -949,6 +1012,7 @@ g_uri_split_internal (const gchar *uri_string,
if (fragment)
g_clear_pointer (fragment, g_free);
g_free (normalized_scheme);
g_free (cleaned_uri_string);
return FALSE;
}
@ -1402,6 +1466,19 @@ g_uri_parse_relative (GUri *base_uri,
uri->port = base_uri->port;
}
}
/* Scheme normalization couldn't have been done earlier
* as the relative URI may not have had a scheme */
if (flags & G_URI_FLAGS_SCHEME_NORMALIZE)
{
if (should_normalize_empty_path (uri->scheme) && !*uri->path)
{
g_free (uri->path);
uri->path = g_strdup ("/");
}
uri->port = normalize_port (uri->scheme, uri->port);
}
}
return g_steal_pointer (&uri);

View File

@ -62,6 +62,10 @@ void g_uri_unref (GUri *uri);
* @G_URI_FLAGS_ENCODED_PATH: Same as %G_URI_FLAGS_ENCODED, for the path only.
* @G_URI_FLAGS_ENCODED_FRAGMENT: Same as %G_URI_FLAGS_ENCODED, for the
* fragment only.
* @G_URI_FLAGS_SCHEME_NORMALIZE: Applies scheme-based normalization to the
* parsed URI. For example, when parsing an HTTP URI, empty paths are changed
* to `/` and port `80` is changed to `-1`. This only supports a subset
* of known schemes. (Since: 2.68)
*
* Flags that describe a URI.
*
@ -83,6 +87,7 @@ typedef enum {
G_URI_FLAGS_ENCODED_QUERY = 1 << 5,
G_URI_FLAGS_ENCODED_PATH = 1 << 6,
G_URI_FLAGS_ENCODED_FRAGMENT = 1 << 7,
G_URI_FLAGS_SCHEME_NORMALIZE = 1 << 8,
} GUriFlags;
GLIB_AVAILABLE_IN_2_66

View File

@ -1711,36 +1711,74 @@ test_uri_join_split_round_trip (void)
static const struct
{
/* Inputs */
const gchar *base;
const gchar *uri;
GUriFlags flags;
/* Outputs */
const gchar *path;
int port;
} normalize_tests[] =
{
{ "http://foo/path with spaces", G_URI_FLAGS_ENCODED,
"/path%20with%20spaces" },
{ "http://foo/path with spaces 2", G_URI_FLAGS_ENCODED_PATH,
"/path%20with%20spaces%202" },
{ "http://foo/%aa", G_URI_FLAGS_ENCODED,
"/%AA" },
{ "http://foo/p\xc3\xa4th/", G_URI_FLAGS_ENCODED | G_URI_FLAGS_PARSE_RELAXED,
"/p%C3%A4th/" },
{ NULL, "http://foo/path with spaces", G_URI_FLAGS_ENCODED,
"/path%20with%20spaces", -1 },
{ NULL, "http://foo/path with spaces 2", G_URI_FLAGS_ENCODED_PATH,
"/path%20with%20spaces%202", -1 },
{ NULL, "http://foo/%aa", G_URI_FLAGS_ENCODED,
"/%AA", -1 },
{ NULL, "http://foo/p\xc3\xa4th/", G_URI_FLAGS_ENCODED | G_URI_FLAGS_PARSE_RELAXED,
"/p%C3%A4th/", -1 },
{ NULL, "http://foo", G_URI_FLAGS_SCHEME_NORMALIZE,
"/", -1 },
{ NULL, "nothttp://foo", G_URI_FLAGS_SCHEME_NORMALIZE,
"", -1 },
{ NULL, "http://foo:80", G_URI_FLAGS_SCHEME_NORMALIZE,
"/", -1 },
{ NULL, "https://foo:443", G_URI_FLAGS_SCHEME_NORMALIZE,
"/", -1 },
{ NULL, "ftp://foo:21", G_URI_FLAGS_SCHEME_NORMALIZE,
"", -1 },
{ NULL, "nothttp://foo:80", G_URI_FLAGS_SCHEME_NORMALIZE,
"", 80 },
{ "http://foo", "//bar", G_URI_FLAGS_SCHEME_NORMALIZE,
"/", -1 },
{ "http://foo", "//bar:80", G_URI_FLAGS_SCHEME_NORMALIZE,
"/", -1 },
{ "nothttp://foo", "//bar:80", G_URI_FLAGS_SCHEME_NORMALIZE,
"", 80 },
{ "http://foo", "//bar", 0,
"", -1 },
};
static void
test_uri_normalize (void)
{
gsize i;
int port;
for (i = 0; i < G_N_ELEMENTS (normalize_tests); ++i)
{
GUri *uri = g_uri_parse (normalize_tests[i].uri,
normalize_tests[i].flags,
NULL);
GUri *uri, *base = NULL;
if (normalize_tests[i].base)
base = g_uri_parse (normalize_tests[i].base, normalize_tests[i].flags, NULL);
uri = g_uri_parse_relative (base,
normalize_tests[i].uri,
normalize_tests[i].flags,
NULL);
g_assert_nonnull (uri);
g_assert_cmpstr (g_uri_get_path (uri), ==, normalize_tests[i].path);
g_assert_cmpint (g_uri_get_port (uri), ==, normalize_tests[i].port);
g_uri_unref (uri);
if (base)
g_uri_unref (base);
}
/* One off testing a codepath where scheme is NULL but internally we still normalize it. */
g_assert_true (g_uri_split ("HTTP://foo:80", G_URI_FLAGS_SCHEME_NORMALIZE,
NULL, NULL, NULL, &port, NULL, NULL, NULL, NULL));
g_assert_cmpint (port, ==, -1);
}
int