mirror of
https://gitlab.gnome.org/GNOME/glib.git
synced 2025-03-30 20:33:08 +02:00
guri: Normalize uri segments if they are encoded
This changes it so that when a segment is encoded, it is normalized at parse time, which ensures it is valid and makes it easier to compare with other URIs.
This commit is contained in:
parent
63dfceedd2
commit
482e10d3bb
42
glib/guri.c
42
glib/guri.c
@ -289,15 +289,16 @@ uri_decoder (gchar **out,
|
|||||||
GUriError parse_error,
|
GUriError parse_error,
|
||||||
GError **error)
|
GError **error)
|
||||||
{
|
{
|
||||||
gchar *decoded, *d, c;
|
gchar c;
|
||||||
|
GString *decoded;
|
||||||
const gchar *invalid, *s, *end;
|
const gchar *invalid, *s, *end;
|
||||||
gssize len;
|
gssize len;
|
||||||
|
|
||||||
if (!(flags & G_URI_FLAGS_ENCODED))
|
if (!(flags & G_URI_FLAGS_ENCODED))
|
||||||
just_normalize = FALSE;
|
just_normalize = FALSE;
|
||||||
|
|
||||||
decoded = g_malloc (length + 1);
|
decoded = g_string_sized_new (length + 1);
|
||||||
for (s = start, end = s + length, d = decoded; s < end; s++)
|
for (s = start, end = s + length; s < end; s++)
|
||||||
{
|
{
|
||||||
if (*s == '%')
|
if (*s == '%')
|
||||||
{
|
{
|
||||||
@ -311,7 +312,7 @@ uri_decoder (gchar **out,
|
|||||||
g_set_error_literal (error, G_URI_ERROR, parse_error,
|
g_set_error_literal (error, G_URI_ERROR, parse_error,
|
||||||
/* xgettext: no-c-format */
|
/* xgettext: no-c-format */
|
||||||
_("Invalid %-encoding in URI"));
|
_("Invalid %-encoding in URI"));
|
||||||
g_free (decoded);
|
g_string_free (decoded, TRUE);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -319,7 +320,7 @@ uri_decoder (gchar **out,
|
|||||||
* fix it to "%25", since that might change the way that
|
* fix it to "%25", since that might change the way that
|
||||||
* the URI's owner would interpret it.
|
* the URI's owner would interpret it.
|
||||||
*/
|
*/
|
||||||
*d++ = *s;
|
g_string_append_c (decoded, *s);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -328,43 +329,50 @@ uri_decoder (gchar **out,
|
|||||||
{
|
{
|
||||||
g_set_error_literal (error, G_URI_ERROR, parse_error,
|
g_set_error_literal (error, G_URI_ERROR, parse_error,
|
||||||
_("Illegal character in URI"));
|
_("Illegal character in URI"));
|
||||||
g_free (decoded);
|
g_string_free (decoded, TRUE);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
if (just_normalize && !g_uri_char_is_unreserved (c))
|
if (just_normalize && !g_uri_char_is_unreserved (c))
|
||||||
{
|
{
|
||||||
/* Leave the % sequence there. */
|
/* Leave the % sequence there but normalize it. */
|
||||||
*d++ = *s;
|
g_string_append_c (decoded, *s);
|
||||||
|
g_string_append_c (decoded, g_ascii_toupper (s[1]));
|
||||||
|
g_string_append_c (decoded, g_ascii_toupper (s[2]));
|
||||||
|
s += 2;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
*d++ = c;
|
g_string_append_c (decoded, c);
|
||||||
s += 2;
|
s += 2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (www_form && *s == '+')
|
else if (www_form && *s == '+')
|
||||||
*d++ = ' ';
|
g_string_append_c (decoded, ' ');
|
||||||
|
/* Normalize any illegal characters */
|
||||||
|
else if (just_normalize && (!g_ascii_isgraph (*s) ||
|
||||||
|
(illegal_chars && strchr (illegal_chars, *s))))
|
||||||
|
g_string_append_printf (decoded, "%%%02X", (guchar)*s);
|
||||||
else
|
else
|
||||||
*d++ = *s;
|
g_string_append_c (decoded, *s);
|
||||||
}
|
}
|
||||||
*d = '\0';
|
|
||||||
|
|
||||||
len = d - decoded;
|
len = decoded->len;
|
||||||
g_assert (len >= 0);
|
g_assert (len >= 0);
|
||||||
|
|
||||||
if (!(flags & G_URI_FLAGS_ENCODED) &&
|
if (!(flags & G_URI_FLAGS_ENCODED) &&
|
||||||
!g_utf8_validate (decoded, len, &invalid))
|
!g_utf8_validate (decoded->str, len, &invalid))
|
||||||
{
|
{
|
||||||
g_set_error_literal (error, G_URI_ERROR, parse_error,
|
g_set_error_literal (error, G_URI_ERROR, parse_error,
|
||||||
_("Non-UTF-8 characters in URI"));
|
_("Non-UTF-8 characters in URI"));
|
||||||
g_free (decoded);
|
g_string_free (decoded, TRUE);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (out)
|
if (out)
|
||||||
*out = g_steal_pointer (&decoded);
|
*out = g_string_free (decoded, FALSE);
|
||||||
|
else
|
||||||
|
g_string_free (decoded, TRUE);
|
||||||
|
|
||||||
g_free (decoded);
|
|
||||||
return len;
|
return len;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1708,6 +1708,41 @@ test_uri_join_split_round_trip (void)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static const struct
|
||||||
|
{
|
||||||
|
/* Inputs */
|
||||||
|
const gchar *uri;
|
||||||
|
GUriFlags flags;
|
||||||
|
/* Outputs */
|
||||||
|
const gchar *path;
|
||||||
|
} normalize_tests[] =
|
||||||
|
{
|
||||||
|
{ "http://foo/path with spaces", G_URI_FLAGS_ENCODED,
|
||||||
|
"/path%20with%20spaces" },
|
||||||
|
{ "http://foo/path with spaces 2", G_URI_FLAGS_ENCODED_PATH,
|
||||||
|
"/path%20with%20spaces%202" },
|
||||||
|
{ "http://foo/%aa", G_URI_FLAGS_ENCODED,
|
||||||
|
"/%AA" },
|
||||||
|
{ "http://foo/p\xc3\xa4th/", G_URI_FLAGS_ENCODED | G_URI_FLAGS_PARSE_RELAXED,
|
||||||
|
"/p%C3%A4th/" },
|
||||||
|
};
|
||||||
|
|
||||||
|
static void
|
||||||
|
test_uri_normalize (void)
|
||||||
|
{
|
||||||
|
gsize i;
|
||||||
|
|
||||||
|
for (i = 0; i < G_N_ELEMENTS (normalize_tests); ++i)
|
||||||
|
{
|
||||||
|
GUri *uri = g_uri_parse (normalize_tests[i].uri,
|
||||||
|
normalize_tests[i].flags,
|
||||||
|
NULL);
|
||||||
|
g_assert_nonnull (uri);
|
||||||
|
g_assert_cmpstr (g_uri_get_path (uri), ==, normalize_tests[i].path);
|
||||||
|
g_uri_unref (uri);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
main (int argc,
|
main (int argc,
|
||||||
char *argv[])
|
char *argv[])
|
||||||
@ -1733,6 +1768,7 @@ main (int argc,
|
|||||||
g_test_add_func ("/uri/to-string", test_uri_to_string);
|
g_test_add_func ("/uri/to-string", test_uri_to_string);
|
||||||
g_test_add_func ("/uri/join", test_uri_join);
|
g_test_add_func ("/uri/join", test_uri_join);
|
||||||
g_test_add_func ("/uri/join-split-round-trip", test_uri_join_split_round_trip);
|
g_test_add_func ("/uri/join-split-round-trip", test_uri_join_split_round_trip);
|
||||||
|
g_test_add_func ("/uri/normalize", test_uri_normalize);
|
||||||
g_test_add_data_func ("/uri/iter-params/nul-terminated", GINT_TO_POINTER (TRUE), test_uri_iter_params);
|
g_test_add_data_func ("/uri/iter-params/nul-terminated", GINT_TO_POINTER (TRUE), test_uri_iter_params);
|
||||||
g_test_add_data_func ("/uri/iter-params/length", GINT_TO_POINTER (FALSE), test_uri_iter_params);
|
g_test_add_data_func ("/uri/iter-params/length", GINT_TO_POINTER (FALSE), test_uri_iter_params);
|
||||||
g_test_add_data_func ("/uri/parse-params/nul-terminated", GINT_TO_POINTER (TRUE), test_uri_parse_params);
|
g_test_add_data_func ("/uri/parse-params/nul-terminated", GINT_TO_POINTER (TRUE), test_uri_parse_params);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user