uri: change parse_params() to take a separator set

This should give a bit more flexibility, without drawbacks.

Many URI encodings accept either '&' or ';' as separators.

Update the documentation to reflect that '&' is probably the more
common separator (as in HTTP query strings).

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
This commit is contained in:
Marc-André Lureau 2020-06-30 14:53:26 +04:00
parent db9987d269
commit d022b7199b
4 changed files with 35 additions and 21 deletions

View File

@ -10,7 +10,7 @@ LLVMFuzzerTestOneInput (const unsigned char *data, size_t size)
if (size > G_MAXSSIZE)
return 0;
parsed_params = g_uri_parse_params ((const gchar *) data, (gssize) size, '&', FALSE);
parsed_params = g_uri_parse_params ((const gchar *) data, (gssize) size, "&", FALSE);
if (parsed_params == NULL)
return 0;

View File

@ -1750,8 +1750,11 @@ str_ascii_case_equal (gconstpointer v1,
* @params: a `%`-encoded string containing "attribute=value"
* parameters
* @length: the length of @params, or -1 if it is NUL-terminated
* @separator: the separator character between parameters.
* (usually ';', but sometimes '&')
* @separators: the set of separator bytes allowed between parameters (usually
* "&", but sometimes ";" or both "&;"). Note that this function works on
* bytes, not characters, so it can't be used to delimit UTF-8 strings for
* anything but ASCII characters. You may pass an empty set, in which case
* no splitting will occur.
* @case_insensitive: whether parameter names are case insensitive
*
* Many URI schemes include one or more attribute/value pairs as part of the URI
@ -1759,7 +1762,7 @@ str_ascii_case_equal (gconstpointer v1,
*
* The @params string is assumed to still be `%`-encoded, but the returned
* values will be fully decoded. (Thus it is possible that the returned values
* may contain '=' or @separator, if the value was encoded in the input.)
* may contain '=' or @separators, if the value was encoded in the input.)
* Invalid `%`-encoding is treated as with the non-%G_URI_FLAGS_PARSE_STRICT
* rules for g_uri_parse(). (However, if @params is the path or query string
* from a #GUri that was parsed with %G_URI_FLAGS_PARSE_STRICT and
@ -1768,7 +1771,7 @@ str_ascii_case_equal (gconstpointer v1,
*
* Return value: (transfer full) (element-type utf8 utf8): a hash table of
* attribute/value pairs. Both names and values will be fully-decoded. If
* @params cannot be parsed (eg, it contains two @separator characters in a
* @params cannot be parsed (eg, it contains two characters from @separators
* in a row), then %NULL is returned.
*
* Since: 2.66
@ -1776,15 +1779,17 @@ str_ascii_case_equal (gconstpointer v1,
GHashTable *
g_uri_parse_params (const gchar *params,
gssize length,
gchar separator,
const gchar *separators,
gboolean case_insensitive)
{
GHashTable *hash;
const gchar *end, *attr, *attr_end, *value, *value_end;
const gchar *end, *attr, *attr_end, *value, *value_end, *s;
gchar *decoded_attr, *decoded_value;
guint8 sep_table[256]; /* 1 = index is a separator; 0 otherwise */
g_return_val_if_fail (length == 0 || params != NULL, NULL);
g_return_val_if_fail (length >= -1, NULL);
g_return_val_if_fail (separators != NULL, NULL);
if (case_insensitive)
{
@ -1803,12 +1808,19 @@ g_uri_parse_params (const gchar *params,
else
end = params + length;
memset (sep_table, FALSE, sizeof (sep_table));
for (s = separators; *s != '\0'; ++s)
sep_table[*(guchar *)s] = TRUE;
attr = params;
while (attr < end)
{
value_end = memchr (attr, separator, end - attr);
if (!value_end)
value_end = end;
/* Check if each character in @attr is a separator, by indexing by the
* character value into the @sep_table, which has value 1 stored at an
* index if that index is a separator. */
for (value_end = attr; value_end < end; value_end++)
if (sep_table[*(guchar *)value_end])
break;
attr_end = memchr (attr, '=', value_end - attr);
if (!attr_end)

View File

@ -225,7 +225,7 @@ GUriFlags g_uri_get_flags (GUri *uri);
GLIB_AVAILABLE_IN_2_66
GHashTable * g_uri_parse_params (const gchar *params,
gssize length,
gchar separator,
const gchar *separators,
gboolean case_insensitive);
/**

View File

@ -1270,22 +1270,24 @@ test_uri_parse_params (gconstpointer test_data)
{
/* Inputs */
const gchar *uri;
gchar separator;
gchar *separators;
gboolean case_insensitive;
/* Outputs */
gssize expected_n_params; /* -1 => error expected */
/* key, value, key, value, …, limited to length 2*expected_n_params */
const gchar *expected_param_key_values[4];
const gchar *expected_param_key_values[6];
}
tests[] =
{
{ "", '&', FALSE, 0, { NULL, }},
{ "p1=foo&p2=bar", '&', FALSE, 2, { "p1", "foo", "p2", "bar" }},
{ "p1=foo&&P1=bar", '&', FALSE, -1, { NULL, }},
{ "%00=foo", '&', FALSE, -1, { NULL, }},
{ "p1=%00", '&', FALSE, -1, { NULL, }},
{ "p1=foo&P1=bar", '&', TRUE, 1, { "p1", "bar", NULL, }},
{ "=%", '&', FALSE, 1, { "", "%", NULL, }},
{ "", "&", FALSE, 0, { NULL, }},
{ "p1=foo&p2=bar", "&", FALSE, 2, { "p1", "foo", "p2", "bar" }},
{ "p1=foo&p2=bar;p3=baz", "&;", FALSE, 3, { "p1", "foo", "p2", "bar", "p3", "baz" }},
{ "p1=foo&p2=bar", "", FALSE, 1, { "p1", "foo&p2=bar" }},
{ "p1=foo&&P1=bar", "&", FALSE, -1, { NULL, }},
{ "%00=foo", "&", FALSE, -1, { NULL, }},
{ "p1=%00", "&", FALSE, -1, { NULL, }},
{ "p1=foo&P1=bar", "&", TRUE, 1, { "p1", "bar", NULL, }},
{ "=%", "&", FALSE, 1, { "", "%", NULL, }},
};
gsize i;
@ -1315,7 +1317,7 @@ test_uri_parse_params (gconstpointer test_data)
uri = g_memdup (tests[i].uri, uri_len);
}
params = g_uri_parse_params (uri, uri_len, tests[i].separator, tests[i].case_insensitive);
params = g_uri_parse_params (uri, uri_len, tests[i].separators, tests[i].case_insensitive);
if (tests[i].expected_n_params < 0)
{