Merge branch 'uri-params-iter' into 'master'

Add GUriParamsIter

See merge request GNOME/glib!1572
This commit is contained in:
Philip Withnall 2020-08-05 16:07:42 +00:00
commit bb1df0e515
4 changed files with 330 additions and 91 deletions

View File

@ -3373,7 +3373,10 @@ g_uri_get_query
g_uri_get_fragment
g_uri_get_flags
<SUBSECTION>
GUriParamsIter
GUriParamsFlags
g_uri_params_iter_init
g_uri_params_iter_next
g_uri_parse_params
<SUBSECTION>
G_URI_RESERVED_CHARS_ALLOWED_IN_PATH

View File

@ -1761,6 +1761,172 @@ str_ascii_case_equal (gconstpointer v1,
return g_ascii_strcasecmp (string1, string2) == 0;
}
/**
* GUriParamsIter:
*
* Many URI schemes include one or more attribute/value pairs as part of the URI
* value (for example "scheme://server/path?query=string&is=there" has two
* attributes "query=string" and "is=there" in its query part).
*
* A #GUriParamsIter structure represents an iterator that can be used to
* iterate over the attribute/value pairs of a URI query string. #GUriParamsIter
* structures are typically allocated on the stack and then initialized with
* g_uri_params_iter_init(). See the documentation for g_uri_params_iter_init()
* for a usage example.
*
* Since: 2.66
*/
/* Private view of a #GUriParamsIter: the iterator functions in this file cast
 * the caller's opaque #GUriParamsIter to this type to reach the real state. */
typedef struct
{
/* Flags given to g_uri_params_iter_init(); consulted for
 * %G_URI_PARAMS_WWW_FORM when decoding each pair. */
GUriParamsFlags flags;
/* Start of the next attribute/value pair still to be parsed. */
const gchar *attr;
/* One past the last byte of the parameter string. */
const gchar *end;
guint8 sep_table[256]; /* 1 = index is a separator; 0 otherwise */
} RealIter;
/* The public structure only reserves storage, so it has to be exactly as large
 * as, and at least as strictly aligned as, the private one for the cast to be
 * valid. */
G_STATIC_ASSERT (sizeof (GUriParamsIter) == sizeof (RealIter));
G_STATIC_ASSERT (G_ALIGNOF (GUriParamsIter) >= G_ALIGNOF (RealIter));
/**
 * g_uri_params_iter_init:
 * @iter: an uninitialized #GUriParamsIter
 * @params: a `%`-encoded string containing "attribute=value"
 *   parameters
 * @length: the length of @params, or -1 if it is NUL-terminated
 * @separators: the separator byte character set between parameters. (usually
 *   "&", but sometimes ";" or both "&;"). Note that this function works on
 *   bytes not characters, so it can't be used to delimit UTF-8 strings for
 *   anything but ASCII characters. You may pass an empty set, in which case
 *   no splitting will occur.
 * @flags: flags to modify the way the parameters are handled.
 *
 * Initializes an attribute/value pair iterator. The iterator keeps references
 * to the @params and @separators arguments; both must therefore outlive the
 * iterator and must not be modified during the iteration.
 *
 * |[<!-- language="C" -->
 * GUriParamsIter iter;
 * GError *error = NULL;
 * gchar *attr, *value;
 *
 * g_uri_params_iter_init (&iter, "foo=bar&baz=bar", -1, "&", G_URI_PARAMS_NONE);
 * while (g_uri_params_iter_next (&iter, &attr, &value, &error))
 *   {
 *     // do something with attr and value
 *     g_free (attr);
 *     g_free (value);
 *   }
 * if (error)
 *   // handle parsing error
 * ]|
 *
 * Since: 2.66
 */
void
g_uri_params_iter_init (GUriParamsIter *iter,
                        const gchar    *params,
                        gssize          length,
                        const gchar    *separators,
                        GUriParamsFlags flags)
{
  RealIter *ri = (RealIter *)iter;
  const guchar *sep;

  g_return_if_fail (iter != NULL);
  g_return_if_fail (length == 0 || params != NULL);
  g_return_if_fail (length >= -1);
  g_return_if_fail (separators != NULL);

  ri->flags = flags;

  /* Iteration covers the byte range [attr, end). */
  ri->attr = params;
  ri->end = params + (length == -1 ? strlen (params) : (gsize) length);

  /* Build a byte-indexed lookup table so that the separator test done for
   * every input byte while iterating is a single array access. */
  memset (ri->sep_table, FALSE, sizeof (ri->sep_table));
  for (sep = (const guchar *) separators; *sep != '\0'; sep++)
    ri->sep_table[*sep] = TRUE;
}
/**
 * g_uri_params_iter_next:
 * @iter: an initialized #GUriParamsIter
 * @attribute: (out) (nullable) (optional) (transfer full): on return, contains
 *   the attribute, or %NULL.
 * @value: (out) (nullable) (optional) (transfer full): on return, contains
 *   the value, or %NULL.
 * @error: #GError for error reporting, or %NULL to ignore.
 *
 * Advances @iter and retrieves the next attribute/value. If %FALSE is returned,
 * @attribute and @value are not set, and the iterator becomes invalid. Note
 * that the same attribute may be returned multiple times, since URIs
 * allow repeated attributes.
 *
 * The strings stored in @attribute and @value are newly allocated and must be
 * released with g_free(). Passing %NULL for either of them discards the
 * corresponding decoded string.
 *
 * Returns: %FALSE if the end of the parameters has been reached or an error was
 *   encountered.
 *
 * Since: 2.66
 */
gboolean
g_uri_params_iter_next (GUriParamsIter *iter,
                        gchar         **attribute,
                        gchar         **value,
                        GError        **error)
{
  RealIter *ri = (RealIter *)iter;
  const gchar *attr_end, *val, *val_end;
  gchar *decoded_attr, *decoded_value;
  gboolean www_form;

  g_return_val_if_fail (iter != NULL, FALSE);
  g_return_val_if_fail (error == NULL || *error == NULL, FALSE);

  /* Read through @ri only after @iter has been checked, so that the guard
   * above really protects against a NULL iterator. */
  www_form = ri->flags & G_URI_PARAMS_WWW_FORM;

  if (ri->attr >= ri->end)
    return FALSE;

  /* Check if each character in @attr is a separator, by indexing by the
   * character value into the @sep_table, which has value 1 stored at an
   * index if that index is a separator. */
  for (val_end = ri->attr; val_end < ri->end; val_end++)
    if (ri->sep_table[*(const guchar *)val_end])
      break;

  /* The '=' must lie inside the current pair, i.e. before @val_end. */
  attr_end = memchr (ri->attr, '=', val_end - ri->attr);
  if (!attr_end)
    {
      g_set_error_literal (error, G_URI_ERROR, G_URI_ERROR_MISC,
                           _("Missing '=' and parameter value"));
      return FALSE;
    }

  if (!uri_decode (&decoded_attr, NULL, ri->attr, attr_end - ri->attr,
                   www_form, G_URI_FLAGS_NONE, G_URI_ERROR_MISC, error))
    {
      return FALSE;
    }

  val = attr_end + 1;
  if (!uri_decode (&decoded_value, NULL, val, val_end - val,
                   www_form, G_URI_FLAGS_NONE, G_URI_ERROR_MISC, error))
    {
      g_free (decoded_attr);
      return FALSE;
    }

  if (attribute)
    *attribute = g_steal_pointer (&decoded_attr);
  if (value)
    *value = g_steal_pointer (&decoded_value);

  /* A stolen pointer has been reset to NULL, so these calls only release the
   * strings the caller did not ask for. */
  g_free (decoded_attr);
  g_free (decoded_value);

  /* Step over the separator which ended this pair. When the pair ran up to
   * the end of the input there is no separator to skip, and stepping anyway
   * would form a pointer beyond one-past-the-end of the buffer. */
  ri->attr = (val_end < ri->end) ? val_end + 1 : ri->end;

  return TRUE;
}
/**
* g_uri_parse_params:
* @params: a `%`-encoded string containing "attribute=value"
@ -1775,7 +1941,10 @@ str_ascii_case_equal (gconstpointer v1,
* @error: #GError for error reporting, or %NULL to ignore.
*
* Many URI schemes include one or more attribute/value pairs as part of the URI
* value. This method can be used to parse them into a hash table.
* value. This method can be used to parse them into a hash table. When an
* attribute has multiple occurrences, the last value is the final returned
* value. If you need to handle repeated attributes differently, use
* #GUriParamsIter.
*
* The @params string is assumed to still be `%`-encoded, but the returned
* values will be fully decoded. (Thus it is possible that the returned values
@ -1801,10 +1970,9 @@ g_uri_parse_params (const gchar *params,
GError **error)
{
GHashTable *hash;
const gchar *end, *attr, *attr_end, *value, *value_end, *s;
gchar *decoded_attr, *decoded_value;
guint8 sep_table[256]; /* 1 = index is a separator; 0 otherwise */
gboolean www_form = flags & G_URI_PARAMS_WWW_FORM;
GUriParamsIter iter;
gchar *attribute, *value;
GError *err = NULL;
g_return_val_if_fail (length == 0 || params != NULL, NULL);
g_return_val_if_fail (length >= -1, NULL);
@ -1823,51 +1991,16 @@ g_uri_parse_params (const gchar *params,
g_free, g_free);
}
if (length == -1)
end = params + strlen (params);
else
end = params + length;
g_uri_params_iter_init (&iter, params, length, separators, flags);
memset (sep_table, FALSE, sizeof (sep_table));
for (s = separators; *s != '\0'; ++s)
sep_table[*(guchar *)s] = TRUE;
while (g_uri_params_iter_next (&iter, &attribute, &value, &err))
g_hash_table_insert (hash, attribute, value);
attr = params;
while (attr < end)
if (err)
{
/* Check if each character in @attr is a separator, by indexing by the
* character value into the @sep_table, which has value 1 stored at an
* index if that index is a separator. */
for (value_end = attr; value_end < end; value_end++)
if (sep_table[*(guchar *)value_end])
break;
attr_end = memchr (attr, '=', value_end - attr);
if (!attr_end)
{
g_hash_table_destroy (hash);
g_set_error_literal (error, G_URI_ERROR, G_URI_ERROR_MISC,
_("Missing '=' and parameter value"));
return NULL;
}
if (!uri_decode (&decoded_attr, NULL, attr, attr_end - attr,
www_form, G_URI_FLAGS_NONE, G_URI_ERROR_MISC, error))
{
g_hash_table_destroy (hash);
return NULL;
}
value = attr_end + 1;
if (!uri_decode (&decoded_value, NULL, value, value_end - value,
www_form, G_URI_FLAGS_NONE, G_URI_ERROR_MISC, error))
{
g_free (decoded_attr);
g_hash_table_destroy (hash);
return NULL;
}
g_hash_table_insert (hash, decoded_attr, decoded_value);
attr = value_end + 1;
g_propagate_error (error, g_steal_pointer (&err));
g_hash_table_destroy (hash);
return NULL;
}
return hash;

View File

@ -254,6 +254,29 @@ GHashTable *g_uri_parse_params (const gchar *params,
GUriParamsFlags flags,
GError **error);
/* Caller-allocated storage for a URI parameter iterator, set up with
 * g_uri_params_iter_init() and advanced with g_uri_params_iter_next().
 * The members are placeholders which only reserve space and must not be
 * accessed directly.
 * NOTE(review): the size and alignment presumably have to keep matching the
 * private iterator structure in the implementation file — confirm before
 * changing any member. */
typedef struct _GUriParamsIter GUriParamsIter;
struct _GUriParamsIter
{
/*< private >*/
gint dummy0;
gpointer dummy1;
gpointer dummy2;
guint8 dummy3[256];
};
GLIB_AVAILABLE_IN_2_66
void g_uri_params_iter_init (GUriParamsIter *iter,
const gchar *params,
gssize length,
const gchar *separators,
GUriParamsFlags flags);
GLIB_AVAILABLE_IN_2_66
gboolean g_uri_params_iter_next (GUriParamsIter *iter,
gchar **attribute,
gchar **value,
GError **error);
/**
* G_URI_ERROR:
*

View File

@ -1351,50 +1351,75 @@ test_uri_is_valid (void)
g_clear_error (&error);
}
/* Fixtures shared by the parameter iterator test and the g_uri_parse_params()
 * test: each entry gives one input and the result expected from both APIs. */
static const struct
{
  /* Inputs */
  const gchar *uri;
  const gchar *separators;
  GUriParamsFlags flags;

  /* Outputs */
  /* Number of pairs the iterator yields before it stops, whether it stops at
   * the end of the input or on an error. */
  gssize expected_n_iter;
  /* key, value, key, value, …, limited to length 2*expected_n_iter */
  const gchar *expected_iter_key_values[6];
  /* Number of entries in the parsed hash table; -1 => error expected */
  gssize expected_n_params;
  /* key, value, key, value, …, limited to length 2*expected_n_params */
  const gchar *expected_param_key_values[6];
} params_tests[] =
  {
    { "p1=foo&p2=bar;p3=baz", "&;", G_URI_PARAMS_NONE,
      3, { "p1", "foo", "p2", "bar", "p3", "baz" },
      3, { "p1", "foo", "p2", "bar", "p3", "baz" }},
    { "p1=foo&p2=bar", "", G_URI_PARAMS_NONE,
      1, { "p1", "foo&p2=bar" },
      1, { "p1", "foo&p2=bar" }},
    { "p1=foo&&P1=bar", "&", G_URI_PARAMS_NONE,
      1, { "p1", "foo" },
      -1, { NULL, }},
    { "%00=foo", "&", G_URI_PARAMS_NONE,
      0, { NULL, },
      -1, { NULL, }},
    { "p1=%00", "&", G_URI_PARAMS_NONE,
      0, { NULL, },
      -1, { NULL, }},
    { "p1=foo&p1=bar", "&", G_URI_PARAMS_NONE,
      2, { "p1", "foo", "p1", "bar" },
      1, { "p1", "bar", NULL, }},
    { "p1=foo&P1=bar", "&", G_URI_PARAMS_CASE_INSENSITIVE,
      2, { "p1", "foo", "P1", "bar" },
      1, { "p1", "bar", NULL, }},
    { "=%", "&", G_URI_PARAMS_NONE,
      1, { "", "%", NULL, },
      1, { "", "%", NULL, }},
    { "=", "&", G_URI_PARAMS_NONE,
      1, { "", "", NULL, },
      1, { "", "", NULL, }},
    { "foo", "&", G_URI_PARAMS_NONE,
      0, { NULL, },
      -1, { NULL, }},
    { "foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver", "&", G_URI_PARAMS_WWW_FORM,
      2, { "foo", "bar & baz", "saisons", "Été+hiver", NULL, },
      2, { "foo", "bar & baz", "saisons", "Été+hiver", NULL, }},
    { "foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver", "&", G_URI_PARAMS_NONE,
      2, { "foo", "bar+&+baz", "saisons", "Été+hiver", NULL, },
      2, { "foo", "bar+&+baz", "saisons", "Été+hiver", NULL, }},
  };
static void
test_uri_parse_params (gconstpointer test_data)
test_uri_iter_params (gconstpointer test_data)
{
GError *err = NULL;
gboolean use_nul_terminated = GPOINTER_TO_INT (test_data);
const struct
{
/* Inputs */
const gchar *uri;
gchar *separators;
GUriParamsFlags flags;
/* Outputs */
gssize expected_n_params; /* -1 => error expected */
/* key, value, key, value, …, limited to length 2*expected_n_params */
const gchar *expected_param_key_values[6];
}
tests[] =
{
{ "p1=foo&p2=bar;p3=baz", "&;", G_URI_PARAMS_NONE, 3, { "p1", "foo", "p2", "bar", "p3", "baz" }},
{ "p1=foo&p2=bar", "", G_URI_PARAMS_NONE, 1, { "p1", "foo&p2=bar" }},
{ "p1=foo&&P1=bar", "&", G_URI_PARAMS_NONE, -1, { NULL, }},
{ "%00=foo", "&", G_URI_PARAMS_NONE, -1, { NULL, }},
{ "p1=%00", "&", G_URI_PARAMS_NONE, -1, { NULL, }},
{ "p1=foo&P1=bar", "&", G_URI_PARAMS_CASE_INSENSITIVE, 1, { "p1", "bar", NULL, }},
{ "=%", "&", G_URI_PARAMS_NONE, 1, { "", "%", NULL, }},
{ "=", "&", G_URI_PARAMS_NONE, 1, { "", "", NULL, }},
{ "foo", "&", G_URI_PARAMS_NONE, -1, { NULL, }},
{ "foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver", "&", G_URI_PARAMS_WWW_FORM,
2, { "foo", "bar & baz", "saisons", "Été+hiver", NULL, }},
{ "foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver", "&", G_URI_PARAMS_NONE,
2, { "foo", "bar+&+baz", "saisons", "Été+hiver", NULL, }},
};
gsize i;
gsize i, n;
for (i = 0; i < G_N_ELEMENTS (tests); i++)
for (i = 0; i < G_N_ELEMENTS (params_tests); i++)
{
GHashTable *params;
gchar *uri = NULL;
GUriParamsIter iter;
gchar *uri, *attr, *value;
gssize uri_len;
g_test_message ("URI %" G_GSIZE_FORMAT ": %s", i, tests[i].uri);
g_test_message ("URI %" G_GSIZE_FORMAT ": %s", i, params_tests[i].uri);
g_assert (tests[i].expected_n_params < 0 ||
tests[i].expected_n_params <= G_N_ELEMENTS (tests[i].expected_param_key_values) / 2);
g_assert (params_tests[i].expected_n_params < 0 ||
params_tests[i].expected_n_params <= G_N_ELEMENTS (params_tests[i].expected_param_key_values) / 2);
/* The tests get run twice: once with the length unspecified, using a
* nul-terminated string; and once with the length specified and a copy of
@ -1403,17 +1428,70 @@ test_uri_parse_params (gconstpointer test_data)
if (use_nul_terminated)
{
uri_len = -1;
uri = g_strdup (tests[i].uri);
uri = g_strdup (params_tests[i].uri);
}
else
{
uri_len = strlen (tests[i].uri); /* no trailing nul */
uri = g_memdup (tests[i].uri, uri_len);
uri_len = strlen (params_tests[i].uri); /* no trailing nul */
uri = g_memdup (params_tests[i].uri, uri_len);
}
params = g_uri_parse_params (uri, uri_len, tests[i].separators, tests[i].flags, &err);
n = 0;
g_uri_params_iter_init (&iter, params_tests[i].uri, -1, params_tests[i].separators, params_tests[i].flags);
while (g_uri_params_iter_next (&iter, &attr, &value, &err))
{
g_assert_cmpstr (attr, ==, params_tests[i].expected_iter_key_values[n * 2]);
g_assert_cmpstr (value, ==, params_tests[i].expected_iter_key_values[n * 2 + 1]);
n++;
g_free (attr);
g_free (value);
}
g_assert_cmpint (n, ==, params_tests[i].expected_n_iter);
if (err)
{
g_assert_error (err, G_URI_ERROR, G_URI_ERROR_MISC);
g_clear_error (&err);
}
g_free (uri);
}
}
if (tests[i].expected_n_params < 0)
static void
test_uri_parse_params (gconstpointer test_data)
{
GError *err = NULL;
gboolean use_nul_terminated = GPOINTER_TO_INT (test_data);
gsize i;
for (i = 0; i < G_N_ELEMENTS (params_tests); i++)
{
GHashTable *params;
gchar *uri = NULL;
gssize uri_len;
g_test_message ("URI %" G_GSIZE_FORMAT ": %s", i, params_tests[i].uri);
g_assert (params_tests[i].expected_n_params < 0 ||
params_tests[i].expected_n_params <= G_N_ELEMENTS (params_tests[i].expected_param_key_values) / 2);
/* The tests get run twice: once with the length unspecified, using a
* nul-terminated string; and once with the length specified and a copy of
* the string with the trailing nul explicitly removed (to help catch
* buffer overflows). */
if (use_nul_terminated)
{
uri_len = -1;
uri = g_strdup (params_tests[i].uri);
}
else
{
uri_len = strlen (params_tests[i].uri); /* no trailing nul */
uri = g_memdup (params_tests[i].uri, uri_len);
}
params = g_uri_parse_params (uri, uri_len, params_tests[i].separators, params_tests[i].flags, &err);
if (params_tests[i].expected_n_params < 0)
{
g_assert_null (params);
g_assert_error (err, G_URI_ERROR, G_URI_ERROR_MISC);
@ -1424,11 +1502,11 @@ test_uri_parse_params (gconstpointer test_data)
gsize j;
g_assert_no_error (err);
g_assert_cmpint (g_hash_table_size (params), ==, tests[i].expected_n_params);
g_assert_cmpint (g_hash_table_size (params), ==, params_tests[i].expected_n_params);
for (j = 0; j < tests[i].expected_n_params; j += 2)
g_assert_cmpstr (g_hash_table_lookup (params, tests[i].expected_param_key_values[j]), ==,
tests[i].expected_param_key_values[j + 1]);
for (j = 0; j < params_tests[i].expected_n_params; j += 2)
g_assert_cmpstr (g_hash_table_lookup (params, params_tests[i].expected_param_key_values[j]), ==,
params_tests[i].expected_param_key_values[j + 1]);
}
g_clear_pointer (&params, g_hash_table_unref);
@ -1490,6 +1568,8 @@ main (int argc,
g_test_add_func ("/uri/is_valid", test_uri_is_valid);
g_test_add_func ("/uri/to-string", test_uri_to_string);
g_test_add_func ("/uri/join", test_uri_join);
g_test_add_data_func ("/uri/iter-params/nul-terminated", GINT_TO_POINTER (TRUE), test_uri_iter_params);
g_test_add_data_func ("/uri/iter-params/length", GINT_TO_POINTER (FALSE), test_uri_iter_params);
g_test_add_data_func ("/uri/parse-params/nul-terminated", GINT_TO_POINTER (TRUE), test_uri_parse_params);
g_test_add_data_func ("/uri/parse-params/length", GINT_TO_POINTER (FALSE), test_uri_parse_params);