diff --git a/docs/reference/glib/glib-sections.txt b/docs/reference/glib/glib-sections.txt index 3cefbe010..d38fccf40 100644 --- a/docs/reference/glib/glib-sections.txt +++ b/docs/reference/glib/glib-sections.txt @@ -3373,7 +3373,10 @@ g_uri_get_query g_uri_get_fragment g_uri_get_flags +GUriParamsIter GUriParamsFlags +g_uri_params_iter_init +g_uri_params_iter_next g_uri_parse_params G_URI_RESERVED_CHARS_ALLOWED_IN_PATH diff --git a/glib/guri.c b/glib/guri.c index 92c6128df..2890d44e4 100644 --- a/glib/guri.c +++ b/glib/guri.c @@ -1761,6 +1761,172 @@ str_ascii_case_equal (gconstpointer v1, return g_ascii_strcasecmp (string1, string2) == 0; } +/** + * GUriParamsIter: + * + * Many URI schemes include one or more attribute/value pairs as part of the URI + * value (for example "scheme://server/path?query=string&is=there" has two + * attributes "query=string" and "is=there" in its query part). + * + * A #GUriParamsIter structure represents an iterator that can be used to + * iterate over the attribute/value pairs of a URI query string. #GUriParamsIter + * structures are typically allocated on the stack and then initialized with + * g_uri_params_iter_init(). See the documentation for g_uri_params_iter_init() + * for a usage example. + * + * Since: 2.66 + */ +typedef struct +{ + GUriParamsFlags flags; + const gchar *attr; + const gchar *end; + guint8 sep_table[256]; /* 1 = index is a separator; 0 otherwise */ +} RealIter; + +G_STATIC_ASSERT (sizeof (GUriParamsIter) == sizeof (RealIter)); +G_STATIC_ASSERT (G_ALIGNOF (GUriParamsIter) >= G_ALIGNOF (RealIter)); + +/** + * g_uri_params_iter_init: + * @iter: an uninitialized #GUriParamsIter + * @params: a `%`-encoded string containing "attribute=value" + * parameters + * @length: the length of @params, or -1 if it is NUL-terminated + * @separators: the separator byte character set between parameters. (usually + * "&", but sometimes ";" or both "&;"). 
Note that this function works on + * bytes not characters, so it can't be used to delimit UTF-8 strings for + * anything but ASCII characters. You may pass an empty set, in which case + * no splitting will occur. + * @flags: flags to modify the way the parameters are handled. + * + * Initializes an attribute/value pair iterator. The iterator keeps references + * over the @params and @separators arguments, those variables must thus outlive + * the iterator and not be modified during the iteration. + * + * |[ + * GUriParamsIter iter; + * GError *error = NULL; + * gchar *attr, *value; + * + * g_uri_params_iter_init (&iter, "foo=bar&baz=bar", -1, "&", G_URI_PARAMS_NONE); + * while (g_uri_params_iter_next (&iter, &attr, &value, &error)) + * { + * // do something with attr and value + * g_free (attr); + * g_free (value); + * } + * if (error) + * // handle parsing error + * ]| + * + * Since: 2.66 + */ +void +g_uri_params_iter_init (GUriParamsIter *iter, + const gchar *params, + gssize length, + const gchar *separators, + GUriParamsFlags flags) +{ + RealIter *ri = (RealIter *)iter; + const gchar *s; + + g_return_if_fail (iter != NULL); + g_return_if_fail (length == 0 || params != NULL); + g_return_if_fail (length >= -1); + g_return_if_fail (separators != NULL); + + ri->flags = flags; + + if (length == -1) + ri->end = params + strlen (params); + else + ri->end = params + length; + + memset (ri->sep_table, FALSE, sizeof (ri->sep_table)); + for (s = separators; *s != '\0'; ++s) + ri->sep_table[*(guchar *)s] = TRUE; + + ri->attr = params; +} + +/** + * g_uri_params_iter_next: + * @iter: an initialized #GUriParamsIter + * @attribute: (out) (nullable) (optional) (transfer full): on return, contains + * the attribute, or %NULL. + * @value: (out) (nullable) (optional) (transfer full): on return, contains + * the value, or %NULL. + * @error: #GError for error reporting, or %NULL to ignore. + * + * Advances @iter and retrieves the next attribute/value. 
If %FALSE is returned, + * @attribute and @value are not set, and the iterator becomes invalid. Note + * that the same attribute value may be returned multiple times, since URIs + * allow repeated attributes. + * + * Returns: %FALSE if the end of the parameters has been reached or an error was + * encountered. + * + * Since: 2.66 + */ +gboolean +g_uri_params_iter_next (GUriParamsIter *iter, + gchar **attribute, + gchar **value, + GError **error) +{ + RealIter *ri = (RealIter *)iter; + const gchar *attr_end, *val, *val_end; + gchar *decoded_attr, *decoded_value; + gboolean www_form = ri->flags & G_URI_PARAMS_WWW_FORM; + + g_return_val_if_fail (iter != NULL, FALSE); + g_return_val_if_fail (error == NULL || *error == NULL, FALSE); + + if (ri->attr >= ri->end) + return FALSE; + + /* Check if each character in @attr is a separator, by indexing by the + * character value into the @sep_table, which has value 1 stored at an + * index if that index is a separator. */ + for (val_end = ri->attr; val_end < ri->end; val_end++) + if (ri->sep_table[*(guchar *)val_end]) + break; + + attr_end = memchr (ri->attr, '=', val_end - ri->attr); + if (!attr_end) + { + g_set_error_literal (error, G_URI_ERROR, G_URI_ERROR_MISC, + _("Missing '=' and parameter value")); + return FALSE; + } + if (!uri_decode (&decoded_attr, NULL, ri->attr, attr_end - ri->attr, + www_form, G_URI_FLAGS_NONE, G_URI_ERROR_MISC, error)) + { + return FALSE; + } + + val = attr_end + 1; + if (!uri_decode (&decoded_value, NULL, val, val_end - val, + www_form, G_URI_FLAGS_NONE, G_URI_ERROR_MISC, error)) + { + g_free (decoded_attr); + return FALSE; + } + + if (attribute) + *attribute = g_steal_pointer (&decoded_attr); + if (value) + *value = g_steal_pointer (&decoded_value); + + g_free (decoded_attr); + g_free (decoded_value); + + ri->attr = val_end + 1; + return TRUE; +} + /** * g_uri_parse_params: * @params: a `%`-encoded string containing "attribute=value" @@ -1775,7 +1941,10 @@ str_ascii_case_equal (gconstpointer v1, 
* @error: #GError for error reporting, or %NULL to ignore. * * Many URI schemes include one or more attribute/value pairs as part of the URI - * value. This method can be used to parse them into a hash table. + * value. This method can be used to parse them into a hash table. When an + * attribute has multiple occurrences, the last value is the final returned + * value. If you need to handle repeated attributes differently, use + * #GUriParamsIter. * * The @params string is assumed to still be `%`-encoded, but the returned * values will be fully decoded. (Thus it is possible that the returned values @@ -1801,10 +1970,9 @@ g_uri_parse_params (const gchar *params, GError **error) { GHashTable *hash; - const gchar *end, *attr, *attr_end, *value, *value_end, *s; - gchar *decoded_attr, *decoded_value; - guint8 sep_table[256]; /* 1 = index is a separator; 0 otherwise */ - gboolean www_form = flags & G_URI_PARAMS_WWW_FORM; + GUriParamsIter iter; + gchar *attribute, *value; + GError *err = NULL; g_return_val_if_fail (length == 0 || params != NULL, NULL); g_return_val_if_fail (length >= -1, NULL); @@ -1823,51 +1991,16 @@ g_uri_parse_params (const gchar *params, g_free, g_free); } - if (length == -1) - end = params + strlen (params); - else - end = params + length; + g_uri_params_iter_init (&iter, params, length, separators, flags); - memset (sep_table, FALSE, sizeof (sep_table)); - for (s = separators; *s != '\0'; ++s) - sep_table[*(guchar *)s] = TRUE; + while (g_uri_params_iter_next (&iter, &attribute, &value, &err)) + g_hash_table_insert (hash, attribute, value); - attr = params; - while (attr < end) + if (err) { - /* Check if each character in @attr is a separator, by indexing by the - * character value into the @sep_table, which has value 1 stored at an - * index if that index is a separator. 
*/ - for (value_end = attr; value_end < end; value_end++) - if (sep_table[*(guchar *)value_end]) - break; - - attr_end = memchr (attr, '=', value_end - attr); - if (!attr_end) - { - g_hash_table_destroy (hash); - g_set_error_literal (error, G_URI_ERROR, G_URI_ERROR_MISC, - _("Missing '=' and parameter value")); - return NULL; - } - if (!uri_decode (&decoded_attr, NULL, attr, attr_end - attr, - www_form, G_URI_FLAGS_NONE, G_URI_ERROR_MISC, error)) - { - g_hash_table_destroy (hash); - return NULL; - } - - value = attr_end + 1; - if (!uri_decode (&decoded_value, NULL, value, value_end - value, - www_form, G_URI_FLAGS_NONE, G_URI_ERROR_MISC, error)) - { - g_free (decoded_attr); - g_hash_table_destroy (hash); - return NULL; - } - - g_hash_table_insert (hash, decoded_attr, decoded_value); - attr = value_end + 1; + g_propagate_error (error, g_steal_pointer (&err)); + g_hash_table_destroy (hash); + return NULL; } return hash; diff --git a/glib/guri.h b/glib/guri.h index e2bc04b31..59f6933c5 100644 --- a/glib/guri.h +++ b/glib/guri.h @@ -254,6 +254,29 @@ GHashTable *g_uri_parse_params (const gchar *params, GUriParamsFlags flags, GError **error); +typedef struct _GUriParamsIter GUriParamsIter; + +struct _GUriParamsIter +{ + /*< private >*/ + gint dummy0; + gpointer dummy1; + gpointer dummy2; + guint8 dummy3[256]; +}; + +GLIB_AVAILABLE_IN_2_66 +void g_uri_params_iter_init (GUriParamsIter *iter, + const gchar *params, + gssize length, + const gchar *separators, + GUriParamsFlags flags); + +GLIB_AVAILABLE_IN_2_66 +gboolean g_uri_params_iter_next (GUriParamsIter *iter, + gchar **attribute, + gchar **value, + GError **error); /** * G_URI_ERROR: * diff --git a/glib/tests/uri.c b/glib/tests/uri.c index 121749d76..ae651b0a3 100644 --- a/glib/tests/uri.c +++ b/glib/tests/uri.c @@ -1351,50 +1351,75 @@ test_uri_is_valid (void) g_clear_error (&error); } +static const struct +{ + /* Inputs */ + const gchar *uri; + gchar *separators; + GUriParamsFlags flags; + /* Outputs */ + /* key, 
value, key, value, …, limited to length 2*expected_n_params */ + gssize expected_n_iter; /* -1 => error expected */ + const gchar *expected_iter_key_values[6]; + gssize expected_n_params; /* -1 => error expected */ + const gchar *expected_param_key_values[6]; +} params_tests[] = + { + { "p1=foo&p2=bar;p3=baz", "&;", G_URI_PARAMS_NONE, + 3, { "p1", "foo", "p2", "bar", "p3", "baz" }, + 3, { "p1", "foo", "p2", "bar", "p3", "baz" }}, + { "p1=foo&p2=bar", "", G_URI_PARAMS_NONE, + 1, { "p1", "foo&p2=bar" }, + 1, { "p1", "foo&p2=bar" }}, + { "p1=foo&&P1=bar", "&", G_URI_PARAMS_NONE, + 1, { "p1", "foo" }, + -1, { NULL, }}, + { "%00=foo", "&", G_URI_PARAMS_NONE, + 0, { NULL, }, + -1, { NULL, }}, + { "p1=%00", "&", G_URI_PARAMS_NONE, + 0, { NULL, }, + -1, { NULL, }}, + { "p1=foo&p1=bar", "&", G_URI_PARAMS_NONE, + 2, { "p1", "foo", "p1", "bar" }, + 1, { "p1", "bar", NULL, }}, + { "p1=foo&P1=bar", "&", G_URI_PARAMS_CASE_INSENSITIVE, + 2, { "p1", "foo", "P1", "bar" }, + 1, { "p1", "bar", NULL, }}, + { "=%", "&", G_URI_PARAMS_NONE, + 1, { "", "%", NULL, }, + 1, { "", "%", NULL, }}, + { "=", "&", G_URI_PARAMS_NONE, + 1, { "", "", NULL, }, + 1, { "", "", NULL, }}, + { "foo", "&", G_URI_PARAMS_NONE, + 0, { NULL, }, + -1, { NULL, }}, + { "foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver", "&", G_URI_PARAMS_WWW_FORM, + 2, { "foo", "bar & baz", "saisons", "Été+hiver", NULL, }, + 2, { "foo", "bar & baz", "saisons", "Été+hiver", NULL, }}, + { "foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver", "&", G_URI_PARAMS_NONE, + 2, { "foo", "bar+&+baz", "saisons", "Été+hiver", NULL, }, + 2, { "foo", "bar+&+baz", "saisons", "Été+hiver", NULL, }}, + }; + static void -test_uri_parse_params (gconstpointer test_data) +test_uri_iter_params (gconstpointer test_data) { GError *err = NULL; gboolean use_nul_terminated = GPOINTER_TO_INT (test_data); - const struct - { - /* Inputs */ - const gchar *uri; - gchar *separators; - GUriParamsFlags flags; - /* Outputs */ - gssize expected_n_params; /* -1 => error 
expected */ - /* key, value, key, value, …, limited to length 2*expected_n_params */ - const gchar *expected_param_key_values[6]; - } - tests[] = - { - { "p1=foo&p2=bar;p3=baz", "&;", G_URI_PARAMS_NONE, 3, { "p1", "foo", "p2", "bar", "p3", "baz" }}, - { "p1=foo&p2=bar", "", G_URI_PARAMS_NONE, 1, { "p1", "foo&p2=bar" }}, - { "p1=foo&&P1=bar", "&", G_URI_PARAMS_NONE, -1, { NULL, }}, - { "%00=foo", "&", G_URI_PARAMS_NONE, -1, { NULL, }}, - { "p1=%00", "&", G_URI_PARAMS_NONE, -1, { NULL, }}, - { "p1=foo&P1=bar", "&", G_URI_PARAMS_CASE_INSENSITIVE, 1, { "p1", "bar", NULL, }}, - { "=%", "&", G_URI_PARAMS_NONE, 1, { "", "%", NULL, }}, - { "=", "&", G_URI_PARAMS_NONE, 1, { "", "", NULL, }}, - { "foo", "&", G_URI_PARAMS_NONE, -1, { NULL, }}, - { "foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver", "&", G_URI_PARAMS_WWW_FORM, - 2, { "foo", "bar & baz", "saisons", "Été+hiver", NULL, }}, - { "foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver", "&", G_URI_PARAMS_NONE, - 2, { "foo", "bar+&+baz", "saisons", "Été+hiver", NULL, }}, - }; - gsize i; + gsize i, n; - for (i = 0; i < G_N_ELEMENTS (tests); i++) + for (i = 0; i < G_N_ELEMENTS (params_tests); i++) { - GHashTable *params; - gchar *uri = NULL; + GUriParamsIter iter; + gchar *uri, *attr, *value; gssize uri_len; - g_test_message ("URI %" G_GSIZE_FORMAT ": %s", i, tests[i].uri); + g_test_message ("URI %" G_GSIZE_FORMAT ": %s", i, params_tests[i].uri); - g_assert (tests[i].expected_n_params < 0 || - tests[i].expected_n_params <= G_N_ELEMENTS (tests[i].expected_param_key_values) / 2); + g_assert (params_tests[i].expected_n_params < 0 || + params_tests[i].expected_n_params <= G_N_ELEMENTS (params_tests[i].expected_param_key_values) / 2); /* The tests get run twice: once with the length unspecified, using a * nul-terminated string; and once with the length specified and a copy of @@ -1403,17 +1428,70 @@ test_uri_parse_params (gconstpointer test_data) if (use_nul_terminated) { uri_len = -1; - uri = g_strdup (tests[i].uri); + uri = g_strdup 
(params_tests[i].uri); } else { - uri_len = strlen (tests[i].uri); /* no trailing nul */ - uri = g_memdup (tests[i].uri, uri_len); + uri_len = strlen (params_tests[i].uri); /* no trailing nul */ + uri = g_memdup (params_tests[i].uri, uri_len); } - params = g_uri_parse_params (uri, uri_len, tests[i].separators, tests[i].flags, &err); + n = 0; + g_uri_params_iter_init (&iter, params_tests[i].uri, -1, params_tests[i].separators, params_tests[i].flags); + while (g_uri_params_iter_next (&iter, &attr, &value, &err)) + { + g_assert_cmpstr (attr, ==, params_tests[i].expected_iter_key_values[n * 2]); + g_assert_cmpstr (value, ==, params_tests[i].expected_iter_key_values[n * 2 + 1]); + n++; + g_free (attr); + g_free (value); + } + g_assert_cmpint (n, ==, params_tests[i].expected_n_iter); + if (err) + { + g_assert_error (err, G_URI_ERROR, G_URI_ERROR_MISC); + g_clear_error (&err); + } + g_free (uri); + } +} - if (tests[i].expected_n_params < 0) +static void +test_uri_parse_params (gconstpointer test_data) +{ + GError *err = NULL; + gboolean use_nul_terminated = GPOINTER_TO_INT (test_data); + gsize i; + + for (i = 0; i < G_N_ELEMENTS (params_tests); i++) + { + GHashTable *params; + gchar *uri = NULL; + gssize uri_len; + + g_test_message ("URI %" G_GSIZE_FORMAT ": %s", i, params_tests[i].uri); + + g_assert (params_tests[i].expected_n_params < 0 || + params_tests[i].expected_n_params <= G_N_ELEMENTS (params_tests[i].expected_param_key_values) / 2); + + /* The tests get run twice: once with the length unspecified, using a + * nul-terminated string; and once with the length specified and a copy of + * the string with the trailing nul explicitly removed (to help catch + * buffer overflows). 
*/ + if (use_nul_terminated) + { + uri_len = -1; + uri = g_strdup (params_tests[i].uri); + } + else + { + uri_len = strlen (params_tests[i].uri); /* no trailing nul */ + uri = g_memdup (params_tests[i].uri, uri_len); + } + + params = g_uri_parse_params (uri, uri_len, params_tests[i].separators, params_tests[i].flags, &err); + + if (params_tests[i].expected_n_params < 0) { g_assert_null (params); g_assert_error (err, G_URI_ERROR, G_URI_ERROR_MISC); @@ -1424,11 +1502,11 @@ test_uri_parse_params (gconstpointer test_data) gsize j; g_assert_no_error (err); - g_assert_cmpint (g_hash_table_size (params), ==, tests[i].expected_n_params); + g_assert_cmpint (g_hash_table_size (params), ==, params_tests[i].expected_n_params); - for (j = 0; j < tests[i].expected_n_params; j += 2) - g_assert_cmpstr (g_hash_table_lookup (params, tests[i].expected_param_key_values[j]), ==, - tests[i].expected_param_key_values[j + 1]); + for (j = 0; j < params_tests[i].expected_n_params; j += 2) + g_assert_cmpstr (g_hash_table_lookup (params, params_tests[i].expected_param_key_values[j]), ==, + params_tests[i].expected_param_key_values[j + 1]); } g_clear_pointer (&params, g_hash_table_unref); @@ -1490,6 +1568,8 @@ main (int argc, g_test_add_func ("/uri/is_valid", test_uri_is_valid); g_test_add_func ("/uri/to-string", test_uri_to_string); g_test_add_func ("/uri/join", test_uri_join); + g_test_add_data_func ("/uri/iter-params/nul-terminated", GINT_TO_POINTER (TRUE), test_uri_iter_params); + g_test_add_data_func ("/uri/iter-params/length", GINT_TO_POINTER (FALSE), test_uri_iter_params); g_test_add_data_func ("/uri/parse-params/nul-terminated", GINT_TO_POINTER (TRUE), test_uri_parse_params); g_test_add_data_func ("/uri/parse-params/length", GINT_TO_POINTER (FALSE), test_uri_parse_params);