Merge branch 'uri-parse-params' into 'master'

A few g_uri_parse_params() improvements

See merge request GNOME/glib!1557
This commit is contained in:
Philip Withnall 2020-07-08 10:53:11 +00:00
commit 735c80dc5b
6 changed files with 144 additions and 40 deletions

View File

@ -3371,6 +3371,7 @@ g_uri_get_query
g_uri_get_fragment g_uri_get_fragment
g_uri_get_flags g_uri_get_flags
<SUBSECTION> <SUBSECTION>
GUriParamsFlags
g_uri_parse_params g_uri_parse_params
<SUBSECTION> <SUBSECTION>
G_URI_RESERVED_CHARS_ALLOWED_IN_PATH G_URI_RESERVED_CHARS_ALLOWED_IN_PATH

View File

@ -3,6 +3,7 @@
int int
LLVMFuzzerTestOneInput (const unsigned char *data, size_t size) LLVMFuzzerTestOneInput (const unsigned char *data, size_t size)
{ {
GError *error = NULL;
GHashTable *parsed_params = NULL; GHashTable *parsed_params = NULL;
fuzz_set_logging_func (); fuzz_set_logging_func ();
@ -10,10 +11,17 @@ LLVMFuzzerTestOneInput (const unsigned char *data, size_t size)
if (size > G_MAXSSIZE) if (size > G_MAXSSIZE)
return 0; return 0;
parsed_params = g_uri_parse_params ((const gchar *) data, (gssize) size, '&', FALSE); parsed_params = g_uri_parse_params ((const gchar *) data, (gssize) size,
"&", G_URI_PARAMS_NONE, &error);
if (parsed_params == NULL) if (parsed_params == NULL)
return 0; {
g_assert (error);
g_clear_error (&error);
return 0;
}
g_assert_no_error (error);
g_hash_table_unref (parsed_params); g_hash_table_unref (parsed_params);
return 0; return 0;

View File

@ -2393,7 +2393,8 @@ g_strsplit (const gchar *string,
* g_strsplit_set: * g_strsplit_set:
* @string: The string to be tokenized * @string: The string to be tokenized
* @delimiters: A nul-terminated string containing bytes that are used * @delimiters: A nul-terminated string containing bytes that are used
* to split the string. * to split the string (it can accept an empty string, which will result
* in no string splitting).
* @max_tokens: The maximum number of tokens to split @string into. * @max_tokens: The maximum number of tokens to split @string into.
* If this is less than 1, the string is split completely * If this is less than 1, the string is split completely
* *
@ -2429,7 +2430,7 @@ g_strsplit_set (const gchar *string,
const gchar *delimiters, const gchar *delimiters,
gint max_tokens) gint max_tokens)
{ {
gboolean delim_table[256]; guint8 delim_table[256]; /* 1 = index is a separator; 0 otherwise */
GSList *tokens, *list; GSList *tokens, *list;
gint n_tokens; gint n_tokens;
const gchar *s; const gchar *s;
@ -2450,6 +2451,9 @@ g_strsplit_set (const gchar *string,
return result; return result;
} }
/* Check if each character in @string is a separator, by indexing by the
* character value into the @delim_table, which has value 1 stored at an index
* if that index is a separator. */
memset (delim_table, FALSE, sizeof (delim_table)); memset (delim_table, FALSE, sizeof (delim_table));
for (s = delimiters; *s != '\0'; ++s) for (s = delimiters; *s != '\0'; ++s)
delim_table[*(guchar *)s] = TRUE; delim_table[*(guchar *)s] = TRUE;

View File

@ -148,7 +148,10 @@
* g_assert_error(err, G_URI_ERROR, G_URI_ERROR_BAD_QUERY); * g_assert_error(err, G_URI_ERROR, G_URI_ERROR_BAD_QUERY);
* ]| * ]|
* *
* (you should pass %G_URI_FLAGS_ENCODED if you need to handle that case manually). * You should pass %G_URI_FLAGS_ENCODED or %G_URI_FLAGS_ENCODED_QUERY if you
* need to handle that case manually. In particular, if the query string
* contains '=' characters that are '%'-encoded, you should leave the query
* encoded and let g_uri_parse_params() decode it, so that it is decoded only once.
* *
* #GUri is immutable once constructed, and can safely be accessed from * #GUri is immutable once constructed, and can safely be accessed from
* multiple threads. Its reference counting is atomic. * multiple threads. Its reference counting is atomic.
@ -237,6 +240,7 @@ uri_decoder (gchar **out,
const gchar *start, const gchar *start,
gsize length, gsize length,
gboolean just_normalize, gboolean just_normalize,
gboolean www_form,
GUriFlags flags, GUriFlags flags,
GUriError parse_error, GUriError parse_error,
GError **error) GError **error)
@ -287,6 +291,8 @@ uri_decoder (gchar **out,
s += 2; s += 2;
} }
} }
else if (www_form && *s == '+')
*d++ = ' ';
else else
*d++ = *s; *d++ = *s;
} }
@ -314,11 +320,12 @@ static gboolean
uri_decode (gchar **out, uri_decode (gchar **out,
const gchar *start, const gchar *start,
gsize length, gsize length,
gboolean www_form,
GUriFlags flags, GUriFlags flags,
GUriError parse_error, GUriError parse_error,
GError **error) GError **error)
{ {
return uri_decoder (out, start, length, FALSE, flags, return uri_decoder (out, start, length, FALSE, www_form, flags,
parse_error, error) != -1; parse_error, error) != -1;
} }
@ -330,7 +337,7 @@ uri_normalize (gchar **out,
GUriError parse_error, GUriError parse_error,
GError **error) GError **error)
{ {
return uri_decoder (out, start, length, TRUE, flags, return uri_decoder (out, start, length, TRUE, FALSE, flags,
parse_error, error) != -1; parse_error, error) != -1;
} }
@ -450,7 +457,7 @@ parse_host (const gchar *start,
} }
flags &= ~G_URI_FLAGS_ENCODED; flags &= ~G_URI_FLAGS_ENCODED;
if (!uri_decode (&decoded, start, length, flags, if (!uri_decode (&decoded, start, length, FALSE, flags,
G_URI_ERROR_BAD_HOST, error)) G_URI_ERROR_BAD_HOST, error))
return FALSE; return FALSE;
@ -771,7 +778,7 @@ g_uri_split_internal (const gchar *uri_string,
end = p + strcspn (p, "#"); end = p + strcspn (p, "#");
if (*end == '#') if (*end == '#')
{ {
if (!uri_decode (fragment, end + 1, strlen (end + 1), flags, if (!uri_decode (fragment, end + 1, strlen (end + 1), FALSE, flags,
G_URI_ERROR_BAD_FRAGMENT, error)) G_URI_ERROR_BAD_FRAGMENT, error))
goto fail; goto fail;
} }
@ -780,7 +787,8 @@ g_uri_split_internal (const gchar *uri_string,
question = memchr (p, '?', end - p); question = memchr (p, '?', end - p);
if (question) if (question)
{ {
if (!uri_normalize (query, question + 1, end - (question + 1), flags, if (!uri_normalize (query, question + 1, end - (question + 1),
flags | (flags & G_URI_FLAGS_ENCODED_QUERY ? G_URI_FLAGS_ENCODED : 0),
G_URI_ERROR_BAD_QUERY, error)) G_URI_ERROR_BAD_QUERY, error))
goto fail; goto fail;
end = question; end = question;
@ -1395,7 +1403,7 @@ g_uri_join_internal (GUriFlags flags,
if (query) if (query)
{ {
g_string_append_c (str, '?'); g_string_append_c (str, '?');
if (encoded) if (encoded || flags & G_URI_FLAGS_ENCODED_QUERY)
g_string_append (str, query); g_string_append (str, query);
else else
g_string_append_uri_escaped (str, query, QUERY_ALLOWED_CHARS, TRUE); g_string_append_uri_escaped (str, query, QUERY_ALLOWED_CHARS, TRUE);
@ -1750,16 +1758,20 @@ str_ascii_case_equal (gconstpointer v1,
* @params: a `%`-encoded string containing "attribute=value" * @params: a `%`-encoded string containing "attribute=value"
* parameters * parameters
* @length: the length of @params, or -1 if it is NUL-terminated * @length: the length of @params, or -1 if it is NUL-terminated
* @separator: the separator character between parameters. * @separators: the separator byte character set between parameters. (usually
* (usually ';', but sometimes '&') * "&", but sometimes ";" or both "&;"). Note that this function works on
* @case_insensitive: whether parameter names are case insensitive * bytes not characters, so it can't be used to delimit UTF-8 strings for
* anything but ASCII characters. You may pass an empty set, in which case
* no splitting will occur.
* @flags: flags to modify the way the parameters are handled.
* @error: #GError for error reporting, or %NULL to ignore.
* *
* Many URI schemes include one or more attribute/value pairs as part of the URI * Many URI schemes include one or more attribute/value pairs as part of the URI
* value. This method can be used to parse them into a hash table. * value. This method can be used to parse them into a hash table.
* *
* The @params string is assumed to still be `%`-encoded, but the returned * The @params string is assumed to still be `%`-encoded, but the returned
* values will be fully decoded. (Thus it is possible that the returned values * values will be fully decoded. (Thus it is possible that the returned values
* may contain '=' or @separator, if the value was encoded in the input.) * may contain '=' or @separators, if the value was encoded in the input.)
* Invalid `%`-encoding is treated as with the non-%G_URI_FLAGS_PARSE_STRICT * Invalid `%`-encoding is treated as with the non-%G_URI_FLAGS_PARSE_STRICT
* rules for g_uri_parse(). (However, if @params is the path or query string * rules for g_uri_parse(). (However, if @params is the path or query string
* from a #GUri that was parsed with %G_URI_FLAGS_PARSE_STRICT and * from a #GUri that was parsed with %G_URI_FLAGS_PARSE_STRICT and
@ -1768,7 +1780,7 @@ str_ascii_case_equal (gconstpointer v1,
* *
* Return value: (transfer full) (element-type utf8 utf8): a hash table of * Return value: (transfer full) (element-type utf8 utf8): a hash table of
* attribute/value pairs. Both names and values will be fully-decoded. If * attribute/value pairs. Both names and values will be fully-decoded. If
* @params cannot be parsed (eg, it contains two @separator characters in a * @params cannot be parsed (eg, it contains two @separators characters in a
* row), then %NULL is returned. * row), then %NULL is returned.
* *
* Since: 2.66 * Since: 2.66
@ -1776,17 +1788,22 @@ str_ascii_case_equal (gconstpointer v1,
GHashTable * GHashTable *
g_uri_parse_params (const gchar *params, g_uri_parse_params (const gchar *params,
gssize length, gssize length,
gchar separator, const gchar *separators,
gboolean case_insensitive) GUriParamsFlags flags,
GError **error)
{ {
GHashTable *hash; GHashTable *hash;
const gchar *end, *attr, *attr_end, *value, *value_end; const gchar *end, *attr, *attr_end, *value, *value_end, *s;
gchar *decoded_attr, *decoded_value; gchar *decoded_attr, *decoded_value;
guint8 sep_table[256]; /* 1 = index is a separator; 0 otherwise */
gboolean www_form = flags & G_URI_PARAMS_WWW_FORM;
g_return_val_if_fail (length == 0 || params != NULL, NULL); g_return_val_if_fail (length == 0 || params != NULL, NULL);
g_return_val_if_fail (length >= -1, NULL); g_return_val_if_fail (length >= -1, NULL);
g_return_val_if_fail (separators != NULL, NULL);
g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
if (case_insensitive) if (flags & G_URI_PARAMS_CASE_INSENSITIVE)
{ {
hash = g_hash_table_new_full (str_ascii_case_hash, hash = g_hash_table_new_full (str_ascii_case_hash,
str_ascii_case_equal, str_ascii_case_equal,
@ -1803,21 +1820,30 @@ g_uri_parse_params (const gchar *params,
else else
end = params + length; end = params + length;
memset (sep_table, FALSE, sizeof (sep_table));
for (s = separators; *s != '\0'; ++s)
sep_table[*(guchar *)s] = TRUE;
attr = params; attr = params;
while (attr < end) while (attr < end)
{ {
value_end = memchr (attr, separator, end - attr); /* Check if each character in @attr is a separator, by indexing by the
if (!value_end) * character value into the @sep_table, which has value 1 stored at an
value_end = end; * index if that index is a separator. */
for (value_end = attr; value_end < end; value_end++)
if (sep_table[*(guchar *)value_end])
break;
attr_end = memchr (attr, '=', value_end - attr); attr_end = memchr (attr, '=', value_end - attr);
if (!attr_end) if (!attr_end)
{ {
g_hash_table_destroy (hash); g_hash_table_destroy (hash);
g_set_error_literal (error, G_URI_ERROR, G_URI_ERROR_MISC,
_("Missing '=' and parameter value"));
return NULL; return NULL;
} }
if (!uri_decode (&decoded_attr, attr, attr_end - attr, if (!uri_decode (&decoded_attr, attr, attr_end - attr,
0, G_URI_ERROR_MISC, NULL)) www_form, G_URI_FLAGS_NONE, G_URI_ERROR_MISC, error))
{ {
g_hash_table_destroy (hash); g_hash_table_destroy (hash);
return NULL; return NULL;
@ -1825,7 +1851,7 @@ g_uri_parse_params (const gchar *params,
value = attr_end + 1; value = attr_end + 1;
if (!uri_decode (&decoded_value, value, value_end - value, if (!uri_decode (&decoded_value, value, value_end - value,
0, G_URI_ERROR_MISC, NULL)) www_form, G_URI_FLAGS_NONE, G_URI_ERROR_MISC, error))
{ {
g_free (decoded_attr); g_free (decoded_attr);
g_hash_table_destroy (hash); g_hash_table_destroy (hash);
@ -2106,6 +2132,7 @@ g_uri_unescape_segment (const gchar *escaped_string,
if (!uri_decode (&unescaped, if (!uri_decode (&unescaped,
escaped_string, length, escaped_string, length,
FALSE,
G_URI_FLAGS_PARSE_STRICT, G_URI_FLAGS_PARSE_STRICT,
0, NULL)) 0, NULL))
return NULL; return NULL;
@ -2220,6 +2247,7 @@ g_uri_unescape_bytes (const gchar *escaped_string,
unescaped_length = uri_decoder (&buf, unescaped_length = uri_decoder (&buf,
escaped_string, length, escaped_string, length,
FALSE, FALSE,
FALSE,
G_URI_FLAGS_PARSE_STRICT|G_URI_FLAGS_ENCODED, G_URI_FLAGS_PARSE_STRICT|G_URI_FLAGS_ENCODED,
0, NULL); 0, NULL);
if (unescaped_length == -1) if (unescaped_length == -1)

View File

@ -53,6 +53,8 @@ void g_uri_unref (GUri *uri);
* %G_URI_FLAGS_NON_DNS is also set.) When building a URI, it indicates * %G_URI_FLAGS_NON_DNS is also set.) When building a URI, it indicates
* that you have already `%`-encoded the components, and so #GUri * that you have already `%`-encoded the components, and so #GUri
* should not do any encoding itself. * should not do any encoding itself.
* @G_URI_FLAGS_ENCODED_QUERY: Same as %G_URI_FLAGS_ENCODED, for the query
* field only.
* @G_URI_FLAGS_NONE: No flags set. * @G_URI_FLAGS_NONE: No flags set.
* *
* Flags that describe a URI. * Flags that describe a URI.
@ -72,6 +74,7 @@ typedef enum {
G_URI_FLAGS_HAS_AUTH_PARAMS = 1 << 2, G_URI_FLAGS_HAS_AUTH_PARAMS = 1 << 2,
G_URI_FLAGS_ENCODED = 1 << 3, G_URI_FLAGS_ENCODED = 1 << 3,
G_URI_FLAGS_NON_DNS = 1 << 4, G_URI_FLAGS_NON_DNS = 1 << 4,
G_URI_FLAGS_ENCODED_QUERY = 1 << 5,
} GUriFlags; } GUriFlags;
GLIB_AVAILABLE_IN_2_66 GLIB_AVAILABLE_IN_2_66
@ -222,11 +225,29 @@ const gchar *g_uri_get_fragment (GUri *uri);
GLIB_AVAILABLE_IN_2_66 GLIB_AVAILABLE_IN_2_66
GUriFlags g_uri_get_flags (GUri *uri); GUriFlags g_uri_get_flags (GUri *uri);
/**
* GUriParamsFlags:
* @G_URI_PARAMS_NONE: No flags set.
* @G_URI_PARAMS_CASE_INSENSITIVE: Parameter names are case insensitive.
* @G_URI_PARAMS_WWW_FORM: Replace `+` with a space character when decoding
* parameter names and values.
*
* Flags modifying the way parameters are handled by g_uri_parse_params().
* The values are distinct bits, so they can be combined with bitwise OR.
*
* Since: 2.66
*/
GLIB_AVAILABLE_TYPE_IN_2_66
typedef enum {
G_URI_PARAMS_NONE = 0,
G_URI_PARAMS_CASE_INSENSITIVE = 1 << 0,
G_URI_PARAMS_WWW_FORM = 1 << 1,
} GUriParamsFlags;
GLIB_AVAILABLE_IN_2_66 GLIB_AVAILABLE_IN_2_66
GHashTable * g_uri_parse_params (const gchar *params, GHashTable *g_uri_parse_params (const gchar *params,
gssize length, gssize length,
gchar separator, const gchar *separators,
gboolean case_insensitive); GUriParamsFlags flags,
GError **error);
/** /**
* G_URI_ERROR: * G_URI_ERROR:

View File

@ -1070,6 +1070,38 @@ test_uri_split (void)
g_assert_cmpstr (path, ==, ";oo/"); g_assert_cmpstr (path, ==, ";oo/");
g_free (path); g_free (path);
g_uri_split ("http://h%01st/path?saisons=%C3%89t%C3%A9%2Bhiver",
G_URI_FLAGS_NONE,
NULL,
NULL,
&host,
NULL,
NULL,
&query,
NULL,
&error);
g_assert_no_error (error);
g_assert_cmpstr (host, ==, "h\001st");
g_assert_cmpstr (query, ==, "saisons=Été+hiver");
g_free (host);
g_free (query);
g_uri_split ("http://h%01st/path?saisons=%C3%89t%C3%A9%2Bhiver",
G_URI_FLAGS_ENCODED_QUERY,
NULL,
NULL,
&host,
NULL,
NULL,
&query,
NULL,
&error);
g_assert_no_error (error);
g_assert_cmpstr (host, ==, "h\001st");
g_assert_cmpstr (query, ==, "saisons=%C3%89t%C3%A9%2Bhiver");
g_free (host);
g_free (query);
g_uri_split_with_user ("scheme://user:pass;auth@host:1234/path?query#fragment", g_uri_split_with_user ("scheme://user:pass;auth@host:1234/path?query#fragment",
G_URI_FLAGS_HAS_AUTH_PARAMS|G_URI_FLAGS_HAS_PASSWORD, G_URI_FLAGS_HAS_AUTH_PARAMS|G_URI_FLAGS_HAS_PASSWORD,
NULL, NULL,
@ -1265,27 +1297,34 @@ test_uri_is_valid (void)
static void static void
test_uri_parse_params (gconstpointer test_data) test_uri_parse_params (gconstpointer test_data)
{ {
GError *err = NULL;
gboolean use_nul_terminated = GPOINTER_TO_INT (test_data); gboolean use_nul_terminated = GPOINTER_TO_INT (test_data);
const struct const struct
{ {
/* Inputs */ /* Inputs */
const gchar *uri; const gchar *uri;
gchar separator; gchar *separators;
gboolean case_insensitive; GUriParamsFlags flags;
/* Outputs */ /* Outputs */
gssize expected_n_params; /* -1 => error expected */ gssize expected_n_params; /* -1 => error expected */
/* key, value, key, value, …, limited to length 2*expected_n_params */ /* key, value, key, value, …, limited to length 2*expected_n_params */
const gchar *expected_param_key_values[4]; const gchar *expected_param_key_values[6];
} }
tests[] = tests[] =
{ {
{ "", '&', FALSE, 0, { NULL, }}, { "p1=foo&p2=bar;p3=baz", "&;", G_URI_PARAMS_NONE, 3, { "p1", "foo", "p2", "bar", "p3", "baz" }},
{ "p1=foo&p2=bar", '&', FALSE, 2, { "p1", "foo", "p2", "bar" }}, { "p1=foo&p2=bar", "", G_URI_PARAMS_NONE, 1, { "p1", "foo&p2=bar" }},
{ "p1=foo&&P1=bar", '&', FALSE, -1, { NULL, }}, { "p1=foo&&P1=bar", "&", G_URI_PARAMS_NONE, -1, { NULL, }},
{ "%00=foo", '&', FALSE, -1, { NULL, }}, { "%00=foo", "&", G_URI_PARAMS_NONE, -1, { NULL, }},
{ "p1=%00", '&', FALSE, -1, { NULL, }}, { "p1=%00", "&", G_URI_PARAMS_NONE, -1, { NULL, }},
{ "p1=foo&P1=bar", '&', TRUE, 1, { "p1", "bar", NULL, }}, { "p1=foo&P1=bar", "&", G_URI_PARAMS_CASE_INSENSITIVE, 1, { "p1", "bar", NULL, }},
{ "=%", '&', FALSE, 1, { "", "%", NULL, }}, { "=%", "&", G_URI_PARAMS_NONE, 1, { "", "%", NULL, }},
{ "=", "&", G_URI_PARAMS_NONE, 1, { "", "", NULL, }},
{ "foo", "&", G_URI_PARAMS_NONE, -1, { NULL, }},
{ "foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver", "&", G_URI_PARAMS_WWW_FORM,
2, { "foo", "bar & baz", "saisons", "Été+hiver", NULL, }},
{ "foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver", "&", G_URI_PARAMS_NONE,
2, { "foo", "bar+&+baz", "saisons", "Été+hiver", NULL, }},
}; };
gsize i; gsize i;
@ -1315,16 +1354,19 @@ test_uri_parse_params (gconstpointer test_data)
uri = g_memdup (tests[i].uri, uri_len); uri = g_memdup (tests[i].uri, uri_len);
} }
params = g_uri_parse_params (uri, uri_len, tests[i].separator, tests[i].case_insensitive); params = g_uri_parse_params (uri, uri_len, tests[i].separators, tests[i].flags, &err);
if (tests[i].expected_n_params < 0) if (tests[i].expected_n_params < 0)
{ {
g_assert_null (params); g_assert_null (params);
g_assert_error (err, G_URI_ERROR, G_URI_ERROR_MISC);
g_clear_error (&err);
} }
else else
{ {
gsize j; gsize j;
g_assert_no_error (err);
g_assert_cmpint (g_hash_table_size (params), ==, tests[i].expected_n_params); g_assert_cmpint (g_hash_table_size (params), ==, tests[i].expected_n_params);
for (j = 0; j < tests[i].expected_n_params; j += 2) for (j = 0; j < tests[i].expected_n_params; j += 2)