gmarkup: Add g_markup_collect_known_attributes()

Add a variant of g_markup_collect_attributes() which will
ignore unknown attributes (such as those from different XML
namespaces) when parsing markup, rather than returning
G_MARKUP_ERROR_UNKNOWN_ATTRIBUTE as g_markup_collect_attributes()
does.

Patch by Philip Withnall,
https://bugzilla.gnome.org/show_bug.cgi?id=665634
This commit is contained in:
Matthias Clasen 2012-08-16 23:40:08 -04:00
parent 21aff13d22
commit 8d40389d15
5 changed files with 375 additions and 245 deletions

View File

@ -1125,6 +1125,7 @@ g_markup_parse_context_pop
<SUBSECTION> <SUBSECTION>
GMarkupCollectType GMarkupCollectType
g_markup_collect_attributes g_markup_collect_attributes
g_markup_collect_known_attributes
<SUBSECTION Private> <SUBSECTION Private>
g_markup_error_quark g_markup_error_quark
</SECTION> </SECTION>

View File

@ -683,6 +683,7 @@ g_markup_parse_context_pop
g_markup_printf_escaped g_markup_printf_escaped
g_markup_vprintf_escaped g_markup_vprintf_escaped
g_markup_collect_attributes g_markup_collect_attributes
g_markup_collect_known_attributes
g_free g_free
g_clear_pointer g_clear_pointer
g_malloc g_malloc

View File

@ -2549,13 +2549,234 @@ g_markup_parse_boolean (const char *string,
* is set depending on what value type is used * is set depending on what value type is used
* *
* A mixed enumerated type and flags field. You must specify one type * A mixed enumerated type and flags field. You must specify one type
* (string, strdup, boolean, tristate). Additionally, you may optionally * (string, strdup, boolean, tristate). Additionally, you may optionally
* bitwise OR the type with the flag %G_MARKUP_COLLECT_OPTIONAL. * bitwise OR the type with the flag %G_MARKUP_COLLECT_OPTIONAL.
* *
* It is likely that this enum will be extended in the future to * It is likely that this enum will be extended in the future to
* support other types. * support other types.
*/ */
/*
 * _g_markup_collect_attributesv:
 * @element_name: the current tag name, used only in error messages
 * @attribute_names: %NULL-terminated array of attribute names
 * @attribute_values: array of attribute values, parallel to @attribute_names
 * @reject_unknown_attributes: if %TRUE, an attribute in @attribute_names
 *   that was not requested is reported as
 *   %G_MARKUP_ERROR_UNKNOWN_ATTRIBUTE; if %FALSE it is silently accepted
 * @error: return location for a #GError, or %NULL
 * @first_type: the #GMarkupCollectType of the first attribute to collect
 * @first_attr: the name of the first attribute to collect
 * @ap: the remaining arguments: a storage pointer (or %NULL) for the first
 *   attribute, followed by further (type, name, pointer) triples and
 *   terminated by a lone %G_MARKUP_COLLECT_INVALID
 *
 * Shared implementation behind the variadic attribute collectors.  The
 * caller owns @ap (it must have called va_start() and must call va_end());
 * this function works on @ap and on a private copy of it.
 *
 * Attributes given more than once are always rejected with
 * %G_MARKUP_ERROR_INVALID_CONTENT, regardless of
 * @reject_unknown_attributes.
 *
 * On failure every non-%NULL storage location is reset: string pointers to
 * %NULL (strings already duplicated by this call are freed first), booleans
 * to %FALSE and tristates to -1.
 *
 * Returns: %TRUE on success, %FALSE if @error was set
 */
static gboolean
_g_markup_collect_attributesv (const gchar         *element_name,
                               const gchar        **attribute_names,
                               const gchar        **attribute_values,
                               gboolean             reject_unknown_attributes,
                               GError             **error,
                               GMarkupCollectType   first_type,
                               const gchar         *first_attr,
                               va_list              ap)
{
  /* Number of leading attributes whose "already collected" state is
   * tracked in the bitmask; see the comment in the lookup loop below.
   */
  enum { MAX_TRACKED_ATTRIBUTES = 40 };

  GMarkupCollectType type;
  const gchar *attr;
  guint64 collected;
  int written;
  int i;
  va_list ap2;

  type = first_type;
  attr = first_attr;
  collected = 0;
  written = 0;

  /* Take a copy of the va_list so that we can iterate back over it in case of
   * errors. */
  va_copy (ap2, ap);

  while (type != G_MARKUP_COLLECT_INVALID)
    {
      gboolean mandatory;
      const gchar *value;

      mandatory = !(type & G_MARKUP_COLLECT_OPTIONAL);
      type &= (G_MARKUP_COLLECT_OPTIONAL - 1);

      /* tristate records a value != TRUE and != FALSE
       * for the case where the attribute is missing
       */
      if (type == G_MARKUP_COLLECT_TRISTATE)
        mandatory = FALSE;

      /* find the first not-yet-collected attribute with this name */
      for (i = 0; attribute_names[i]; i++)
        if (i >= MAX_TRACKED_ATTRIBUTES ||
            !(collected & (G_GUINT64_CONSTANT(1) << i)))
          if (!strcmp (attribute_names[i], attr))
            break;

      /* ISO C99 only promises that the user can pass up to 127 arguments.
       * Subtracting the first 4 arguments plus the final NULL and dividing
       * by 3 arguments per collected attribute, we are left with a maximum
       * number of supported attributes of (127 - 5) / 3 = 40.
       *
       * In reality, nobody is ever going to call us with anywhere close to
       * 40 attributes to collect, so it is safe to assume that if i >= 40
       * then the user has given some invalid or repeated arguments.  These
       * problems will be caught and reported at the end of the function.
       *
       * We know at this point that we have an error, but we don't know
       * what error it is, so just continue...
       */
      if (attribute_names[i] != NULL)
        {
          if (i < MAX_TRACKED_ATTRIBUTES)
            collected |= (G_GUINT64_CONSTANT(1) << i);

          value = attribute_values[i];
        }
      else
        {
          /* attribute not present: do not index attribute_values at the
           * terminator position and do not mark anything as collected
           */
          value = NULL;
        }

      if (value == NULL && mandatory)
        {
          g_set_error (error, G_MARKUP_ERROR,
                       G_MARKUP_ERROR_MISSING_ATTRIBUTE,
                       "element '%s' requires attribute '%s'",
                       element_name, attr);

          goto failure;
        }

      switch (type)
        {
        case G_MARKUP_COLLECT_STRING:
          {
            const char **str_ptr;

            str_ptr = va_arg (ap, const char **);

            if (str_ptr != NULL)
              *str_ptr = value;
          }
          break;

        case G_MARKUP_COLLECT_STRDUP:
          {
            char **str_ptr;

            str_ptr = va_arg (ap, char **);

            if (str_ptr != NULL)
              *str_ptr = g_strdup (value);
          }
          break;

        case G_MARKUP_COLLECT_BOOLEAN:
        case G_MARKUP_COLLECT_TRISTATE:
          if (value == NULL)
            {
              gboolean *bool_ptr;

              bool_ptr = va_arg (ap, gboolean *);

              if (bool_ptr != NULL)
                {
                  if (type == G_MARKUP_COLLECT_TRISTATE)
                    /* constructivists rejoice!
                     * neither false nor true...
                     */
                    *bool_ptr = -1;

                  else /* G_MARKUP_COLLECT_BOOLEAN */
                    *bool_ptr = FALSE;
                }
            }
          else
            {
              if (!g_markup_parse_boolean (value, va_arg (ap, gboolean *)))
                {
                  g_set_error (error, G_MARKUP_ERROR,
                               G_MARKUP_ERROR_INVALID_CONTENT,
                               "element '%s', attribute '%s', value '%s' "
                               "cannot be parsed as a boolean value",
                               element_name, attr, value);

                  goto failure;
                }
            }

          break;

        default:
          g_assert_not_reached ();
        }

      written++;

      /* The terminating G_MARKUP_COLLECT_INVALID is not followed by a
       * name, so only fetch one when another triple actually follows.
       */
      type = va_arg (ap, GMarkupCollectType);
      if (type != G_MARKUP_COLLECT_INVALID)
        attr = va_arg (ap, const char *);
    }

  /* ensure we collected all the arguments */
  for (i = 0; attribute_names[i]; i++)
    /* Bits at or above MAX_TRACKED_ATTRIBUTES are never set, so such
     * attributes are always "not collected"; testing the index first also
     * keeps the shift count below the width of the mask.
     */
    if (i >= MAX_TRACKED_ATTRIBUTES ||
        (collected & (G_GUINT64_CONSTANT(1) << i)) == 0)
      {
        /* attribute not collected: could be caused by two things.
         *
         * 1) it doesn't exist in our list of attributes
         * 2) it existed but was matched by a duplicate attribute earlier
         *
         * find out.
         */
        int j;

        for (j = 0; j < i; j++)
          if (strcmp (attribute_names[i], attribute_names[j]) == 0)
            /* duplicate! */
            break;

        /* j is now the first occurrence of attribute_names[i] */
        if (i == j && reject_unknown_attributes)
          g_set_error (error, G_MARKUP_ERROR,
                       G_MARKUP_ERROR_UNKNOWN_ATTRIBUTE,
                       "attribute '%s' invalid for element '%s'",
                       attribute_names[i], element_name);
        else if (i != j)
          g_set_error (error, G_MARKUP_ERROR,
                       G_MARKUP_ERROR_INVALID_CONTENT,
                       "attribute '%s' given multiple times for element '%s'",
                       attribute_names[i], element_name);
        else
          continue; /* accepting unknown attributes */

        goto failure;
      }

  va_end (ap2);

  return TRUE;

failure:
  /* replay the above to free allocations */
  type = first_type;

  while (type != G_MARKUP_COLLECT_INVALID)
    {
      gpointer ptr;

      ptr = va_arg (ap2, gpointer);

      if (ptr != NULL)
        {
          switch (type & (G_MARKUP_COLLECT_OPTIONAL - 1))
            {
            case G_MARKUP_COLLECT_STRDUP:
              /* Only the first 'written' slots were stored by the loop
               * above; later ones still hold whatever the caller left
               * there and must not be freed.
               */
              if (written > 0)
                g_free (*(char **) ptr);
              /* fall through */

            case G_MARKUP_COLLECT_STRING:
              *(char **) ptr = NULL;
              break;

            case G_MARKUP_COLLECT_BOOLEAN:
              *(gboolean *) ptr = FALSE;
              break;

            case G_MARKUP_COLLECT_TRISTATE:
              *(gboolean *) ptr = -1;
              break;

            default:
              break;
            }
        }

      /* one fewer previously-written slot remains ahead of us */
      if (written > 0)
        written--;

      type = va_arg (ap2, GMarkupCollectType);
      if (type != G_MARKUP_COLLECT_INVALID)
        (void) va_arg (ap2, const char *); /* skip the attribute name */
    }

  va_end (ap2);

  return FALSE;
}
/** /**
* g_markup_collect_attributes: * g_markup_collect_attributes:
* @element_name: the current tag name * @element_name: the current tag name
@ -2614,211 +2835,68 @@ g_markup_collect_attributes (const gchar *element_name,
const gchar *first_attr, const gchar *first_attr,
...) ...)
{ {
GMarkupCollectType type; gboolean retval;
const gchar *attr;
guint64 collected;
int written;
va_list ap; va_list ap;
int i;
type = first_type;
attr = first_attr;
collected = 0;
written = 0;
va_start (ap, first_attr); va_start (ap, first_attr);
while (type != G_MARKUP_COLLECT_INVALID) retval = _g_markup_collect_attributesv (element_name,
{ attribute_names, attribute_values,
gboolean mandatory; TRUE, error,
const gchar *value; first_type, first_attr,
ap);
mandatory = !(type & G_MARKUP_COLLECT_OPTIONAL);
type &= (G_MARKUP_COLLECT_OPTIONAL - 1);
/* tristate records a value != TRUE and != FALSE
* for the case where the attribute is missing
*/
if (type == G_MARKUP_COLLECT_TRISTATE)
mandatory = FALSE;
for (i = 0; attribute_names[i]; i++)
if (i >= 40 || !(collected & (G_GUINT64_CONSTANT(1) << i)))
if (!strcmp (attribute_names[i], attr))
break;
/* ISO C99 only promises that the user can pass up to 127 arguments.
* Subtracting the first 4 arguments plus the final NULL and dividing
* by 3 arguments per collected attribute, we are left with a maximum
* number of supported attributes of (127 - 5) / 3 = 40.
*
* In reality, nobody is ever going to call us with anywhere close to
* 40 attributes to collect, so it is safe to assume that if i > 40
* then the user has given some invalid or repeated arguments. These
* problems will be caught and reported at the end of the function.
*
* We know at this point that we have an error, but we don't know
* what error it is, so just continue...
*/
if (i < 40)
collected |= (G_GUINT64_CONSTANT(1) << i);
value = attribute_values[i];
if (value == NULL && mandatory)
{
g_set_error (error, G_MARKUP_ERROR,
G_MARKUP_ERROR_MISSING_ATTRIBUTE,
"element '%s' requires attribute '%s'",
element_name, attr);
va_end (ap);
goto failure;
}
switch (type)
{
case G_MARKUP_COLLECT_STRING:
{
const char **str_ptr;
str_ptr = va_arg (ap, const char **);
if (str_ptr != NULL)
*str_ptr = value;
}
break;
case G_MARKUP_COLLECT_STRDUP:
{
char **str_ptr;
str_ptr = va_arg (ap, char **);
if (str_ptr != NULL)
*str_ptr = g_strdup (value);
}
break;
case G_MARKUP_COLLECT_BOOLEAN:
case G_MARKUP_COLLECT_TRISTATE:
if (value == NULL)
{
gboolean *bool_ptr;
bool_ptr = va_arg (ap, gboolean *);
if (bool_ptr != NULL)
{
if (type == G_MARKUP_COLLECT_TRISTATE)
/* constructivists rejoice!
* neither false nor true...
*/
*bool_ptr = -1;
else /* G_MARKUP_COLLECT_BOOLEAN */
*bool_ptr = FALSE;
}
}
else
{
if (!g_markup_parse_boolean (value, va_arg (ap, gboolean *)))
{
g_set_error (error, G_MARKUP_ERROR,
G_MARKUP_ERROR_INVALID_CONTENT,
"element '%s', attribute '%s', value '%s' "
"cannot be parsed as a boolean value",
element_name, attr, value);
va_end (ap);
goto failure;
}
}
break;
default:
g_assert_not_reached ();
}
type = va_arg (ap, GMarkupCollectType);
attr = va_arg (ap, const char *);
written++;
}
va_end (ap); va_end (ap);
/* ensure we collected all the arguments */ return retval;
for (i = 0; attribute_names[i]; i++) }
if ((collected & (G_GUINT64_CONSTANT(1) << i)) == 0)
{ /**
/* attribute not collected: could be caused by two things. * g_markup_collect_known_attributes:
* * @element_name: the current tag name
* 1) it doesn't exist in our list of attributes * @attribute_names: (array zero-terminated=1): the attribute names
* 2) it existed but was matched by a duplicate attribute earlier * @attribute_values: (array zero-terminated=1): the attribute values
* * @error: (allow-none): a pointer to a #GError or %NULL
* find out. * @first_type: the #GMarkupCollectType of the first attribute
*/ * @first_attr: the name of the first attribute
int j; * @...: a pointer to the storage location of the first attribute
* (or %NULL), followed by more types, names and pointers, ending
for (j = 0; j < i; j++) * with %G_MARKUP_COLLECT_INVALID
if (strcmp (attribute_names[i], attribute_names[j]) == 0) *
/* duplicate! */ * Collects the attributes of the element from the data passed to the
break; * #GMarkupParser start_element function, dealing with common error
* conditions and supporting boolean values.
/* j is now the first occurrence of attribute_names[i] */ *
if (i == j) * This is a more relaxed version of g_markup_collect_attributes(), which
g_set_error (error, G_MARKUP_ERROR, * ignores attributes found in @attribute_names but not listed in @first_attr
G_MARKUP_ERROR_UNKNOWN_ATTRIBUTE, * or @...; by comparison g_markup_collect_attributes() will return
"attribute '%s' invalid for element '%s'", * %G_MARKUP_ERROR_UNKNOWN_ATTRIBUTE instead. Otherwise, this function behaves
attribute_names[i], element_name); * identically.
else *
g_set_error (error, G_MARKUP_ERROR, * This is intended for situations where the markup being parsed may use
G_MARKUP_ERROR_INVALID_CONTENT, * extensions in other namespaces and thus contain extra, unknown, attributes.
"attribute '%s' given multiple times for element '%s'", *
attribute_names[i], element_name); * Return value: %TRUE if successful
*
goto failure; * Since: 2.34
} */
gboolean
return TRUE; g_markup_collect_known_attributes (const gchar *element_name,
const gchar **attribute_names,
failure: const gchar **attribute_values,
/* replay the above to free allocations */ GError **error,
type = first_type; GMarkupCollectType first_type,
attr = first_attr; const gchar *first_attr,
...)
va_start (ap, first_attr); {
while (type != G_MARKUP_COLLECT_INVALID) gboolean retval;
{ va_list ap;
gpointer ptr;
va_start (ap, first_attr);
ptr = va_arg (ap, gpointer); retval = _g_markup_collect_attributesv (element_name,
attribute_names, attribute_values,
if (ptr != NULL) FALSE, error,
{ first_type, first_attr,
switch (type & (G_MARKUP_COLLECT_OPTIONAL - 1)) ap);
{ va_end (ap);
case G_MARKUP_COLLECT_STRDUP:
if (written) return retval;
g_free (*(char **) ptr);
case G_MARKUP_COLLECT_STRING:
*(char **) ptr = NULL;
break;
case G_MARKUP_COLLECT_BOOLEAN:
*(gboolean *) ptr = FALSE;
break;
case G_MARKUP_COLLECT_TRISTATE:
*(gboolean *) ptr = -1;
break;
}
}
type = va_arg (ap, GMarkupCollectType);
attr = va_arg (ap, const char *);
}
va_end (ap);
return FALSE;
} }

View File

@ -233,6 +233,15 @@ gboolean g_markup_collect_attributes (const gchar *element_name,
const gchar *first_attr, const gchar *first_attr,
...); ...);
GLIB_AVAILABLE_IN_2_34
gboolean g_markup_collect_known_attributes (const gchar *element_name,
const gchar **attribute_names,
const gchar **attribute_values,
GError **error,
GMarkupCollectType first_type,
const gchar *first_attr,
...);
G_END_DECLS G_END_DECLS
#endif /* __G_MARKUP_H__ */ #endif /* __G_MARKUP_H__ */

View File

@ -1,11 +1,11 @@
/* /*
* Copyright © 2007 Ryan Lortie * Copyright © 2007 Ryan Lortie
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as * it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2 of the * published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version. * License, or (at your option) any later version.
* *
* See the included COPYING file for more information. * See the included COPYING file for more information.
*/ */
@ -13,6 +13,19 @@
#include <string.h> #include <string.h>
#include <glib.h> #include <glib.h>
/* Which attribute-collection entry point a test run exercises. */
enum test_type
{
  /* use g_markup_collect_attributes() */
  COLLECT_ATTRIBUTES = 0,
  /* use g_markup_collect_known_attributes() */
  COLLECT_KNOWN_ATTRIBUTES,
  /* number of test types; used as the loop bound, not a real type */
  MAX_TEST_TYPE
};
/* Per-run state handed to the parser callbacks as user_data. */
struct test_data
{
  /* selects which collection function the collect() macro calls */
  enum test_type  test_type;
  /* accumulates the textual summary of what was collected */
  GString        *string;
};
static void static void
start (GMarkupParseContext *context, start (GMarkupParseContext *context,
const char *element_name, const char *element_name,
@ -21,13 +34,26 @@ start (GMarkupParseContext *context,
gpointer user_data, gpointer user_data,
GError **error) GError **error)
{ {
GString *string = user_data; struct test_data *data = user_data;
gboolean result; gboolean result;
#define collect(...) \ #define collect(...) G_STMT_START { \
g_markup_collect_attributes (element_name, attribute_names, \ if (data->test_type == COLLECT_ATTRIBUTES) \
attribute_values, error, __VA_ARGS__, \ { \
G_MARKUP_COLLECT_INVALID) result = \
g_markup_collect_attributes (element_name, attribute_names, \
attribute_values, error, __VA_ARGS__, \
G_MARKUP_COLLECT_INVALID); \
} \
else \
{ \
result = \
g_markup_collect_known_attributes (element_name, attribute_names, \
attribute_values, error, \
__VA_ARGS__, \
G_MARKUP_COLLECT_INVALID); \
} \
} G_STMT_END
#define BOOL G_MARKUP_COLLECT_BOOLEAN #define BOOL G_MARKUP_COLLECT_BOOLEAN
#define OPTBOOL G_MARKUP_COLLECT_BOOLEAN | G_MARKUP_COLLECT_OPTIONAL #define OPTBOOL G_MARKUP_COLLECT_BOOLEAN | G_MARKUP_COLLECT_OPTIONAL
#define TRI G_MARKUP_COLLECT_TRISTATE #define TRI G_MARKUP_COLLECT_TRISTATE
@ -41,9 +67,9 @@ start (GMarkupParseContext *context,
{ {
gboolean mb = 2, ob = 2, tri = 2; gboolean mb = 2, ob = 2, tri = 2;
result = collect (BOOL, "mb", &mb, collect (BOOL, "mb", &mb,
OPTBOOL, "ob", &ob, OPTBOOL, "ob", &ob,
TRI, "tri", &tri); TRI, "tri", &tri);
g_assert (result || g_assert (result ||
(mb == FALSE && ob == FALSE && tri != TRUE && tri != FALSE)); (mb == FALSE && ob == FALSE && tri != TRUE && tri != FALSE));
@ -51,7 +77,7 @@ start (GMarkupParseContext *context,
if (tri != FALSE && tri != TRUE) if (tri != FALSE && tri != TRUE)
tri = -1; tri = -1;
g_string_append_printf (string, "<bool(%d) %d %d %d>", g_string_append_printf (data->string, "<bool(%d) %d %d %d>",
result, mb, ob, tri); result, mb, ob, tri);
} }
@ -60,15 +86,15 @@ start (GMarkupParseContext *context,
const char *cm, *co; const char *cm, *co;
char *am, *ao; char *am, *ao;
result = collect (STR, "cm", &cm, collect (STR, "cm", &cm,
STRDUP, "am", &am, STRDUP, "am", &am,
OPTDUP, "ao", &ao, OPTDUP, "ao", &ao,
OPTSTR, "co", &co); OPTSTR, "co", &co);
g_assert (result || g_assert (result ||
(cm == NULL && am == NULL && ao == NULL && co == NULL)); (cm == NULL && am == NULL && ao == NULL && co == NULL));
g_string_append_printf (string, "<str(%d) %s %s %s %s>", g_string_append_printf (data->string, "<str(%d) %s %s %s %s>",
result, n (cm), n (am), n (ao), n (co)); result, n (cm), n (am), n (ao), n (co));
g_free (am); g_free (am);
@ -140,34 +166,49 @@ static void
test_collect (gconstpointer d) test_collect (gconstpointer d)
{ {
const struct test *test = d; const struct test *test = d;
enum test_type t;
GMarkupParseContext *ctx; for (t = 0; t < MAX_TEST_TYPE; t++)
GError *error = NULL;
GString *string;
gboolean result;
string = g_string_new ("");
ctx = g_markup_parse_context_new (&parser, 0, string, NULL);
result = g_markup_parse_context_parse (ctx,
test->document,
-1, &error);
if (result)
result = g_markup_parse_context_end_parse (ctx, &error);
if (result)
{ {
g_assert_no_error (error); GMarkupParseContext *ctx;
g_assert_cmpint (test->error_code, ==, 0); GError *error = NULL;
g_assert_cmpstr (test->result, ==, string->str); gboolean result;
} struct test_data data;
else
{
g_assert_error (error, G_MARKUP_ERROR, test->error_code);
}
g_markup_parse_context_free (ctx); data.test_type = t;
g_string_free (string, TRUE); data.string = g_string_new ("");
g_clear_error (&error);
ctx = g_markup_parse_context_new (&parser, 0, &data, NULL);
result = g_markup_parse_context_parse (ctx,
test->document,
-1, &error);
if (result)
result = g_markup_parse_context_end_parse (ctx, &error);
if (result &&
!(t == COLLECT_KNOWN_ATTRIBUTES &&
test->error_code == G_MARKUP_ERROR_UNKNOWN_ATTRIBUTE))
{
/* Normal test */
g_assert_no_error (error);
g_assert_cmpint (test->error_code, ==, 0);
g_assert_cmpstr (test->result, ==, data.string->str);
}
else if (result)
{
/* Test expecting UNKNOWN_ATTRIBUTE, and we're parsing with
* collect_known_attributes(). */
g_assert_no_error (error);
}
else
{
g_assert_error (error, G_MARKUP_ERROR, test->error_code);
}
g_markup_parse_context_free (ctx);
g_string_free (data.string, TRUE);
g_clear_error (&error);
}
} }
#define XML "<element a='1' b='2' c='3'/>" #define XML "<element a='1' b='2' c='3'/>"