gmarkup: Add g_markup_collect_known_attributes()

Add a variant of g_markup_collect_attributes() which will
ignore unknown attributes (such as those from different XML
namespaces) when parsing markup, rather than returning
G_MARKUP_ERROR_UNKNOWN_ATTRIBUTE as g_markup_collect_attributes()
does.

Patch by Philip Withnall,
https://bugzilla.gnome.org/show_bug.cgi?id=665634
This commit is contained in:
Matthias Clasen 2012-08-16 23:40:08 -04:00
parent 21aff13d22
commit 8d40389d15
5 changed files with 375 additions and 245 deletions

View File

@ -1125,6 +1125,7 @@ g_markup_parse_context_pop
<SUBSECTION>
GMarkupCollectType
g_markup_collect_attributes
g_markup_collect_known_attributes
<SUBSECTION Private>
g_markup_error_quark
</SECTION>

View File

@ -683,6 +683,7 @@ g_markup_parse_context_pop
g_markup_printf_escaped
g_markup_vprintf_escaped
g_markup_collect_attributes
g_markup_collect_known_attributes
g_free
g_clear_pointer
g_malloc

View File

@ -2549,13 +2549,234 @@ g_markup_parse_boolean (const char *string,
* is set depending on what value type is used
*
* A mixed enumerated type and flags field. You must specify one type
* (string, strdup, boolean, tristate). Additionally, you may optionally
* (string, strdup, boolean, tristate). Additionally, you may optionally
* bitwise OR the type with the flag %G_MARKUP_COLLECT_OPTIONAL.
*
* It is likely that this enum will be extended in the future to
* support other types.
*/
/*
 * _g_markup_collect_attributesv:
 * @element_name: the current tag name (used only in error messages)
 * @attribute_names: %NULL-terminated array of attribute names
 * @attribute_values: array of attribute values, indexed like
 *   @attribute_names; assumed to carry a %NULL at the same index as the
 *   terminator of @attribute_names
 * @reject_unknown_attributes: if %TRUE, an attribute present in
 *   @attribute_names but not requested by the caller is reported as
 *   %G_MARKUP_ERROR_UNKNOWN_ATTRIBUTE; if %FALSE it is silently skipped
 * @error: return location for a #GError, or %NULL
 * @first_type: the #GMarkupCollectType of the first attribute
 * @first_attr: the name of the first attribute
 * @ap: the remaining arguments: the storage pointer for the first
 *   attribute, then (type, name, pointer) triples, terminated by
 *   %G_MARKUP_COLLECT_INVALID in a type position
 *
 * Shared implementation behind the variadic attribute collectors.
 *
 * On failure every non-%NULL output location is reset (strings to %NULL,
 * booleans to %FALSE, tristates to -1), strdup'ed strings that were
 * already stored are freed, and @error is set to one of
 * %G_MARKUP_ERROR_MISSING_ATTRIBUTE, %G_MARKUP_ERROR_INVALID_CONTENT or
 * %G_MARKUP_ERROR_UNKNOWN_ATTRIBUTE.
 *
 * The caller keeps ownership of @ap and must call va_end() on it.
 *
 * Returns: %TRUE if all requested attributes were collected
 */
static gboolean
_g_markup_collect_attributesv (const gchar         *element_name,
                               const gchar        **attribute_names,
                               const gchar        **attribute_values,
                               gboolean             reject_unknown_attributes,
                               GError             **error,
                               GMarkupCollectType   first_type,
                               const gchar         *first_attr,
                               va_list              ap)
{
  /* ISO C99 only promises that the user can pass up to 127 arguments.
   * Subtracting the first 4 arguments plus the final NULL and dividing
   * by 3 arguments per collected attribute, we are left with a maximum
   * number of supported attributes of (127 - 5) / 3 = 40.
   *
   * Only attribute indices below this limit are tracked in the
   * 'collected' bitmask.
   */
  enum { MAX_TRACKED_ATTRIBUTES = 40 };

  GMarkupCollectType type;
  const gchar *attr;
  guint64 collected;
  int written;
  int i;
  va_list ap2;

  type = first_type;
  attr = first_attr;
  collected = 0;
  written = 0;

  /* Take a copy of the va_list so that we can iterate back over it in
   * case of errors.
   */
  va_copy (ap2, ap);

  while (type != G_MARKUP_COLLECT_INVALID)
    {
      gboolean mandatory;
      const gchar *value;

      mandatory = !(type & G_MARKUP_COLLECT_OPTIONAL);
      type &= (G_MARKUP_COLLECT_OPTIONAL - 1);

      /* tristate records a value != TRUE and != FALSE
       * for the case where the attribute is missing
       */
      if (type == G_MARKUP_COLLECT_TRISTATE)
        mandatory = FALSE;

      /* Find the first not-yet-collected attribute with this name.  If
       * none matches, i ends up at the index of the NULL terminator.
       */
      for (i = 0; attribute_names[i]; i++)
        if (i >= MAX_TRACKED_ATTRIBUTES ||
            !(collected & (G_GUINT64_CONSTANT(1) << i)))
          if (!strcmp (attribute_names[i], attr))
            break;

      /* In reality, nobody is ever going to call us with anywhere close
       * to 40 attributes to collect, so it is safe to assume that if
       * i >= 40 then the user has given some invalid or repeated
       * arguments.  These problems will be caught and reported at the
       * end of the function.
       *
       * We know at this point that we have an error, but we don't know
       * what error it is, so just continue...
       */
      if (i < MAX_TRACKED_ATTRIBUTES)
        collected |= (G_GUINT64_CONSTANT(1) << i);

      value = attribute_values[i];

      if (value == NULL && mandatory)
        {
          g_set_error (error, G_MARKUP_ERROR,
                       G_MARKUP_ERROR_MISSING_ATTRIBUTE,
                       "element '%s' requires attribute '%s'",
                       element_name, attr);
          goto failure;
        }

      switch (type)
        {
        case G_MARKUP_COLLECT_STRING:
          {
            const char **str_ptr;

            str_ptr = va_arg (ap, const char **);

            if (str_ptr != NULL)
              *str_ptr = value;
          }
          break;

        case G_MARKUP_COLLECT_STRDUP:
          {
            char **str_ptr;

            str_ptr = va_arg (ap, char **);

            if (str_ptr != NULL)
              *str_ptr = g_strdup (value);
          }
          break;

        case G_MARKUP_COLLECT_BOOLEAN:
        case G_MARKUP_COLLECT_TRISTATE:
          if (value == NULL)
            {
              gboolean *bool_ptr;

              bool_ptr = va_arg (ap, gboolean *);

              if (bool_ptr != NULL)
                {
                  if (type == G_MARKUP_COLLECT_TRISTATE)
                    /* constructivists rejoice!
                     * neither false nor true...
                     */
                    *bool_ptr = -1;
                  else /* G_MARKUP_COLLECT_BOOLEAN */
                    *bool_ptr = FALSE;
                }
            }
          else
            {
              if (!g_markup_parse_boolean (value, va_arg (ap, gboolean *)))
                {
                  g_set_error (error, G_MARKUP_ERROR,
                               G_MARKUP_ERROR_INVALID_CONTENT,
                               "element '%s', attribute '%s', value '%s' "
                               "cannot be parsed as a boolean value",
                               element_name, attr, value);
                  goto failure;
                }
            }
          break;

        default:
          g_assert_not_reached ();
        }

      /* The list is terminated by a lone G_MARKUP_COLLECT_INVALID, so a
       * name may only be fetched when another triple really follows;
       * reading past the supplied arguments is undefined behaviour.
       */
      type = va_arg (ap, GMarkupCollectType);
      if (type != G_MARKUP_COLLECT_INVALID)
        attr = va_arg (ap, const char *);

      written++;
    }

  /* ensure we collected all the arguments */
  for (i = 0; attribute_names[i]; i++)
    /* Indices at or above the limit are never recorded in the mask;
     * treat them as uncollected without shifting by an out-of-range
     * amount.
     */
    if (i >= MAX_TRACKED_ATTRIBUTES ||
        (collected & (G_GUINT64_CONSTANT(1) << i)) == 0)
      {
        /* attribute not collected: could be caused by two things.
         *
         * 1) it doesn't exist in our list of attributes
         * 2) it existed but was matched by a duplicate attribute earlier
         *
         * find out.
         */
        int j;

        for (j = 0; j < i; j++)
          if (strcmp (attribute_names[i], attribute_names[j]) == 0)
            /* duplicate! */
            break;

        /* j is now the first occurrence of attribute_names[i] */
        if (i == j && reject_unknown_attributes)
          g_set_error (error, G_MARKUP_ERROR,
                       G_MARKUP_ERROR_UNKNOWN_ATTRIBUTE,
                       "attribute '%s' invalid for element '%s'",
                       attribute_names[i], element_name);
        else if (i != j)
          g_set_error (error, G_MARKUP_ERROR,
                       G_MARKUP_ERROR_INVALID_CONTENT,
                       "attribute '%s' given multiple times for element '%s'",
                       attribute_names[i], element_name);
        else
          continue; /* accepting unknown attributes */

        goto failure;
      }

  va_end (ap2);

  return TRUE;

failure:
  /* Replay the argument list from the start to reset every output and
   * free allocations.  'written' counts the leading entries that were
   * stored before the failure; only those may hold a g_strdup() result.
   * Later entries still contain whatever the caller left there
   * (possibly uninitialised) and must be overwritten, never freed.
   */
  type = first_type;

  while (type != G_MARKUP_COLLECT_INVALID)
    {
      gpointer ptr;

      ptr = va_arg (ap2, gpointer);

      if (ptr != NULL)
        {
          switch (type & (G_MARKUP_COLLECT_OPTIONAL - 1))
            {
            case G_MARKUP_COLLECT_STRDUP:
              if (written > 0)
                g_free (*(char **) ptr);
              *(char **) ptr = NULL;
              break;

            case G_MARKUP_COLLECT_STRING:
              *(char **) ptr = NULL;
              break;

            case G_MARKUP_COLLECT_BOOLEAN:
              *(gboolean *) ptr = FALSE;
              break;

            case G_MARKUP_COLLECT_TRISTATE:
              *(gboolean *) ptr = -1;
              break;

            default:
              break;
            }
        }

      /* Skip the attribute name only when another triple follows. */
      type = va_arg (ap2, GMarkupCollectType);
      if (type != G_MARKUP_COLLECT_INVALID)
        (void) va_arg (ap2, const char *);

      if (written > 0)
        written--;
    }
  va_end (ap2);

  return FALSE;
}
/**
* g_markup_collect_attributes:
* @element_name: the current tag name
@ -2614,211 +2835,68 @@ g_markup_collect_attributes (const gchar *element_name,
const gchar *first_attr,
...)
{
GMarkupCollectType type;
const gchar *attr;
guint64 collected;
int written;
gboolean retval;
va_list ap;
int i;
type = first_type;
attr = first_attr;
collected = 0;
written = 0;
va_start (ap, first_attr);
while (type != G_MARKUP_COLLECT_INVALID)
{
gboolean mandatory;
const gchar *value;
mandatory = !(type & G_MARKUP_COLLECT_OPTIONAL);
type &= (G_MARKUP_COLLECT_OPTIONAL - 1);
/* tristate records a value != TRUE and != FALSE
* for the case where the attribute is missing
*/
if (type == G_MARKUP_COLLECT_TRISTATE)
mandatory = FALSE;
for (i = 0; attribute_names[i]; i++)
if (i >= 40 || !(collected & (G_GUINT64_CONSTANT(1) << i)))
if (!strcmp (attribute_names[i], attr))
break;
/* ISO C99 only promises that the user can pass up to 127 arguments.
* Subtracting the first 4 arguments plus the final NULL and dividing
* by 3 arguments per collected attribute, we are left with a maximum
* number of supported attributes of (127 - 5) / 3 = 40.
*
* In reality, nobody is ever going to call us with anywhere close to
* 40 attributes to collect, so it is safe to assume that if i > 40
* then the user has given some invalid or repeated arguments. These
* problems will be caught and reported at the end of the function.
*
* We know at this point that we have an error, but we don't know
* what error it is, so just continue...
*/
if (i < 40)
collected |= (G_GUINT64_CONSTANT(1) << i);
value = attribute_values[i];
if (value == NULL && mandatory)
{
g_set_error (error, G_MARKUP_ERROR,
G_MARKUP_ERROR_MISSING_ATTRIBUTE,
"element '%s' requires attribute '%s'",
element_name, attr);
va_end (ap);
goto failure;
}
switch (type)
{
case G_MARKUP_COLLECT_STRING:
{
const char **str_ptr;
str_ptr = va_arg (ap, const char **);
if (str_ptr != NULL)
*str_ptr = value;
}
break;
case G_MARKUP_COLLECT_STRDUP:
{
char **str_ptr;
str_ptr = va_arg (ap, char **);
if (str_ptr != NULL)
*str_ptr = g_strdup (value);
}
break;
case G_MARKUP_COLLECT_BOOLEAN:
case G_MARKUP_COLLECT_TRISTATE:
if (value == NULL)
{
gboolean *bool_ptr;
bool_ptr = va_arg (ap, gboolean *);
if (bool_ptr != NULL)
{
if (type == G_MARKUP_COLLECT_TRISTATE)
/* constructivists rejoice!
* neither false nor true...
*/
*bool_ptr = -1;
else /* G_MARKUP_COLLECT_BOOLEAN */
*bool_ptr = FALSE;
}
}
else
{
if (!g_markup_parse_boolean (value, va_arg (ap, gboolean *)))
{
g_set_error (error, G_MARKUP_ERROR,
G_MARKUP_ERROR_INVALID_CONTENT,
"element '%s', attribute '%s', value '%s' "
"cannot be parsed as a boolean value",
element_name, attr, value);
va_end (ap);
goto failure;
}
}
break;
default:
g_assert_not_reached ();
}
type = va_arg (ap, GMarkupCollectType);
attr = va_arg (ap, const char *);
written++;
}
retval = _g_markup_collect_attributesv (element_name,
attribute_names, attribute_values,
TRUE, error,
first_type, first_attr,
ap);
va_end (ap);
/* ensure we collected all the arguments */
for (i = 0; attribute_names[i]; i++)
if ((collected & (G_GUINT64_CONSTANT(1) << i)) == 0)
{
/* attribute not collected: could be caused by two things.
*
* 1) it doesn't exist in our list of attributes
* 2) it existed but was matched by a duplicate attribute earlier
*
* find out.
*/
int j;
for (j = 0; j < i; j++)
if (strcmp (attribute_names[i], attribute_names[j]) == 0)
/* duplicate! */
break;
/* j is now the first occurrence of attribute_names[i] */
if (i == j)
g_set_error (error, G_MARKUP_ERROR,
G_MARKUP_ERROR_UNKNOWN_ATTRIBUTE,
"attribute '%s' invalid for element '%s'",
attribute_names[i], element_name);
else
g_set_error (error, G_MARKUP_ERROR,
G_MARKUP_ERROR_INVALID_CONTENT,
"attribute '%s' given multiple times for element '%s'",
attribute_names[i], element_name);
goto failure;
}
return TRUE;
failure:
/* replay the above to free allocations */
type = first_type;
attr = first_attr;
va_start (ap, first_attr);
while (type != G_MARKUP_COLLECT_INVALID)
{
gpointer ptr;
ptr = va_arg (ap, gpointer);
if (ptr != NULL)
{
switch (type & (G_MARKUP_COLLECT_OPTIONAL - 1))
{
case G_MARKUP_COLLECT_STRDUP:
if (written)
g_free (*(char **) ptr);
case G_MARKUP_COLLECT_STRING:
*(char **) ptr = NULL;
break;
case G_MARKUP_COLLECT_BOOLEAN:
*(gboolean *) ptr = FALSE;
break;
case G_MARKUP_COLLECT_TRISTATE:
*(gboolean *) ptr = -1;
break;
}
}
type = va_arg (ap, GMarkupCollectType);
attr = va_arg (ap, const char *);
}
va_end (ap);
return FALSE;
return retval;
}
/**
 * g_markup_collect_known_attributes:
 * @element_name: the current tag name
 * @attribute_names: (array zero-terminated=1): the attribute names
 * @attribute_values: (array zero-terminated=1): the attribute values
 * @error: (allow-none): a pointer to a #GError or %NULL
 * @first_type: the #GMarkupCollectType of the first attribute
 * @first_attr: the name of the first attribute
 * @...: a pointer to the storage location of the first attribute
 *   (or %NULL), followed by more types, names and pointers, ending
 *   with %G_MARKUP_COLLECT_INVALID
 *
 * Collects the requested attributes of an element from the data handed
 * to the #GMarkupParser start_element function, taking care of the
 * common error conditions and of parsing boolean values.
 *
 * This is the lenient counterpart of g_markup_collect_attributes():
 * attributes that appear in @attribute_names but are not named by
 * @first_attr or @... are skipped, whereas
 * g_markup_collect_attributes() fails with
 * %G_MARKUP_ERROR_UNKNOWN_ATTRIBUTE for them. In every other respect
 * the two functions behave identically.
 *
 * Use it when the markup being parsed may carry extensions from other
 * namespaces and can therefore contain extra, unknown, attributes.
 *
 * Return value: %TRUE if successful
 *
 * Since: 2.34
 */
gboolean
g_markup_collect_known_attributes (const gchar         *element_name,
                                   const gchar        **attribute_names,
                                   const gchar        **attribute_values,
                                   GError             **error,
                                   GMarkupCollectType   first_type,
                                   const gchar         *first_attr,
                                   ...)
{
  va_list args;
  gboolean success;

  va_start (args, first_attr);

  /* FALSE: do not reject attributes the caller did not ask for. */
  success = _g_markup_collect_attributesv (element_name,
                                           attribute_names,
                                           attribute_values,
                                           FALSE,
                                           error,
                                           first_type,
                                           first_attr,
                                           args);

  va_end (args);

  return success;
}

View File

@ -233,6 +233,15 @@ gboolean g_markup_collect_attributes (const gchar *element_name,
const gchar *first_attr,
...);
GLIB_AVAILABLE_IN_2_34
gboolean g_markup_collect_known_attributes (const gchar *element_name,
const gchar **attribute_names,
const gchar **attribute_values,
GError **error,
GMarkupCollectType first_type,
const gchar *first_attr,
...);
G_END_DECLS
#endif /* __G_MARKUP_H__ */

View File

@ -1,11 +1,11 @@
/*
/*
* Copyright © 2007 Ryan Lortie
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
*
* See the included COPYING file for more information.
*/
@ -13,6 +13,19 @@
#include <string.h>
#include <glib.h>
/* Selects which collector the start() callback exercises. test_collect()
 * runs every document once per value, iterating from 0 up to (but not
 * including) MAX_TEST_TYPE.
 */
enum test_type
{
COLLECT_ATTRIBUTES = 0, /* use g_markup_collect_attributes() */
COLLECT_KNOWN_ATTRIBUTES, /* use g_markup_collect_known_attributes() */
MAX_TEST_TYPE /* number of variants; loop bound, not a real test type */
};
/* Per-run state passed to the parser callbacks as user_data. */
struct test_data
{
enum test_type test_type; /* which collector start() should call */
GString *string; /* accumulates the summary text start() appends per element */
};
static void
start (GMarkupParseContext *context,
const char *element_name,
@ -21,13 +34,26 @@ start (GMarkupParseContext *context,
gpointer user_data,
GError **error)
{
GString *string = user_data;
struct test_data *data = user_data;
gboolean result;
#define collect(...) \
g_markup_collect_attributes (element_name, attribute_names, \
attribute_values, error, __VA_ARGS__, \
G_MARKUP_COLLECT_INVALID)
#define collect(...) G_STMT_START { \
if (data->test_type == COLLECT_ATTRIBUTES) \
{ \
result = \
g_markup_collect_attributes (element_name, attribute_names, \
attribute_values, error, __VA_ARGS__, \
G_MARKUP_COLLECT_INVALID); \
} \
else \
{ \
result = \
g_markup_collect_known_attributes (element_name, attribute_names, \
attribute_values, error, \
__VA_ARGS__, \
G_MARKUP_COLLECT_INVALID); \
} \
} G_STMT_END
#define BOOL G_MARKUP_COLLECT_BOOLEAN
#define OPTBOOL G_MARKUP_COLLECT_BOOLEAN | G_MARKUP_COLLECT_OPTIONAL
#define TRI G_MARKUP_COLLECT_TRISTATE
@ -41,9 +67,9 @@ start (GMarkupParseContext *context,
{
gboolean mb = 2, ob = 2, tri = 2;
result = collect (BOOL, "mb", &mb,
OPTBOOL, "ob", &ob,
TRI, "tri", &tri);
collect (BOOL, "mb", &mb,
OPTBOOL, "ob", &ob,
TRI, "tri", &tri);
g_assert (result ||
(mb == FALSE && ob == FALSE && tri != TRUE && tri != FALSE));
@ -51,7 +77,7 @@ start (GMarkupParseContext *context,
if (tri != FALSE && tri != TRUE)
tri = -1;
g_string_append_printf (string, "<bool(%d) %d %d %d>",
g_string_append_printf (data->string, "<bool(%d) %d %d %d>",
result, mb, ob, tri);
}
@ -60,15 +86,15 @@ start (GMarkupParseContext *context,
const char *cm, *co;
char *am, *ao;
result = collect (STR, "cm", &cm,
STRDUP, "am", &am,
OPTDUP, "ao", &ao,
OPTSTR, "co", &co);
collect (STR, "cm", &cm,
STRDUP, "am", &am,
OPTDUP, "ao", &ao,
OPTSTR, "co", &co);
g_assert (result ||
(cm == NULL && am == NULL && ao == NULL && co == NULL));
g_string_append_printf (string, "<str(%d) %s %s %s %s>",
g_string_append_printf (data->string, "<str(%d) %s %s %s %s>",
result, n (cm), n (am), n (ao), n (co));
g_free (am);
@ -140,34 +166,49 @@ static void
test_collect (gconstpointer d)
{
const struct test *test = d;
enum test_type t;
GMarkupParseContext *ctx;
GError *error = NULL;
GString *string;
gboolean result;
string = g_string_new ("");
ctx = g_markup_parse_context_new (&parser, 0, string, NULL);
result = g_markup_parse_context_parse (ctx,
test->document,
-1, &error);
if (result)
result = g_markup_parse_context_end_parse (ctx, &error);
if (result)
for (t = 0; t < MAX_TEST_TYPE; t++)
{
g_assert_no_error (error);
g_assert_cmpint (test->error_code, ==, 0);
g_assert_cmpstr (test->result, ==, string->str);
}
else
{
g_assert_error (error, G_MARKUP_ERROR, test->error_code);
}
GMarkupParseContext *ctx;
GError *error = NULL;
gboolean result;
struct test_data data;
g_markup_parse_context_free (ctx);
g_string_free (string, TRUE);
g_clear_error (&error);
data.test_type = t;
data.string = g_string_new ("");
ctx = g_markup_parse_context_new (&parser, 0, &data, NULL);
result = g_markup_parse_context_parse (ctx,
test->document,
-1, &error);
if (result)
result = g_markup_parse_context_end_parse (ctx, &error);
if (result &&
!(t == COLLECT_KNOWN_ATTRIBUTES &&
test->error_code == G_MARKUP_ERROR_UNKNOWN_ATTRIBUTE))
{
/* Normal test */
g_assert_no_error (error);
g_assert_cmpint (test->error_code, ==, 0);
g_assert_cmpstr (test->result, ==, data.string->str);
}
else if (result)
{
/* Test expecting UNKNOWN_ATTRIBUTE, and we're parsing with
* collect_known_attributes(). */
g_assert_no_error (error);
}
else
{
g_assert_error (error, G_MARKUP_ERROR, test->error_code);
}
g_markup_parse_context_free (ctx);
g_string_free (data.string, TRUE);
g_clear_error (&error);
}
}
#define XML "<element a='1' b='2' c='3'/>"