From 8d40389d15544bdc612989157f80380badce52f7 Mon Sep 17 00:00:00 2001 From: Matthias Clasen Date: Thu, 16 Aug 2012 23:40:08 -0400 Subject: [PATCH] gmarkup: Add g_markup_collect_known_attributes() Add a variant of g_markup_collect_attributes() which will ignore unknown attributes (such as those from different XML namespaces) when parsing markup, rather than returning G_MARKUP_ERROR_UNKNOWN_ATTRIBUTE as g_markup_collect_attributes() does. Patch by Philip Withnall, https://bugzilla.gnome.org/show_bug.cgi?id=665634 --- docs/reference/glib/glib-sections.txt | 1 + glib/glib.symbols | 1 + glib/gmarkup.c | 484 +++++++++++++++----------- glib/gmarkup.h | 9 + glib/tests/markup-collect.c | 125 ++++--- 5 files changed, 375 insertions(+), 245 deletions(-) diff --git a/docs/reference/glib/glib-sections.txt b/docs/reference/glib/glib-sections.txt index dacacbb08..4c0071ca5 100644 --- a/docs/reference/glib/glib-sections.txt +++ b/docs/reference/glib/glib-sections.txt @@ -1125,6 +1125,7 @@ g_markup_parse_context_pop GMarkupCollectType g_markup_collect_attributes +g_markup_collect_known_attributes g_markup_error_quark diff --git a/glib/glib.symbols b/glib/glib.symbols index 0490b6696..dbd924622 100644 --- a/glib/glib.symbols +++ b/glib/glib.symbols @@ -683,6 +683,7 @@ g_markup_parse_context_pop g_markup_printf_escaped g_markup_vprintf_escaped g_markup_collect_attributes +g_markup_collect_known_attributes g_free g_clear_pointer g_malloc diff --git a/glib/gmarkup.c b/glib/gmarkup.c index b0c28a986..4fda725b5 100644 --- a/glib/gmarkup.c +++ b/glib/gmarkup.c @@ -2549,13 +2549,234 @@ g_markup_parse_boolean (const char *string, * is set depending on what value type is used * * A mixed enumerated type and flags field. You must specify one type - * (string, strdup, boolean, tristate). Additionally, you may optionally + * (string, strdup, boolean, tristate). Additionally, you may optionally * bitwise OR the type with the flag %G_MARKUP_COLLECT_OPTIONAL. 
* * It is likely that this enum will be extended in the future to * support other types. */ +static gboolean +_g_markup_collect_attributesv (const gchar *element_name, + const gchar **attribute_names, + const gchar **attribute_values, + gboolean reject_unknown_attributes, + GError **error, + GMarkupCollectType first_type, + const gchar *first_attr, + va_list ap) +{ + GMarkupCollectType type; + const gchar *attr; + guint64 collected; + int written; + int i; + va_list ap2; + + type = first_type; + attr = first_attr; + collected = 0; + written = 0; + + /* Take a copy of the va_list so that we can iterate back over it in case of + * errors. */ + va_copy (ap2, ap); + + while (type != G_MARKUP_COLLECT_INVALID) + { + gboolean mandatory; + const gchar *value; + + mandatory = !(type & G_MARKUP_COLLECT_OPTIONAL); + type &= (G_MARKUP_COLLECT_OPTIONAL - 1); + + /* tristate records a value != TRUE and != FALSE + * for the case where the attribute is missing + */ + if (type == G_MARKUP_COLLECT_TRISTATE) + mandatory = FALSE; + + for (i = 0; attribute_names[i]; i++) + if (i >= 40 || !(collected & (G_GUINT64_CONSTANT(1) << i))) + if (!strcmp (attribute_names[i], attr)) + break; + + /* ISO C99 only promises that the user can pass up to 127 arguments. + * Subtracting the first 4 arguments plus the final NULL and dividing + * by 3 arguments per collected attribute, we are left with a maximum + * number of supported attributes of (127 - 5) / 3 = 40. + * + * In reality, nobody is ever going to call us with anywhere close to + * 40 attributes to collect, so it is safe to assume that if i > 40 + * then the user has given some invalid or repeated arguments. These + * problems will be caught and reported at the end of the function. + * + * We know at this point that we have an error, but we don't know + * what error it is, so just continue... 
+ */ + if (i < 40) + collected |= (G_GUINT64_CONSTANT(1) << i); + + value = attribute_values[i]; + + if (value == NULL && mandatory) + { + g_set_error (error, G_MARKUP_ERROR, + G_MARKUP_ERROR_MISSING_ATTRIBUTE, + "element '%s' requires attribute '%s'", + element_name, attr); + + goto failure; + } + + switch (type) + { + case G_MARKUP_COLLECT_STRING: + { + const char **str_ptr; + + str_ptr = va_arg (ap, const char **); + + if (str_ptr != NULL) + *str_ptr = value; + } + break; + + case G_MARKUP_COLLECT_STRDUP: + { + char **str_ptr; + + str_ptr = va_arg (ap, char **); + + if (str_ptr != NULL) + *str_ptr = g_strdup (value); + } + break; + + case G_MARKUP_COLLECT_BOOLEAN: + case G_MARKUP_COLLECT_TRISTATE: + if (value == NULL) + { + gboolean *bool_ptr; + + bool_ptr = va_arg (ap, gboolean *); + + if (bool_ptr != NULL) + { + if (type == G_MARKUP_COLLECT_TRISTATE) + /* constructivists rejoice! + * neither false nor true... + */ + *bool_ptr = -1; + + else /* G_MARKUP_COLLECT_BOOLEAN */ + *bool_ptr = FALSE; + } + } + else + { + if (!g_markup_parse_boolean (value, va_arg (ap, gboolean *))) + { + g_set_error (error, G_MARKUP_ERROR, + G_MARKUP_ERROR_INVALID_CONTENT, + "element '%s', attribute '%s', value '%s' " + "cannot be parsed as a boolean value", + element_name, attr, value); + + goto failure; + } + } + + break; + + default: + g_assert_not_reached (); + } + + type = va_arg (ap, GMarkupCollectType); + attr = va_arg (ap, const char *); + written++; + } + + /* ensure we collected all the arguments */ + for (i = 0; attribute_names[i]; i++) + if ((collected & (G_GUINT64_CONSTANT(1) << i)) == 0) + { + /* attribute not collected: could be caused by two things. + * + * 1) it doesn't exist in our list of attributes + * 2) it existed but was matched by a duplicate attribute earlier + * + * find out. + */ + int j; + + for (j = 0; j < i; j++) + if (strcmp (attribute_names[i], attribute_names[j]) == 0) + /* duplicate! 
*/ + break; + + /* j is now the first occurrence of attribute_names[i] */ + if (i == j && reject_unknown_attributes) + g_set_error (error, G_MARKUP_ERROR, + G_MARKUP_ERROR_UNKNOWN_ATTRIBUTE, + "attribute '%s' invalid for element '%s'", + attribute_names[i], element_name); + else if (i != j) + g_set_error (error, G_MARKUP_ERROR, + G_MARKUP_ERROR_INVALID_CONTENT, + "attribute '%s' given multiple times for element '%s'", + attribute_names[i], element_name); + else + continue; /* accepting unknown attributes */ + + goto failure; + } + va_end (ap2); + + return TRUE; + +failure: + /* replay the above to free allocations */ + type = first_type; + attr = first_attr; + + while (type != G_MARKUP_COLLECT_INVALID) + { + gpointer ptr; + + ptr = va_arg (ap2, gpointer); + + if (ptr != NULL) + { + switch (type & (G_MARKUP_COLLECT_OPTIONAL - 1)) + { + case G_MARKUP_COLLECT_STRDUP: + if (written) + g_free (*(char **) ptr); + + case G_MARKUP_COLLECT_STRING: + *(char **) ptr = NULL; + break; + + case G_MARKUP_COLLECT_BOOLEAN: + *(gboolean *) ptr = FALSE; + break; + + case G_MARKUP_COLLECT_TRISTATE: + *(gboolean *) ptr = -1; + break; + } + } + + type = va_arg (ap2, GMarkupCollectType); + attr = va_arg (ap2, const char *); + } + va_end (ap2); + + return FALSE; +} + /** * g_markup_collect_attributes: * @element_name: the current tag name @@ -2614,211 +2835,68 @@ g_markup_collect_attributes (const gchar *element_name, const gchar *first_attr, ...) 
{ - GMarkupCollectType type; - const gchar *attr; - guint64 collected; - int written; + gboolean retval; va_list ap; - int i; - - type = first_type; - attr = first_attr; - collected = 0; - written = 0; va_start (ap, first_attr); - while (type != G_MARKUP_COLLECT_INVALID) - { - gboolean mandatory; - const gchar *value; - - mandatory = !(type & G_MARKUP_COLLECT_OPTIONAL); - type &= (G_MARKUP_COLLECT_OPTIONAL - 1); - - /* tristate records a value != TRUE and != FALSE - * for the case where the attribute is missing - */ - if (type == G_MARKUP_COLLECT_TRISTATE) - mandatory = FALSE; - - for (i = 0; attribute_names[i]; i++) - if (i >= 40 || !(collected & (G_GUINT64_CONSTANT(1) << i))) - if (!strcmp (attribute_names[i], attr)) - break; - - /* ISO C99 only promises that the user can pass up to 127 arguments. - * Subtracting the first 4 arguments plus the final NULL and dividing - * by 3 arguments per collected attribute, we are left with a maximum - * number of supported attributes of (127 - 5) / 3 = 40. - * - * In reality, nobody is ever going to call us with anywhere close to - * 40 attributes to collect, so it is safe to assume that if i > 40 - * then the user has given some invalid or repeated arguments. These - * problems will be caught and reported at the end of the function. - * - * We know at this point that we have an error, but we don't know - * what error it is, so just continue... 
- */ - if (i < 40) - collected |= (G_GUINT64_CONSTANT(1) << i); - - value = attribute_values[i]; - - if (value == NULL && mandatory) - { - g_set_error (error, G_MARKUP_ERROR, - G_MARKUP_ERROR_MISSING_ATTRIBUTE, - "element '%s' requires attribute '%s'", - element_name, attr); - - va_end (ap); - goto failure; - } - - switch (type) - { - case G_MARKUP_COLLECT_STRING: - { - const char **str_ptr; - - str_ptr = va_arg (ap, const char **); - - if (str_ptr != NULL) - *str_ptr = value; - } - break; - - case G_MARKUP_COLLECT_STRDUP: - { - char **str_ptr; - - str_ptr = va_arg (ap, char **); - - if (str_ptr != NULL) - *str_ptr = g_strdup (value); - } - break; - - case G_MARKUP_COLLECT_BOOLEAN: - case G_MARKUP_COLLECT_TRISTATE: - if (value == NULL) - { - gboolean *bool_ptr; - - bool_ptr = va_arg (ap, gboolean *); - - if (bool_ptr != NULL) - { - if (type == G_MARKUP_COLLECT_TRISTATE) - /* constructivists rejoice! - * neither false nor true... - */ - *bool_ptr = -1; - - else /* G_MARKUP_COLLECT_BOOLEAN */ - *bool_ptr = FALSE; - } - } - else - { - if (!g_markup_parse_boolean (value, va_arg (ap, gboolean *))) - { - g_set_error (error, G_MARKUP_ERROR, - G_MARKUP_ERROR_INVALID_CONTENT, - "element '%s', attribute '%s', value '%s' " - "cannot be parsed as a boolean value", - element_name, attr, value); - - va_end (ap); - goto failure; - } - } - - break; - - default: - g_assert_not_reached (); - } - - type = va_arg (ap, GMarkupCollectType); - attr = va_arg (ap, const char *); - written++; - } + retval = _g_markup_collect_attributesv (element_name, + attribute_names, attribute_values, + TRUE, error, + first_type, first_attr, + ap); va_end (ap); - /* ensure we collected all the arguments */ - for (i = 0; attribute_names[i]; i++) - if ((collected & (G_GUINT64_CONSTANT(1) << i)) == 0) - { - /* attribute not collected: could be caused by two things. - * - * 1) it doesn't exist in our list of attributes - * 2) it existed but was matched by a duplicate attribute earlier - * - * find out. 
- */ - int j; - - for (j = 0; j < i; j++) - if (strcmp (attribute_names[i], attribute_names[j]) == 0) - /* duplicate! */ - break; - - /* j is now the first occurrence of attribute_names[i] */ - if (i == j) - g_set_error (error, G_MARKUP_ERROR, - G_MARKUP_ERROR_UNKNOWN_ATTRIBUTE, - "attribute '%s' invalid for element '%s'", - attribute_names[i], element_name); - else - g_set_error (error, G_MARKUP_ERROR, - G_MARKUP_ERROR_INVALID_CONTENT, - "attribute '%s' given multiple times for element '%s'", - attribute_names[i], element_name); - - goto failure; - } - - return TRUE; - -failure: - /* replay the above to free allocations */ - type = first_type; - attr = first_attr; - - va_start (ap, first_attr); - while (type != G_MARKUP_COLLECT_INVALID) - { - gpointer ptr; - - ptr = va_arg (ap, gpointer); - - if (ptr != NULL) - { - switch (type & (G_MARKUP_COLLECT_OPTIONAL - 1)) - { - case G_MARKUP_COLLECT_STRDUP: - if (written) - g_free (*(char **) ptr); - - case G_MARKUP_COLLECT_STRING: - *(char **) ptr = NULL; - break; - - case G_MARKUP_COLLECT_BOOLEAN: - *(gboolean *) ptr = FALSE; - break; - - case G_MARKUP_COLLECT_TRISTATE: - *(gboolean *) ptr = -1; - break; - } - } - - type = va_arg (ap, GMarkupCollectType); - attr = va_arg (ap, const char *); - } - va_end (ap); - - return FALSE; + return retval; +} + +/** + * g_markup_collect_known_attributes: + * @element_name: the current tag name + * @attribute_names: (array zero-terminated=1): the attribute names + * @attribute_values: (array zero-terminated=1): the attribute values + * @error: (allow-none): a pointer to a #GError or %NULL + * @first_type: the #GMarkupCollectType of the first attribute + * @first_attr: the name of the first attribute + * @...: a pointer to the storage location of the first attribute + * (or %NULL), followed by more types, names and pointers, ending + * with %G_MARKUP_COLLECT_INVALID + * + * Collects the attributes of the element from the data passed to the + * #GMarkupParser start_element function, 
dealing with common error + * conditions and supporting boolean values. + * + * This is a more relaxed version of g_markup_collect_attributes(), which + * ignores attributes found in @attribute_names but not listed in @first_attr + * or @...; by comparison g_markup_collect_attributes() will return + * %G_MARKUP_ERROR_UNKNOWN_ATTRIBUTE instead. Otherwise, this function behaves + * identically. + * + * This is intended for situations where the markup being parsed may use + * extensions in other namespaces and thus contain extra, unknown, attributes. + * + * Return value: %TRUE if successful + * + * Since: 2.34 + */ +gboolean +g_markup_collect_known_attributes (const gchar *element_name, + const gchar **attribute_names, + const gchar **attribute_values, + GError **error, + GMarkupCollectType first_type, + const gchar *first_attr, + ...) +{ + gboolean retval; + va_list ap; + + va_start (ap, first_attr); + retval = _g_markup_collect_attributesv (element_name, + attribute_names, attribute_values, + FALSE, error, + first_type, first_attr, + ap); + va_end (ap); + + return retval; } diff --git a/glib/gmarkup.h b/glib/gmarkup.h index a8865da9f..ee4ee8cf5 100644 --- a/glib/gmarkup.h +++ b/glib/gmarkup.h @@ -233,6 +233,15 @@ gboolean g_markup_collect_attributes (const gchar *element_name, const gchar *first_attr, ...); +GLIB_AVAILABLE_IN_2_34 +gboolean g_markup_collect_known_attributes (const gchar *element_name, + const gchar **attribute_names, + const gchar **attribute_values, + GError **error, + GMarkupCollectType first_type, + const gchar *first_attr, + ...); + G_END_DECLS #endif /* __G_MARKUP_H__ */ diff --git a/glib/tests/markup-collect.c b/glib/tests/markup-collect.c index 3b2e2bd79..65cc67b5b 100644 --- a/glib/tests/markup-collect.c +++ b/glib/tests/markup-collect.c @@ -1,11 +1,11 @@ -/* +/* * Copyright © 2007 Ryan Lortie - * + * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * 
published by the Free Software Foundation; either version 2 of the * License, or (at your option) any later version. - * + * * See the included COPYING file for more information. */ @@ -13,6 +13,19 @@ #include #include +enum test_type +{ + COLLECT_ATTRIBUTES = 0, + COLLECT_KNOWN_ATTRIBUTES, + MAX_TEST_TYPE +}; + +struct test_data +{ + enum test_type test_type; + GString *string; +}; + static void start (GMarkupParseContext *context, const char *element_name, @@ -21,13 +34,26 @@ start (GMarkupParseContext *context, gpointer user_data, GError **error) { - GString *string = user_data; + struct test_data *data = user_data; gboolean result; -#define collect(...) \ - g_markup_collect_attributes (element_name, attribute_names, \ - attribute_values, error, __VA_ARGS__, \ - G_MARKUP_COLLECT_INVALID) +#define collect(...) G_STMT_START { \ + if (data->test_type == COLLECT_ATTRIBUTES) \ + { \ + result = \ + g_markup_collect_attributes (element_name, attribute_names, \ + attribute_values, error, __VA_ARGS__, \ + G_MARKUP_COLLECT_INVALID); \ + } \ + else \ + { \ + result = \ + g_markup_collect_known_attributes (element_name, attribute_names, \ + attribute_values, error, \ + __VA_ARGS__, \ + G_MARKUP_COLLECT_INVALID); \ + } \ + } G_STMT_END #define BOOL G_MARKUP_COLLECT_BOOLEAN #define OPTBOOL G_MARKUP_COLLECT_BOOLEAN | G_MARKUP_COLLECT_OPTIONAL #define TRI G_MARKUP_COLLECT_TRISTATE @@ -41,9 +67,9 @@ start (GMarkupParseContext *context, { gboolean mb = 2, ob = 2, tri = 2; - result = collect (BOOL, "mb", &mb, - OPTBOOL, "ob", &ob, - TRI, "tri", &tri); + collect (BOOL, "mb", &mb, + OPTBOOL, "ob", &ob, + TRI, "tri", &tri); g_assert (result || (mb == FALSE && ob == FALSE && tri != TRUE && tri != FALSE)); @@ -51,7 +77,7 @@ start (GMarkupParseContext *context, if (tri != FALSE && tri != TRUE) tri = -1; - g_string_append_printf (string, "", + g_string_append_printf (data->string, "", result, mb, ob, tri); } @@ -60,15 +86,15 @@ start (GMarkupParseContext *context, const char *cm, *co; 
char *am, *ao; - result = collect (STR, "cm", &cm, - STRDUP, "am", &am, - OPTDUP, "ao", &ao, - OPTSTR, "co", &co); + collect (STR, "cm", &cm, + STRDUP, "am", &am, + OPTDUP, "ao", &ao, + OPTSTR, "co", &co); g_assert (result || (cm == NULL && am == NULL && ao == NULL && co == NULL)); - g_string_append_printf (string, "", + g_string_append_printf (data->string, "", result, n (cm), n (am), n (ao), n (co)); g_free (am); @@ -140,34 +166,49 @@ static void test_collect (gconstpointer d) { const struct test *test = d; + enum test_type t; - GMarkupParseContext *ctx; - GError *error = NULL; - GString *string; - gboolean result; - - string = g_string_new (""); - ctx = g_markup_parse_context_new (&parser, 0, string, NULL); - result = g_markup_parse_context_parse (ctx, - test->document, - -1, &error); - if (result) - result = g_markup_parse_context_end_parse (ctx, &error); - - if (result) + for (t = 0; t < MAX_TEST_TYPE; t++) { - g_assert_no_error (error); - g_assert_cmpint (test->error_code, ==, 0); - g_assert_cmpstr (test->result, ==, string->str); - } - else - { - g_assert_error (error, G_MARKUP_ERROR, test->error_code); - } + GMarkupParseContext *ctx; + GError *error = NULL; + gboolean result; + struct test_data data; - g_markup_parse_context_free (ctx); - g_string_free (string, TRUE); - g_clear_error (&error); + data.test_type = t; + data.string = g_string_new (""); + + ctx = g_markup_parse_context_new (&parser, 0, &data, NULL); + result = g_markup_parse_context_parse (ctx, + test->document, + -1, &error); + if (result) + result = g_markup_parse_context_end_parse (ctx, &error); + + if (result && + !(t == COLLECT_KNOWN_ATTRIBUTES && + test->error_code == G_MARKUP_ERROR_UNKNOWN_ATTRIBUTE)) + { + /* Normal test */ + g_assert_no_error (error); + g_assert_cmpint (test->error_code, ==, 0); + g_assert_cmpstr (test->result, ==, data.string->str); + } + else if (result) + { + /* Test expecting UNKNOWN_ATTRIBUTE, and we're parsing with + * collect_known_attributes(). 
*/ + g_assert_no_error (error); + } + else + { + g_assert_error (error, G_MARKUP_ERROR, test->error_code); + } + + g_markup_parse_context_free (ctx); + g_string_free (data.string, TRUE); + g_clear_error (&error); + } } #define XML ""