add functions g_markup_parse_context_{push,pop} in order to provide some

2008-07-10  Ryan Lortie  <desrt@desrt.ca>

        * docs/reference/glib/glib-sections.txt:
        * glib/glib.symbols:
        * glib/gmarkup.c:
        * glib/gmarkup.h: add functions g_markup_parse_context_{push,pop} in
        order to provide some small hooks on which to build easy-to-use
        subparsers.
 
        * glib/tests/Makefile: add new test
        * glib/tests/markup-subparser.c: new test for subparsers
 
        Fixes bug #337518.


svn path=/trunk/; revision=7174
This commit is contained in:
Ryan Lortie 2008-07-10 08:41:59 +00:00 committed by Ryan Lortie
parent 0a20d4cbe4
commit 178698c8d3
7 changed files with 673 additions and 2 deletions

View File

@ -1,3 +1,17 @@
2008-07-10 Ryan Lortie <desrt@desrt.ca>
* docs/reference/glib/glib-sections.txt:
* glib/glib.symbols:
* glib/gmarkup.c:
* glib/gmarkup.h: add functions g_markup_parse_context_{push,pop} in
order to provide some small hooks on which to build easy-to-use
subparsers.
* glib/tests/Makefile: add new test
* glib/tests/markup-subparser.c: new test for subparsers
Fixes bug #337518.
2008-07-05 Matthias Clasen <mclasen@redhat.com>
Bug 528317 GRegex does not allow recursion limit

View File

@ -1002,6 +1002,8 @@ g_markup_parse_context_get_element
g_markup_parse_context_get_element_stack
g_markup_parse_context_new
g_markup_parse_context_parse
g_markup_parse_context_push
g_markup_parse_context_pop
<SUBSECTION>
GMarkupCollectType
g_markup_collect_attributes

View File

@ -673,6 +673,8 @@ g_markup_parse_context_get_element_stack
g_markup_parse_context_get_position
g_markup_parse_context_new
g_markup_parse_context_parse
g_markup_parse_context_push
g_markup_parse_context_pop
g_markup_printf_escaped G_GNUC_PRINTF(1,2)
g_markup_vprintf_escaped
g_markup_collect_attributes

View File

@ -1,6 +1,7 @@
/* gmarkup.c - Simple XML-like parser
*
* Copyright 2000, 2003 Red Hat, Inc.
* Copyright 2007, 2008 Ryan Lortie <desrt@desrt.ca>
*
* GLib is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as
@ -57,6 +58,13 @@ typedef enum
STATE_ERROR
} GMarkupParseState;
/* Saved parser state for one level of subparser nesting.
 *
 * g_markup_parse_context_push() fills one of these in from the context
 * before installing the subparser; pop_subparser_stack() copies the
 * values back into the context and frees the tracker.
 */
typedef struct
{
/* value of context->subparser_element at the time of the push */
const char *prev_element;
/* value of context->parser at the time of the push */
const GMarkupParser *prev_parser;
/* value of context->user_data at the time of the push */
gpointer prev_user_data;
} GMarkupRecursionTracker;
struct _GMarkupParseContext
{
const GMarkupParser *parser;
@ -95,7 +103,13 @@ struct _GMarkupParseContext
guint document_empty : 1;
guint parsing : 1;
guint awaiting_pop : 1;
gint balance;
/* subparser support */
GSList *subparser_stack; /* (GMarkupRecursionTracker *) */
const char *subparser_element;
gpointer held_user_data;
};
/**
@ -153,6 +167,13 @@ g_markup_parse_context_new (const GMarkupParser *parser,
context->document_empty = TRUE;
context->parsing = FALSE;
context->awaiting_pop = FALSE;
context->subparser_stack = NULL;
context->subparser_element = NULL;
/* this is only looked at if awaiting_pop = TRUE. initialise anyway. */
context->held_user_data = NULL;
context->balance = 0;
return context;
@ -163,14 +184,16 @@ g_markup_parse_context_new (const GMarkupParser *parser,
* @context: a #GMarkupParseContext
*
* Frees a #GMarkupParseContext. Can't be called from inside
* one of the #GMarkupParser functions.
*
* one of the #GMarkupParser functions. Can't be called while
* a subparser is pushed.
**/
void
g_markup_parse_context_free (GMarkupParseContext *context)
{
g_return_if_fail (context != NULL);
g_return_if_fail (!context->parsing);
g_return_if_fail (!context->subparser_stack);
g_return_if_fail (!context->awaiting_pop);
if (context->dnotify)
(* context->dnotify) (context->user_data);
@ -190,6 +213,8 @@ g_markup_parse_context_free (GMarkupParseContext *context)
g_free (context);
}
static void pop_subparser_stack (GMarkupParseContext *context);
static void
mark_error (GMarkupParseContext *context,
GError *error)
@ -198,6 +223,16 @@ mark_error (GMarkupParseContext *context,
if (context->parser->error)
(*context->parser->error) (context, error, context->user_data);
/* report the error all the way up to free all the user-data */
while (context->subparser_stack)
{
pop_subparser_stack (context);
context->awaiting_pop = FALSE; /* already been freed */
if (context->parser->error)
(*context->parser->error) (context, error, context->user_data);
}
}
static void set_error (GMarkupParseContext *context,
@ -827,6 +862,49 @@ current_element (GMarkupParseContext *context)
return context->tag_stack->data;
}
/* Removes the innermost subparser from the context.
 *
 * The subparser's user_data is parked in held_user_data (and
 * awaiting_pop is raised) so that it can still be collected afterwards,
 * then the parser, user_data and subparser_element recorded in the top
 * tracker are reinstated and the tracker is released.
 *
 * The subparser stack must not be empty.
 */
static void
pop_subparser_stack (GMarkupParseContext *context)
{
  GMarkupRecursionTracker *saved;
  GSList *top;

  g_assert (context->subparser_stack);

  top = context->subparser_stack;
  saved = top->data;

  /* park the outgoing subparser's data until somebody claims it */
  context->held_user_data = context->user_data;
  context->awaiting_pop = TRUE;

  /* reinstate whatever was active before the matching push */
  context->subparser_element = saved->prev_element;
  context->parser = saved->prev_parser;
  context->user_data = saved->prev_user_data;

  /* unlink the stack node and drop the tracker it carried */
  context->subparser_stack = g_slist_delete_link (top, top);
  g_slice_free (GMarkupRecursionTracker, saved);
}
/* Pops the innermost subparser if the element currently on top of the
 * tag stack is the very element for which that subparser was pushed.
 *
 * The check is a pointer comparison, not a string comparison: the
 * value stored by the push is the pointer current_element() returned
 * at that time.
 */
static void
possibly_finish_subparser (GMarkupParseContext *context)
{
  const char *innermost = current_element (context);

  if (innermost != context->subparser_element)
    return;

  pop_subparser_stack (context);
}
/* Complains if a subparser was popped off the stack but its user_data
 * is still marked as waiting to be collected, then clears the pending
 * state unconditionally.
 */
static void
ensure_no_outstanding_subparser (GMarkupParseContext *context)
{
  if (context->awaiting_pop)
    {
      g_critical ("During the first end_element call after invoking a "
                  "subparser you must pop the subparser stack and handle "
                  "the freeing of the subparser user_data. This can be "
                  "done by calling the end function of the subparser. "
                  "Very probably, your program just leaked memory.");
    }

  context->awaiting_pop = FALSE;

  /* drop our reference so that a leak checker sees the pointer vanish */
  context->held_user_data = NULL;
}
static const gchar*
current_attribute (GMarkupParseContext *context)
{
@ -1154,12 +1232,16 @@ g_markup_parse_context_parse (GMarkupParseContext *context,
g_assert (context->tag_stack != NULL);
possibly_finish_subparser (context);
tmp_error = NULL;
if (context->parser->end_element)
(* context->parser->end_element) (context,
context->tag_stack->data,
context->user_data,
&tmp_error);
ensure_no_outstanding_subparser (context);
if (tmp_error)
{
@ -1612,6 +1694,8 @@ g_markup_parse_context_parse (GMarkupParseContext *context,
context->state = STATE_AFTER_CLOSE_ANGLE;
context->start = NULL;
possibly_finish_subparser (context);
/* call the end_element callback */
tmp_error = NULL;
if (context->parser->end_element)
@ -1620,6 +1704,7 @@ g_markup_parse_context_parse (GMarkupParseContext *context,
context->user_data,
&tmp_error);
ensure_no_outstanding_subparser (context);
/* Pop the tag stack */
g_free (context->tag_stack->data);
@ -1934,6 +2019,190 @@ g_markup_parse_context_get_position (GMarkupParseContext *context,
*char_number = context->char_number;
}
/**
* g_markup_parse_context_push:
* @context: a #GMarkupParseContext
* @parser: a #GMarkupParser
* @user_data: user data to pass to #GMarkupParser functions
*
* Temporarily redirects markup data to a sub-parser.
*
* This function may only be called from the start_element handler of
* a #GMarkupParser. It must be matched with a corresponding call to
* g_markup_parse_context_pop() in the matching end_element handler
* (except in the case that the parser aborts due to an error).
*
* All tags, text and other data between the matching tags is
* redirected to the subparser given by @parser. @user_data is used
* as the user_data for that parser. @user_data is also passed to the
* error callback in the event that an error occurs. This includes
* errors that occur in subparsers of the subparser.
*
* The end tag matching the start tag for which this call was made is
* handled by the previous parser (which is given its own user_data)
* which is why g_markup_parse_context_pop() is provided to allow "one
* last access" to the @user_data provided to this function. In the
* case of error, the @user_data provided here is passed directly to
* the error callback of the subparser and g_markup_parse_context_pop()
* should not be called. In either case, if @user_data was allocated
* then it ought to be freed from both of these locations.
*
* This function is not intended to be directly called by users
* interested in invoking subparsers. Instead, it is intended to be
* used by the subparsers themselves to implement a higher-level
* interface.
*
* As an example, see the following implementation of a simple
* parser that counts the number of tags encountered.
*
* |[
* typedef struct
* {
* gint tag_count;
* } CounterData;
*
* static void
* counter_start_element (GMarkupParseContext *context,
* const gchar *element_name,
* const gchar **attribute_names,
* const gchar **attribute_values,
* gpointer user_data,
* GError **error)
* {
* CounterData *data = user_data;
*
* data->tag_count++;
* }
*
* static void
* counter_error (GMarkupParseContext *context,
* GError *error,
* gpointer user_data)
* {
* CounterData *data = user_data;
*
* g_slice_free (CounterData, data);
* }
*
* static GMarkupParser counter_subparser =
* {
* counter_start_element,
* NULL,
* NULL,
* NULL,
* counter_error
* };
* ]|
*
* In order to allow this parser to be easily used as a subparser, the
* following interface is provided:
*
* |[
* void
* start_counting (GMarkupParseContext *context)
* {
* CounterData *data = g_slice_new (CounterData);
*
* data->tag_count = 0;
* g_markup_parse_context_push (context, &counter_subparser, data);
* }
*
* gint
* end_counting (GMarkupParseContext *context)
* {
* CounterData *data = g_markup_parse_context_pop (context);
* int result;
*
* result = data->tag_count;
* g_slice_free (CounterData, data);
*
* return result;
* }
* ]|
*
* The subparser would then be used as follows:
*
* |[
* static void start_element (context, element_name, ...)
* {
* if (strcmp (element_name, "count-these") == 0)
* start_counting (context);
*
* /&ast; else, handle other tags... &ast;/
* }
*
* static void end_element (context, element_name, ...)
* {
* if (strcmp (element_name, "count-these") == 0)
* g_print ("Counted %d tags\n", end_counting (context));
*
* /&ast; else, handle other tags... &ast;/
* }
* ]|
*
* Since: 2.18
**/
void
g_markup_parse_context_push (GMarkupParseContext *context,
                             GMarkupParser       *parser,
                             gpointer             user_data)
{
  GMarkupRecursionTracker *tracker;

  /* Turn API misuse into a critical warning instead of a crash: the
   * context is dereferenced below and the parser's callbacks are
   * invoked for every subsequent piece of markup.
   */
  g_return_if_fail (context != NULL);
  g_return_if_fail (parser != NULL);

  /* current_element() reads the top of the tag stack, so there has to
   * be an open element for the subparser to be attached to.
   */
  g_return_if_fail (context->tag_stack != NULL);

  /* remember what is active right now so it can be reinstated later */
  tracker = g_slice_new (GMarkupRecursionTracker);
  tracker->prev_element = context->subparser_element;
  tracker->prev_parser = context->parser;
  tracker->prev_user_data = context->user_data;

  /* redirect callbacks to the subparser; it stays in charge until the
   * element that is current at this moment gets closed
   */
  context->subparser_element = current_element (context);
  context->parser = parser;
  context->user_data = user_data;

  context->subparser_stack = g_slist_prepend (context->subparser_stack,
                                              tracker);
}
/**
* g_markup_parse_context_pop:
* @context: a #GMarkupParseContext
*
* Completes the process of a temporary sub-parser redirection.
*
* This function exists to collect the user_data allocated by a
* matching call to g_markup_parse_context_push(). It must be called
* in the end_element handler corresponding to the start_element
* handler during which g_markup_parse_context_push() was called. You
* must not call this function from the error callback -- the
* @user_data is provided directly to the callback in that case.
*
* This function is not intended to be directly called by users
* interested in invoking subparsers. Instead, it is intended to be
* used by the subparsers themselves to implement a higher-level
* interface.
*
* Returns: the user_data passed to g_markup_parse_context_push().
*
* Since: 2.18
**/
gpointer
g_markup_parse_context_pop (GMarkupParseContext *context)
{
  gpointer user_data;

  /* a NULL context is a caller bug; report it rather than crash */
  g_return_val_if_fail (context != NULL, NULL);

  /* If nothing is pending yet, the subparser may still be installed;
   * take it off the stack now if its element is the current one.
   */
  if (!context->awaiting_pop)
    possibly_finish_subparser (context);

  /* at this point there must be user_data waiting to be collected */
  g_assert (context->awaiting_pop);

  context->awaiting_pop = FALSE;

  /* valgrind friendliness: do not keep a stray copy of the pointer */
  user_data = context->held_user_data;
  context->held_user_data = NULL;

  return user_data;
}
static void
append_escaped_text (GString *str,
const gchar *text,

View File

@ -112,6 +112,10 @@ gboolean g_markup_parse_context_parse (GMarkupParseContext *context,
const gchar *text,
gssize text_len,
GError **error);
void g_markup_parse_context_push (GMarkupParseContext *context,
GMarkupParser *parser,
gpointer user_data);
gpointer g_markup_parse_context_pop (GMarkupParseContext *context);
gboolean g_markup_parse_context_end_parse (GMarkupParseContext *context,
GError **error);

View File

@ -22,6 +22,9 @@ TEST_PROGS += strfuncs
strfuncs_SOURCES = strfuncs.c
strfuncs_LDADD = $(progs_ldadd) -lm
TEST_PROGS += markup-subparser
markup_subparser_LDADD = $(progs_ldadd)
# some testing of gtester functionality
XMLLINT=xmllint
gtester-xmllint-check: # check testreport xml with xmllint if present

View File

@ -0,0 +1,377 @@
/*
* Copyright © 2008 Ryan Lortie
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* See the included COPYING file for more information.
*/
#include <string.h>
#include <stdio.h>
#include <glib.h>
/* keep track of GString instances to make sure nothing leaks */
static int strings_allocated;
/* === the GMarkupParser functions === */
/* start_element handler of the subparser.
 *
 * Appends "{name}" to the GString passed as user_data.  An element
 * called "trouble" additionally makes the handler report an error.
 * The error uses the G_MARKUP_ERROR domain because 0 is not a valid
 * GError domain.
 */
static void
subparser_start_element (GMarkupParseContext  *context,
                         const gchar          *element_name,
                         const gchar         **attribute_names,
                         const gchar         **attribute_values,
                         gpointer              user_data,
                         GError              **error)
{
  g_string_append_printf (user_data, "{%s}", element_name);

  /* we don't like trouble... */
  if (strcmp (element_name, "trouble") == 0)
    g_set_error (error, G_MARKUP_ERROR, G_MARKUP_ERROR_INVALID_CONTENT,
                 "we don't like trouble");
}
/* end_element handler of the subparser: records the closing tag as
 * "{/name}" in the GString passed as user_data.
 */
static void
subparser_end_element (GMarkupParseContext  *context,
                       const gchar          *element_name,
                       gpointer              user_data,
                       GError              **error)
{
  GString *collected = user_data;

  g_string_append_printf (collected, "{/%s}", element_name);
}
/* error handler of the subparser: the GString passed as user_data is
 * destroyed here (buffer included) and the allocation counter is
 * adjusted to match.
 */
static void
subparser_error (GMarkupParseContext *context,
                 GError              *error,
                 gpointer             user_data)
{
  GString *collected = user_data;

  strings_allocated--;
  g_string_free (collected, TRUE);
}
/* Callback table of the subparser.  Only the element handlers and the
 * error handler are set; the two slots in between are left NULL
 * (presumably the text and passthrough callbacks -- confirm against
 * the GMarkupParser declaration in gmarkup.h).
 */
static GMarkupParser subparser_parser =
{
subparser_start_element,
subparser_end_element,
NULL,
NULL,
subparser_error
};
/* convenience functions for a parser that does not
* replay the starting tag into the subparser...
*/
/* Begins a subparser run: creates the GString the subparser will
 * write into, counts it as allocated and pushes the subparser onto
 * the context with that string as its user_data.
 */
static void
subparser_start (GMarkupParseContext *ctx)
{
  GString *collector = g_string_new (NULL);

  strings_allocated++;

  g_markup_parse_context_push (ctx, &subparser_parser, collector);
}
/* Finishes a subparser run started with subparser_start().
 *
 * Collects the GString from the context and converts it into a plain
 * character buffer.
 *
 * Returns: the collected text, to be freed with g_free(), or NULL with
 * @error set if the subparser recorded nothing.  The error uses the
 * G_MARKUP_ERROR domain because 0 is not a valid GError domain.
 */
static char *
subparser_end (GMarkupParseContext  *ctx,
               GError              **error)
{
  GString *string;
  char *result;

  string = g_markup_parse_context_pop (ctx);

  /* keep the character data, release only the GString wrapper */
  result = g_string_free (string, FALSE);
  strings_allocated--;

  if (result == NULL || result[0] == '\0')
    {
      g_free (result);
      g_set_error (error, G_MARKUP_ERROR, G_MARKUP_ERROR_INVALID_CONTENT,
                   "got no data");

      return NULL;
    }

  return result;
}
/* convenience functions for a parser that -does-
* replay the starting tag into the subparser...
*/
/* Begins a "replay" subparser run.
 *
 * Unlike subparser_start(), the opening tag is first fed by hand to
 * the subparser's own start_element handler.  Only when that handler
 * accepts the tag is the subparser pushed onto the context.
 *
 * Returns: TRUE if the subparser was pushed; FALSE with @error set if
 * the handler rejected the tag, in which case the freshly created
 * string has already been destroyed.
 */
static gboolean
replay_parser_start (GMarkupParseContext  *ctx,
                     const char           *element_name,
                     const char          **attribute_names,
                     const char          **attribute_values,
                     GError              **error)
{
  GError *handler_error = NULL;
  GString *collector = g_string_new (NULL);

  strings_allocated++;

  /* replay the opening tag into the subparser */
  subparser_parser.start_element (ctx, element_name,
                                  attribute_names, attribute_values,
                                  collector, &handler_error);

  if (handler_error == NULL)
    {
      g_markup_parse_context_push (ctx, &subparser_parser, collector);

      return TRUE;
    }

  /* rejected: nothing was pushed, so clean up right here */
  g_propagate_error (error, handler_error);
  g_string_free (collector, TRUE);
  strings_allocated--;

  return FALSE;
}
/* Finishes a "replay" subparser run started with replay_parser_start().
 *
 * Collects the GString from the context, feeds the closing tag of the
 * current element by hand to the subparser's end_element handler and
 * converts the string into a plain character buffer.
 *
 * Returns: the collected text, to be freed with g_free(), or NULL with
 * @error set if the handler failed or nothing was recorded.  Errors
 * raised here use the G_MARKUP_ERROR domain because 0 is not a valid
 * GError domain.
 */
static char *
replay_parser_end (GMarkupParseContext  *ctx,
                   GError              **error)
{
  GError *tmp_error = NULL;
  GString *string;
  char *result;

  string = g_markup_parse_context_pop (ctx);

  /* replay the closing tag into the subparser */
  subparser_parser.end_element (ctx, g_markup_parse_context_get_element (ctx),
                                string, &tmp_error);

  if (tmp_error)
    {
      g_propagate_error (error, tmp_error);
      g_string_free (string, TRUE);
      strings_allocated--;

      /* this function returns a pointer, so fail with NULL */
      return NULL;
    }

  /* keep the character data, release only the GString wrapper */
  result = g_string_free (string, FALSE);
  strings_allocated--;

  if (result == NULL || result[0] == '\0')
    {
      g_free (result);
      g_set_error (error, G_MARKUP_ERROR, G_MARKUP_ERROR_INVALID_CONTENT,
                   "got no data");

      return NULL;
    }

  return result;
}
/* The prototypes below repeat the static functions already defined
 * above.  They list the entry points that the calling parser further
 * down relies on.
 */
/* === start interface between subparser and calling parser === */
static void subparser_start (GMarkupParseContext *ctx);
static char *subparser_end (GMarkupParseContext *ctx,
GError **error);
/* === end interface between subparser and calling parser === */
/* === start interface between replay parser and calling parser === */
static gboolean replay_parser_start (GMarkupParseContext *ctx,
const char *element_name,
const char **attribute_names,
const char **attribute_values,
GError **error);
static char *replay_parser_end (GMarkupParseContext *ctx,
GError **error);
/* === end interface between replay parser and calling parser === */
/* now comes our parser for the test.
*
* we recognise the tags <test> and <sub>.
* <test> is ignored.
* <sub> invokes the subparser (no replay).
*
* "unknown tags" are passed to the replay subparser
* (so the unknown tag is fed to the subparser...)
*/
/* start_element handler of the top-level test parser.
 *
 * Every opening tag is logged as "<name>" in the GString passed as
 * user_data.  <test> needs nothing more, <sub> starts the plain
 * subparser, and any other tag starts the replay subparser, which may
 * fail and set @error.
 */
static void
start_element (GMarkupParseContext  *context,
               const gchar          *element_name,
               const gchar         **attribute_names,
               const gchar         **attribute_values,
               gpointer              user_data,
               GError              **error)
{
  GString *output = user_data;

  g_string_append_printf (output, "<%s>", element_name);

  /* <test>: nothing further to do */
  if (strcmp (element_name, "test") == 0)
    return;

  /* <sub>: hand the contents over to the subparser */
  if (strcmp (element_name, "sub") == 0)
    {
      subparser_start (context);
      return;
    }

  /* unknown tag: hand tag and contents over to the replay subparser;
   * on failure @error is already set and there is nothing left to do
   */
  (void) replay_parser_start (context, element_name,
                              attribute_names, attribute_values,
                              error);
}
/* end_element handler of the top-level test parser.
 *
 * For </sub> the subparser result is logged as "<<...>>"; for unknown
 * tags the replay subparser result is logged as "[[...]]".  If
 * collecting the result fails, the handler returns with @error set and
 * logs nothing.  Otherwise the closing tag is logged as "</name>";
 * that is all that happens for </test>.
 */
static void
end_element (GMarkupParseContext  *context,
             const gchar          *element_name,
             gpointer              user_data,
             GError              **error)
{
  GString *output = user_data;

  if (strcmp (element_name, "test") != 0)
    {
      gboolean is_sub = (strcmp (element_name, "sub") == 0);
      char *captured;

      if (is_sub)
        captured = subparser_end (context, error);
      else
        captured = replay_parser_end (context, error);

      /* the helper has set @error already */
      if (captured == NULL)
        return;

      if (is_sub)
        g_string_append_printf (output, "<<%s>>", captured);
      else
        g_string_append_printf (output, "[[%s]]", captured);

      g_free (captured);
    }

  g_string_append_printf (output, "</%s>", element_name);
}
/* Callback table of the top-level test parser.  Only the two element
 * handlers are given; the members that follow them are left to the
 * implicit zero-initialisation, i.e. NULL.
 */
static GMarkupParser parser =
{
start_element,
end_element
};
/* One table-driven test: a document to parse and what test() expects
 * to observe for it.
 */
typedef struct
{
/* the document handed to the parser */
const char *markup;
/* exact log expected in the top-level GString after parsing */
const char *result;
/* NULL if parsing must succeed; otherwise a substring that the
 * reported error message must contain
 */
const char *error_message;
} TestCase;
/* Runs one TestCase (passed as @user_data).
 *
 * Parses the markup with the top-level test parser and then verifies,
 * in this order: that no subparser string is left allocated, that
 * success or failure matches the expectation (including the error
 * message on failure), and that the log produced is exactly the
 * expected one.  Any mismatch is reported through g_error().
 */
void
test (gconstpointer user_data)
{
  const TestCase *tc = user_data;
  GMarkupParseContext *ctx;
  GError *error = NULL;
  GString *string = g_string_new (NULL);
  gboolean result;

  ctx = g_markup_parse_context_new (&parser, 0, string, NULL);

  /* the end-of-document checks only run if the data itself parsed */
  result = g_markup_parse_context_parse (ctx, tc->markup,
                                         strlen (tc->markup), &error)
           && g_markup_parse_context_end_parse (ctx, &error);

  g_markup_parse_context_free (ctx);

  /* whichever way it went, every subparser string must be gone */
  g_assert (strings_allocated == 0);

  if (!result)
    {
      if (!tc->error_message)
        g_error ("unexpected failure: '%s'\n"
                 " in: %s\n out: %s",
                 error->message, tc->markup, string->str);

      if (!strstr (error->message, tc->error_message))
        g_error ("failed for the wrong reason.\n"
                 " expecting message about '%s'\n"
                 " got message '%s'\n"
                 " in: %s\n out: %s",
                 tc->error_message, error->message, tc->markup, string->str);
    }
  else if (tc->error_message)
    {
      g_error ("expected failure (about '%s') passed!\n"
               " in: %s\n out: %s",
               tc->error_message, tc->markup, string->str);
    }

  if (strcmp (string->str, tc->result) != 0)
    g_error ("got the wrong result.\n"
             " expected: '%s'\n"
             " got: '%s'\n"
             " input: %s",
             tc->result, string->str, tc->markup);

  if (error != NULL)
    g_error_free (error);

  g_string_free (string, TRUE);
}
/* Documents that must parse successfully, with the log they produce.
 *
 * Reading the expected log:
 *   <name> and </name>  are written by the top-level parser,
 *   {name} and {/name}  are written by the subparser,
 *   <<...>>             wraps what the subparser collected for <sub>,
 *   [[...]]             wraps what the replay subparser collected for
 *                       an unknown tag (that tag itself included).
 */
TestCase test_cases[] = /* successful runs */
{
/* in */ /* out */
{ "<test/>", "<test></test>" },
{ "<sub><foo/></sub>", "<sub><<{foo}{/foo}>></sub>" },
{ "<sub><foo/><bar/></sub>", "<sub><<{foo}{/foo}{bar}{/bar}>></sub>" },
{ "<foo><bar/></foo>", "<foo>[[{foo}{bar}{/bar}{/foo}]]</foo>" },
{ "<foo><x/><y/></foo>", "<foo>[[{foo}{x}{/x}{y}{/y}{/foo}]]</foo>" },
{ "<foo/>", "<foo>[[{foo}{/foo}]]</foo>" },
{ "<sub><foo/></sub><bar/>", "<sub><<{foo}{/foo}>></sub>"
"<bar>[[{bar}{/bar}]]</bar>" }
};
/* Documents that must fail to parse.
 *
 * "out" is the log the top-level parser is expected to have written by
 * the time of the failure; "error" is a substring that must occur in
 * the reported error message.
 */
TestCase error_cases[] = /* error cases */
{
/* in */ /* out */ /* error */
{ "<foo><>", "<foo>", ">"},
{ "", "", "empty" },
{ "<trouble/>", "<trouble>", "trouble" },
{ "<sub><trouble>", "<sub>", "trouble" },
{ "<foo><trouble>", "<foo>", "trouble" },
{ "<sub></sub>", "<sub>", "no data" },
{ "<sub/>", "<sub>", "no data" }
};
/* Registers one test per element of @array.
 *
 * @func is the test function, @basename the common path prefix and
 * @array a real array (not a pointer) of test data.  The tests are
 * named "<basename>/<index>" and each receives a pointer to its array
 * element.
 *
 * The index is unsigned so that it compares cleanly with
 * G_N_ELEMENTS().  The local names avoid the double-underscore prefix,
 * which is reserved for the implementation.
 */
#define add_tests(func, basename, array) \
  G_STMT_START { \
    guint add_tests_i_; \
    \
    for (add_tests_i_ = 0; \
         add_tests_i_ < G_N_ELEMENTS (array); \
         add_tests_i_++) \
      { \
        char *add_tests_name_; \
        \
        add_tests_name_ = g_strdup_printf ("%s/%u", (basename), \
                                           add_tests_i_); \
        g_test_add_data_func (add_tests_name_, \
                              &(array)[add_tests_i_], (func)); \
        g_free (add_tests_name_); \
      } \
  } G_STMT_END
/* Test program entry point: registers the success and the failure
 * tables with the GLib test framework and runs everything.
 */
int
main (int argc, char **argv)
{
  int status;

  g_test_init (&argc, &argv, NULL);

  add_tests (test, "/glib/markup/subparser/success", test_cases);
  add_tests (test, "/glib/markup/subparser/failure", error_cases);

  status = g_test_run ();

  return status;
}