schema parsing and markup fixes

wip
This commit is contained in:
Ryan Lortie
2013-11-05 12:17:21 -05:00
parent 1865885fb4
commit b3bee1d8d6
7 changed files with 271 additions and 137 deletions

View File

@@ -394,6 +394,7 @@ libgio_2_0_la_SOURCES = \
gioprivate.h \
giowin32-priv.h \
gloadableicon.c \
gmarkupreader.c \
gmount.c \
gmemoryinputstream.c \
gmemoryoutputstream.c \
@@ -569,6 +570,7 @@ gio_headers = \
gioscheduler.h \
giostream.h \
gloadableicon.h \
gmarkupreader.h \
gmount.h \
gmemoryinputstream.h \
gmemoryoutputstream.h \
@@ -685,7 +687,7 @@ gioenumtypes.c: $(gio_headers) gioenumtypes.c.template
gio-2.0.lib: libgio-2.0.la gio.def
lib -machine:@LIB_EXE_MACHINE_FLAG@ -name:libgio-2.0-$(LT_CURRENT_MINUS_AGE).dll -def:$(builddir)/gio.def -out:$@
bin_PROGRAMS = gio-querymodules glib-compile-schemas glib-compile-resources gsettings
bin_PROGRAMS = gio-querymodules glib-compile-schemas glib-compile-resources gsettings new-compiler
glib_compile_resources_LDADD = \
$(top_builddir)/glib/libglib-2.0.la \
@@ -709,6 +711,18 @@ gio_querymodules_LDADD = \
gconstructor_as_data.h: $(top_srcdir)/glib/gconstructor.h data-to-c.pl
$(AM_V_GEN) $(srcdir)/data-to-c.pl $(top_srcdir)/glib/gconstructor.h gconstructor_code > $@.tmp && mv $@.tmp $@
new_compiler_LDADD = \
$(top_builddir)/glib/libglib-2.0.la \
$(top_builddir)/gobject/libgobject-2.0.la \
libgio-2.0.la
new_compiler_SOURCES = \
gconstructor_as_data.h \
gvdb/gvdb-format.h \
gvdb/gvdb-builder.h \
gvdb/gvdb-builder.c \
new-compiler.c
glib_compile_schemas_LDADD = $(top_builddir)/glib/libglib-2.0.la
glib_compile_schemas_SOURCES = \
gconstructor_as_data.h \

View File

@@ -87,6 +87,7 @@
#include <gio/gioscheduler.h>
#include <gio/giostream.h>
#include <gio/gloadableicon.h>
#include <gio/gmarkupreader.h>
#include <gio/gmemoryinputstream.h>
#include <gio/gmemoryoutputstream.h>
#include <gio/gmount.h>

View File

@@ -337,6 +337,27 @@ g_markup_reader_is_text (GMarkupReader *reader)
return reader->state == READER_STATE_TEXT;
}
/**
 * g_markup_reader_is_whitespace:
 * @reader: a #GMarkupReader
 *
 * Checks whether @reader is currently positioned on text content made
 * up exclusively of ASCII whitespace, as judged by g_ascii_isspace().
 * Text content of length zero also counts as whitespace.
 *
 * Returns: %TRUE if @reader is on whitespace-only text, %FALSE if it
 *   is on non-whitespace text or is not on text at all
 */
gboolean
g_markup_reader_is_whitespace (GMarkupReader *reader)
{
  const gchar *text;
  gsize remaining;

  g_return_val_if_fail (G_IS_MARKUP_READER (reader), FALSE);

  /* Only text can be whitespace; every other reader state is "no". */
  if (reader->state != READER_STATE_TEXT)
    return FALSE;

  text = g_bytes_get_data (reader->content, &remaining);

  /* Walk the bytes; the first non-space byte settles the question. */
  for (; remaining > 0; remaining--, text++)
    {
      if (!g_ascii_isspace (*text))
        return FALSE;
    }

  return TRUE;
}
gboolean
g_markup_reader_is_eof (GMarkupReader *reader)
{
@@ -370,17 +391,27 @@ g_markup_reader_get_attributes (GMarkupReader *reader,
*attribute_values = (const gchar * const *) reader->attribute_values;
}
void
gboolean
g_markup_reader_collect_attributes (GMarkupReader *reader,
GError **error,
GMarkupCollectType first_type,
const gchar *first_name,
...)
{
gboolean ok;
va_list ap;
g_return_if_fail (G_IS_MARKUP_READER (reader));
g_return_if_fail (reader->state == READER_STATE_START_ELEMENT);
g_assert_not_reached ();
va_start (ap, first_name);
ok = g_markup_collect_attributesv (reader->element_name,
(const gchar **) reader->attribute_names,
(const gchar **) reader->attribute_values,
error, first_type, first_name, ap);
va_end (ap);
return ok;
}
GBytes *
@@ -406,18 +437,21 @@ g_markup_reader_unexpected (GMarkupReader *reader,
if (reader->state == READER_STATE_START_ELEMENT)
{
if (stack->next)
g_set_error (error, G_MARKUP_ERROR, G_MARKUP_ERROR_UNKNOWN_ELEMENT,
"Element <%s> is not valid inside of <%s>", reader->element_name, (gchar *) stack->next->data);
g_markup_reader_set_error (reader, error, G_MARKUP_ERROR, G_MARKUP_ERROR_UNKNOWN_ELEMENT,
"Element <%s> is not valid inside of <%s>",
reader->element_name, (gchar *) stack->next->data);
else
g_set_error (error, G_MARKUP_ERROR, G_MARKUP_ERROR_UNKNOWN_ELEMENT,
"Element <%s> is not valid at the document toplevel", reader->element_name);
g_markup_reader_set_error (reader, error, G_MARKUP_ERROR, G_MARKUP_ERROR_UNKNOWN_ELEMENT,
"Element <%s> is not valid at the document toplevel",
reader->element_name);
}
else /* TEXT */
{
g_assert (stack->next);
g_set_error (error, G_MARKUP_ERROR, G_MARKUP_ERROR_UNKNOWN_ELEMENT,
"Text content is not valid inside of <%s>", (gchar *) stack->next->data);
g_markup_reader_set_error (reader, error, G_MARKUP_ERROR, G_MARKUP_ERROR_UNKNOWN_ELEMENT,
"Text content is not valid inside of <%s>",
(gchar *) stack->next->data);
}
/* always 'fail' */
@@ -439,29 +473,117 @@ g_markup_reader_expect_end (GMarkupReader *reader,
return TRUE;
if (g_markup_reader_is_passthrough (reader))
continue;
continue; /* XXX: fixme? */
if (g_markup_reader_is_text (reader))
{
const gchar *data;
gsize length;
gsize i;
data = g_bytes_get_data (reader->content, &length);
for (i = 0; i < length; i++)
if (!g_ascii_isspace (data[i]))
{
const GSList *stack;
stack = g_markup_parse_context_get_element_stack (reader->context);
g_assert (stack->next);
g_set_error (error, G_MARKUP_ERROR, G_MARKUP_ERROR_INVALID_CONTENT,
"Text content is not valid inside of <%s>", (gchar *) stack->next->data);
return FALSE;
}
}
if (!g_markup_reader_is_whitespace (reader))
return g_markup_reader_unexpected (reader, error);
}
return TRUE;
}
/**
 * g_markup_reader_set_error:
 * @reader: a #GMarkupReader
 * @error: return location for a #GError, or %NULL
 * @domain: the error domain
 * @code: the error code
 * @format: printf()-style format string for the error message
 * @...: arguments for @format
 *
 * Creates a new #GError from @domain, @code and the formatted message
 * and stores it in @error.  If the parse context of @reader has the
 * %G_MARKUP_PREFIX_ERROR_POSITION flag set, the message is prefixed
 * with the context's current line and character number.
 *
 * If @error is %NULL then nothing is done.  It is a programmer error
 * to pass an @error that is already set.
 */
void
g_markup_reader_set_error (GMarkupReader  *reader,
                           GError        **error,
                           GQuark          domain,
                           gint            code,
                           const gchar    *format,
                           ...)
{
  va_list ap;

  /* @reader is dereferenced below, so validate it like the other
   * entry points of the reader do.
   */
  g_return_if_fail (G_IS_MARKUP_READER (reader));
  g_return_if_fail (error == NULL || *error == NULL);

  /* The caller is not interested in the error: skip the formatting. */
  if (!error)
    return;

  va_start (ap, format);
  *error = g_error_new_valist (domain, code, format, ap);
  va_end (ap);

  if (reader->context->flags & G_MARKUP_PREFIX_ERROR_POSITION)
    g_prefix_error (error, "line %d, column %d: ",
                    reader->context->line_number,
                    reader->context->char_number);
}
/**
 * g_markup_reader_collect_elements:
 * @reader: a #GMarkupReader
 * @cancellable: a #GCancellable, or %NULL
 * @user_data: data to pass to the element callbacks
 * @error: return location for a #GError, or %NULL
 * @first_name: the name of the first accepted child element
 * @...: the callback for @first_name, followed by more (name,
 *   callback) pairs, followed by %NULL
 *
 * Advances @reader until the next end element or the end of the
 * document, dispatching each start element found on the way to the
 * callback that was listed for its name.  Each callback has the
 * signature
 *
 *   gboolean (*) (GMarkupReader *, GCancellable *, gpointer, GError **)
 *
 * and is called with @reader, @cancellable, @user_data and @error.
 *
 * Whitespace-only text and passthrough data between the elements are
 * skipped.  A start element whose name is not in the list (or whose
 * listed callback is %NULL) and any non-whitespace text are reported
 * through g_markup_reader_unexpected().
 *
 * Returns: %TRUE if the end element or the end of the document was
 *   reached, %FALSE if advancing failed, a callback returned %FALSE
 *   or unexpected content was found
 */
gboolean
g_markup_reader_collect_elements (GMarkupReader  *reader,
                                  GCancellable   *cancellable,
                                  gpointer        user_data,
                                  GError        **error,
                                  const gchar    *first_name,
                                  ...)
{
  typedef gboolean (* ElementCallback) (GMarkupReader *, GCancellable *, gpointer, GError **);

  g_return_val_if_fail (G_IS_MARKUP_READER (reader), FALSE);

  while (g_markup_reader_advance (reader, cancellable, error))
    {
      if (g_markup_reader_is_end_element (reader) || g_markup_reader_is_eof (reader))
        return TRUE;

      /* Same policy as g_markup_reader_expect_end(): passthrough data
       * is skipped rather than being misreported as text content.
       */
      if (g_markup_reader_is_passthrough (reader))
        continue;

      if (g_markup_reader_is_start_element (reader, NULL))
        {
          const gchar *name = g_markup_reader_get_element_name (reader);
          ElementCallback handler = NULL;
          const gchar *candidate;
          va_list ap;

          /* The varargs are (name, callback) pairs: a callback must be
           * consumed for every name, whether or not the name matches.
           */
          va_start (ap, first_name);
          for (candidate = first_name; candidate != NULL; candidate = va_arg (ap, const gchar *))
            {
              ElementCallback callback = va_arg (ap, ElementCallback);

              if (g_str_equal (candidate, name))
                {
                  handler = callback;
                  break;
                }
            }
          va_end (ap);

          /* An element nobody asked for must be an error.  If it were
           * ignored, its own end tag would be taken for the end of the
           * parent element and collection would stop early with TRUE.
           */
          if (handler == NULL)
            return g_markup_reader_unexpected (reader, error);

          if (!(* handler) (reader, cancellable, user_data, error))
            return FALSE;
        }
      else if (!g_markup_reader_is_whitespace (reader))
        {
          g_markup_reader_unexpected (reader, error);
          break;
        }
    }

  return FALSE;
}
/**
 * g_markup_reader_collect_text:
 * @reader: a #GMarkupReader
 * @cancellable: a #GCancellable, or %NULL
 * @error: return location for a #GError, or %NULL
 *
 * Advances @reader up to the next end element, concatenating all of
 * the text content found on the way.  Passthrough data is skipped;
 * anything else that is not text is reported through
 * g_markup_reader_unexpected().
 *
 * Returns: a newly-allocated string holding the collected text (free
 *   it with g_free()), or %NULL if advancing failed or unexpected
 *   content was found
 */
gchar *
g_markup_reader_collect_text (GMarkupReader  *reader,
                              GCancellable   *cancellable,
                              GError        **error)
{
  GString *string;

  g_return_val_if_fail (G_IS_MARKUP_READER (reader), NULL);

  string = g_string_new (NULL);

  while (g_markup_reader_advance (reader, cancellable, error))
    {
      /* End of the element: hand the accumulated buffer to the caller. */
      if (g_markup_reader_is_end_element (reader))
        return g_string_free (string, FALSE);

      /* Same policy as g_markup_reader_expect_end(): passthrough data
       * is skipped rather than being misreported as text content.
       */
      if (g_markup_reader_is_passthrough (reader))
        continue;

      if (g_markup_reader_is_text (reader))
        {
          gconstpointer data;
          GBytes *bytes;
          gsize size;

          /* NOTE(review): assumes g_markup_reader_get_content() does not
           * return a new reference; if it does, it must be dropped here.
           */
          bytes = g_markup_reader_get_content (reader);
          data = g_bytes_get_data (bytes, &size);
          g_string_append_len (string, data, size);
        }
      else
        {
          g_markup_reader_unexpected (reader, error);
          break;
        }
    }

  /* Failure: discard whatever was collected so far. */
  g_string_free (string, TRUE);

  return NULL;
}

View File

@@ -66,6 +66,9 @@ gboolean g_markup_reader_is_passthrough (GMarkup
GLIB_AVAILABLE_IN_2_40
gboolean g_markup_reader_is_text (GMarkupReader *reader);
GLIB_AVAILABLE_IN_2_40
gboolean g_markup_reader_is_whitespace (GMarkupReader *reader);
GLIB_AVAILABLE_IN_2_40
const gchar * g_markup_reader_get_element_name (GMarkupReader *reader);
@@ -75,12 +78,20 @@ void g_markup_reader_get_attributes (GMarkup
const gchar * const **attribute_values);
GLIB_AVAILABLE_IN_2_40
void g_markup_reader_collect_attributes (GMarkupReader *content,
gboolean g_markup_reader_collect_attributes (GMarkupReader *reader,
GError **error,
GMarkupCollectType first_type,
const gchar *first_name,
...);
GLIB_AVAILABLE_IN_2_40
gboolean g_markup_reader_collect_elements (GMarkupReader *reader,
GCancellable *cancellable,
gpointer user_data,
GError **error,
const gchar *first_name,
...) G_GNUC_NULL_TERMINATED;
GLIB_AVAILABLE_IN_2_40
GBytes * g_markup_reader_get_content (GMarkupReader *reader);
@@ -92,6 +103,17 @@ GLIB_AVAILABLE_IN_2_40
gboolean g_markup_reader_expect_end (GMarkupReader *reader,
GCancellable *cancellable,
GError **error);
/* Sets @error from a printf()-style @format.  G_GNUC_PRINTF lets the
 * compiler check the variable arguments against @format (parameter 5,
 * arguments starting at parameter 6).
 */
GLIB_AVAILABLE_IN_2_40
void                    g_markup_reader_set_error                       (GMarkupReader               *reader,
                                                                         GError                     **error,
                                                                         GQuark                       domain,
                                                                         gint                         code,
                                                                         const gchar                 *format,
                                                                         ...) G_GNUC_PRINTF (5, 6);
GLIB_AVAILABLE_IN_2_40
gchar * g_markup_reader_collect_text (GMarkupReader *reader,
GCancellable *cancellable,
GError **error);
G_END_DECLS

View File

@@ -84,3 +84,14 @@ GLIB_AVAILABLE_IN_ALL
gboolean
g_markup_parse_context_parse_slightly (GMarkupParseContext *context,
GError **error);
GLIB_AVAILABLE_IN_ALL
gboolean
g_markup_collect_attributesv (const gchar *element_name,
const gchar **attribute_names,
const gchar **attribute_values,
GError **error,
GMarkupCollectType first_type,
const gchar *first_attr,
va_list ap);

View File

@@ -21,19 +21,19 @@
#include "config.h"
#include "gmarkup.h"
#include "gmarkup-private.h"
#include <stdarg.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include "gmarkup.h"
#include "gatomic.h"
#include "gslice.h"
#include "galloca.h"
#include "gstrfuncs.h"
#include "gstring.h"
#include "gtestutils.h"
#include "glibintl.h"
#include "gthread.h"
@@ -86,86 +86,6 @@
G_DEFINE_QUARK (g-markup-error-quark, g_markup_error)
/* States of the markup parser's state machine; the 'state' field of
 * GMarkupParseContext holds one of these values.  STATE_ERROR is the
 * value that the parse entry points compare against in order to report
 * failure to their caller.
 */
typedef enum
{
STATE_START,
STATE_AFTER_OPEN_ANGLE,
STATE_AFTER_CLOSE_ANGLE,
STATE_AFTER_ELISION_SLASH, /* the slash that obviates need for end element */
STATE_INSIDE_OPEN_TAG_NAME,
STATE_INSIDE_ATTRIBUTE_NAME,
STATE_AFTER_ATTRIBUTE_NAME,
STATE_BETWEEN_ATTRIBUTES,
STATE_AFTER_ATTRIBUTE_EQUALS_SIGN,
STATE_INSIDE_ATTRIBUTE_VALUE_SQ,
STATE_INSIDE_ATTRIBUTE_VALUE_DQ,
STATE_INSIDE_TEXT,
STATE_AFTER_CLOSE_TAG_SLASH,
STATE_INSIDE_CLOSE_TAG_NAME,
STATE_AFTER_CLOSE_TAG_NAME,
STATE_INSIDE_PASSTHROUGH,
STATE_ERROR
} GMarkupParseState;
/* Element type of GMarkupParseContext's subparser_stack list.
 * NOTE(review): the prev_* fields presumably record the element,
 * parser and user data that were in effect before a subparser was
 * pushed, so that they can be restored on pop -- confirm against the
 * push/pop code.
 */
typedef struct
{
const char *prev_element;
const GMarkupParser *prev_parser;
gpointer prev_user_data;
} GMarkupRecursionTracker;
/* Complete state of one markup parse. */
struct _GMarkupParseContext
{
const GMarkupParser *parser;
volatile gint ref_count;
/* tested for G_MARKUP_PREFIX_ERROR_POSITION when errors are reported */
GMarkupParseFlags flags;
/* position that is reported as "line %d, column %d" in prefixed errors */
gint line_number;
gint char_number;
/* STATE_ERROR here makes the parse entry points return FALSE */
GMarkupParseState state;
gpointer user_data;
GDestroyNotify dnotify;
/* A piece of character data or an element that
* hasn't "ended" yet so we haven't yet called
* the callback for it.
*/
GString *partial_chunk;
GSList *spare_chunks;
GSList *tag_stack;
GSList *tag_stack_gstr;
GSList *spare_list_nodes;
GString **attr_names;
GString **attr_values;
gint cur_attr;
gint alloc_attrs;
/* the chunk of input being parsed: its start, its length in bytes and
* the position one past its last byte (start + length)
*/
const gchar *current_text;
gssize current_text_len;
const gchar *current_text_end;
/* used to save the start of the last interesting thingy */
const gchar *start;
/* read position in the current chunk; the chunk has been fully
* consumed once this reaches current_text_end
*/
const gchar *iter;
guint document_empty : 1;
/* set while a chunk is being parsed; starting another parse while
* this is set is rejected
*/
guint parsing : 1;
guint awaiting_pop : 1;
gint balance;
/* subparser support */
GSList *subparser_stack; /* (GMarkupRecursionTracker *) */
const char *subparser_element;
gpointer held_user_data;
};
/*
* Helpers to reduce our allocation overhead, we have
* a well defined allocation lifecycle.
@@ -1096,6 +1016,9 @@ emit_end_element (GMarkupParseContext *context,
pop_tag (context);
}
static void g_markup_parse_context_set_text (GMarkupParseContext *context,
const gchar *text,
gssize text_len);
/**
* g_markup_parse_context_parse:
* @context: a #GMarkupParseContext
@@ -1127,22 +1050,42 @@ g_markup_parse_context_parse (GMarkupParseContext *context,
g_return_val_if_fail (context->state != STATE_ERROR, FALSE);
g_return_val_if_fail (!context->parsing, FALSE);
g_markup_parse_context_set_text (context, text, text_len);
while (context->iter != context->current_text_end)
if (!g_markup_parse_context_parse_slightly (context, error))
break;
context->parsing = FALSE;
return context->state != STATE_ERROR;
}
/* Installs @text as the chunk of input that @context parses next.
 *
 * A negative @text_len means that @text is nul-terminated and that its
 * length is to be found with strlen().  An empty chunk leaves @context
 * entirely untouched.  Otherwise @context is flagged as parsing, and
 * both its read position and its "start" marker are placed on the
 * first byte of the chunk.
 */
static void
g_markup_parse_context_set_text (GMarkupParseContext *context,
                                 const gchar         *text,
                                 gssize               text_len)
{
  if (text_len < 0)
    text_len = strlen (text);

  /* Nothing to parse.  This function is void, so there is no value to
   * return here.
   */
  if (text_len == 0)
    return;

  context->parsing = TRUE;

  context->current_text = text;
  context->current_text_len = text_len;
  context->current_text_end = context->current_text + text_len;
  context->iter = context->current_text;
  context->start = context->iter;
}
while (context->iter != context->current_text_end)
gboolean
g_markup_parse_context_parse_slightly (GMarkupParseContext *context,
GError **error)
{
if (context->iter != context->current_text_end)
{
switch (context->state)
{
@@ -1729,8 +1672,6 @@ g_markup_parse_context_parse (GMarkupParseContext *context,
}
finished:
context->parsing = FALSE;
return context->state != STATE_ERROR;
}
@@ -2668,27 +2609,28 @@ g_markup_parse_boolean (const char *string,
* Since: 2.16
**/
gboolean
g_markup_collect_attributes (const gchar *element_name,
const gchar **attribute_names,
const gchar **attribute_values,
GError **error,
GMarkupCollectType first_type,
const gchar *first_attr,
...)
g_markup_collect_attributesv (const gchar *element_name,
const gchar **attribute_names,
const gchar **attribute_values,
GError **error,
GMarkupCollectType first_type,
const gchar *first_attr,
va_list ap)
{
GMarkupCollectType type;
const gchar *attr;
guint64 collected;
int written;
va_list ap;
va_list aq;
int i;
G_VA_COPY (aq, ap);
type = first_type;
attr = first_attr;
collected = 0;
written = 0;
va_start (ap, first_attr);
while (type != G_MARKUP_COLLECT_INVALID)
{
gboolean mandatory;
@@ -2733,7 +2675,6 @@ g_markup_collect_attributes (const gchar *element_name,
"element '%s' requires attribute '%s'",
element_name, attr);
va_end (ap);
goto failure;
}
@@ -2791,7 +2732,6 @@ g_markup_collect_attributes (const gchar *element_name,
"cannot be parsed as a boolean value",
element_name, attr, value);
va_end (ap);
goto failure;
}
}
@@ -2806,7 +2746,6 @@ g_markup_collect_attributes (const gchar *element_name,
attr = va_arg (ap, const char *);
written++;
}
va_end (ap);
/* ensure we collected all the arguments */
for (i = 0; attribute_names[i]; i++)
@@ -2841,6 +2780,8 @@ g_markup_collect_attributes (const gchar *element_name,
goto failure;
}
va_end (aq);
return TRUE;
failure:
@@ -2848,12 +2789,11 @@ failure:
type = first_type;
attr = first_attr;
va_start (ap, first_attr);
while (type != G_MARKUP_COLLECT_INVALID)
{
gpointer ptr;
ptr = va_arg (ap, gpointer);
ptr = va_arg (aq, gpointer);
if (ptr != NULL)
{
@@ -2877,10 +2817,30 @@ failure:
}
}
type = va_arg (ap, GMarkupCollectType);
attr = va_arg (ap, const char *);
type = va_arg (aq, GMarkupCollectType);
attr = va_arg (aq, const char *);
}
va_end (ap);
va_end (aq);
return FALSE;
}
/* Variadic front end for g_markup_collect_attributesv(): captures the
 * arguments that follow @first_attr in a va_list, passes everything on
 * unchanged and returns whatever the va_list variant returned.
 */
gboolean
g_markup_collect_attributes (const gchar         *element_name,
                             const gchar        **attribute_names,
                             const gchar        **attribute_values,
                             GError             **error,
                             GMarkupCollectType   first_type,
                             const gchar         *first_attr,
                             ...)
{
  va_list args;
  gboolean success;

  va_start (args, first_attr);
  success = g_markup_collect_attributesv (element_name,
                                          attribute_names,
                                          attribute_values,
                                          error,
                                          first_type,
                                          first_attr,
                                          args);
  va_end (args);

  return success;
}

View File

@@ -91,6 +91,9 @@ GQuark g_markup_error_quark (void);
* attributes and tags, along with their contents. A qualified
* attribute or tag is one that contains ':' in its name (ie: is in
* another namespace). Since: 2.40.
* @G_MARKUP_IGNORE_PASSTHROUGH: Ignore (don't report) passthrough
* data on a #GMarkupReader. Meaningless with #GMarkupParseContext;
* just give a %NULL callback in your parser. Since: 2.40.
*
* Flags that affect the behaviour of the parser.
*/
@@ -99,7 +102,8 @@ typedef enum
G_MARKUP_DO_NOT_USE_THIS_UNSUPPORTED_FLAG = 1 << 0,
G_MARKUP_TREAT_CDATA_AS_TEXT = 1 << 1,
G_MARKUP_PREFIX_ERROR_POSITION = 1 << 2,
G_MARKUP_IGNORE_QUALIFIED = 1 << 3
G_MARKUP_IGNORE_QUALIFIED = 1 << 3,
G_MARKUP_IGNORE_PASSTHROUGH = 1 << 4
} GMarkupParseFlags;
/**