mirror of
https://gitlab.gnome.org/GNOME/glib.git
synced 2025-03-15 04:05:11 +01:00
Optimizations; don't scan the entire text in find_current_text_end(),
2004-11-28 Matthias Clasen <mclasen@redhat.com> * glib/gmarkup.c: Optimizations; don't scan the entire text in find_current_text_end(), split unescape_text() into multiple functions. (#159001, Havoc Pennington)
This commit is contained in:
parent
c77ae3942c
commit
86c78552ec
@ -1,3 +1,9 @@
|
|||||||
|
2004-11-28 Matthias Clasen <mclasen@redhat.com>
|
||||||
|
|
||||||
|
* glib/gmarkup.c: Optimizations; don't scan the entire text
|
||||||
|
in find_current_text_end(), split unescape_text() into multiple
|
||||||
|
functions. (#159001, Havoc Pennington)
|
||||||
|
|
||||||
2004-11-27 Matthias Clasen <mclasen@redhat.com>
|
2004-11-27 Matthias Clasen <mclasen@redhat.com>
|
||||||
|
|
||||||
* glib/gspawn.c (g_spawn_async_with_pipes): Update the @flags
|
* glib/gspawn.c (g_spawn_async_with_pipes): Update the @flags
|
||||||
|
@ -1,3 +1,9 @@
|
|||||||
|
2004-11-28 Matthias Clasen <mclasen@redhat.com>
|
||||||
|
|
||||||
|
* glib/gmarkup.c: Optimizations; don't scan the entire text
|
||||||
|
in find_current_text_end(), split unescape_text() into multiple
|
||||||
|
functions. (#159001, Havoc Pennington)
|
||||||
|
|
||||||
2004-11-27 Matthias Clasen <mclasen@redhat.com>
|
2004-11-27 Matthias Clasen <mclasen@redhat.com>
|
||||||
|
|
||||||
* glib/gspawn.c (g_spawn_async_with_pipes): Update the @flags
|
* glib/gspawn.c (g_spawn_async_with_pipes): Update the @flags
|
||||||
|
@ -1,3 +1,9 @@
|
|||||||
|
2004-11-28 Matthias Clasen <mclasen@redhat.com>
|
||||||
|
|
||||||
|
* glib/gmarkup.c: Optimizations; don't scan the entire text
|
||||||
|
in find_current_text_end(), split unescape_text() into multiple
|
||||||
|
functions. (#159001, Havoc Pennington)
|
||||||
|
|
||||||
2004-11-27 Matthias Clasen <mclasen@redhat.com>
|
2004-11-27 Matthias Clasen <mclasen@redhat.com>
|
||||||
|
|
||||||
* glib/gspawn.c (g_spawn_async_with_pipes): Update the @flags
|
* glib/gspawn.c (g_spawn_async_with_pipes): Update the @flags
|
||||||
|
@ -1,3 +1,9 @@
|
|||||||
|
2004-11-28 Matthias Clasen <mclasen@redhat.com>
|
||||||
|
|
||||||
|
* glib/gmarkup.c: Optimizations; don't scan the entire text
|
||||||
|
in find_current_text_end(), split unescape_text() into multiple
|
||||||
|
functions. (#159001, Havoc Pennington)
|
||||||
|
|
||||||
2004-11-27 Matthias Clasen <mclasen@redhat.com>
|
2004-11-27 Matthias Clasen <mclasen@redhat.com>
|
||||||
|
|
||||||
* glib/gspawn.c (g_spawn_async_with_pipes): Update the @flags
|
* glib/gspawn.c (g_spawn_async_with_pipes): Update the @flags
|
||||||
|
@ -1,3 +1,9 @@
|
|||||||
|
2004-11-28 Matthias Clasen <mclasen@redhat.com>
|
||||||
|
|
||||||
|
* glib/gmarkup.c: Optimizations; don't scan the entire text
|
||||||
|
in find_current_text_end(), split unescape_text() into multiple
|
||||||
|
functions. (#159001, Havoc Pennington)
|
||||||
|
|
||||||
2004-11-27 Matthias Clasen <mclasen@redhat.com>
|
2004-11-27 Matthias Clasen <mclasen@redhat.com>
|
||||||
|
|
||||||
* glib/gspawn.c (g_spawn_async_with_pipes): Update the @flags
|
* glib/gspawn.c (g_spawn_async_with_pipes): Update the @flags
|
||||||
|
673
glib/gmarkup.c
673
glib/gmarkup.c
@ -235,12 +235,24 @@ set_error (GMarkupParseContext *context,
|
|||||||
g_propagate_error (error, tmp_error);
|
g_propagate_error (error, tmp_error);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* To make these faster, we first use the ascii-only tests, then check
|
||||||
|
* for the usual non-alnum name-end chars, and only then call the
|
||||||
|
* expensive unicode stuff. Nobody uses non-ascii in XML tag/attribute
|
||||||
|
* names, so this is a reasonable hack that virtually always avoids
|
||||||
|
* the guniprop call.
|
||||||
|
*/
|
||||||
|
#define IS_COMMON_NAME_END_CHAR(c) \
|
||||||
|
((c) == '=' || (c) == '/' || (c) == '>' || (c) == ' ')
|
||||||
|
|
||||||
static gboolean
|
static gboolean
|
||||||
is_name_start_char (gunichar c)
|
is_name_start_char (gunichar c)
|
||||||
{
|
{
|
||||||
if (g_unichar_isalpha (c) ||
|
if (g_ascii_isalpha (c) ||
|
||||||
c == '_' ||
|
(!IS_COMMON_NAME_END_CHAR (c) &&
|
||||||
c == ':')
|
(g_unichar_isalpha (c) ||
|
||||||
|
c == '_' ||
|
||||||
|
c == ':')))
|
||||||
return TRUE;
|
return TRUE;
|
||||||
else
|
else
|
||||||
return FALSE;
|
return FALSE;
|
||||||
@ -249,11 +261,13 @@ is_name_start_char (gunichar c)
|
|||||||
static gboolean
|
static gboolean
|
||||||
is_name_char (gunichar c)
|
is_name_char (gunichar c)
|
||||||
{
|
{
|
||||||
if (g_unichar_isalnum (c) ||
|
if (g_ascii_isalnum (c) ||
|
||||||
c == '.' ||
|
(!IS_COMMON_NAME_END_CHAR (c) &&
|
||||||
c == '-' ||
|
(g_unichar_isalnum (c) ||
|
||||||
c == '_' ||
|
c == '.' ||
|
||||||
c == ':')
|
c == '-' ||
|
||||||
|
c == '_' ||
|
||||||
|
c == ':')))
|
||||||
return TRUE;
|
return TRUE;
|
||||||
else
|
else
|
||||||
return FALSE;
|
return FALSE;
|
||||||
@ -326,296 +340,369 @@ typedef enum
|
|||||||
USTATE_AFTER_CHARREF_HASH
|
USTATE_AFTER_CHARREF_HASH
|
||||||
} UnescapeState;
|
} UnescapeState;
|
||||||
|
|
||||||
static gboolean
|
typedef struct
|
||||||
unescape_text (GMarkupParseContext *context,
|
|
||||||
const gchar *text,
|
|
||||||
const gchar *text_end,
|
|
||||||
gchar **unescaped,
|
|
||||||
GError **error)
|
|
||||||
{
|
{
|
||||||
#define MAX_ENT_LEN 5
|
GMarkupParseContext *context;
|
||||||
GString *str;
|
GString *str;
|
||||||
const gchar *p;
|
|
||||||
UnescapeState state;
|
UnescapeState state;
|
||||||
|
const gchar *text;
|
||||||
|
const gchar *text_end;
|
||||||
|
const gchar *entity_start;
|
||||||
|
} UnescapeContext;
|
||||||
|
|
||||||
|
static const gchar*
|
||||||
|
unescape_text_state_inside_text (UnescapeContext *ucontext,
|
||||||
|
const gchar *p,
|
||||||
|
GError **error)
|
||||||
|
{
|
||||||
const gchar *start;
|
const gchar *start;
|
||||||
gboolean normalize_attribute;
|
gboolean normalize_attribute;
|
||||||
|
|
||||||
str = g_string_new (NULL);
|
if (ucontext->context->state == STATE_INSIDE_ATTRIBUTE_VALUE_SQ ||
|
||||||
|
ucontext->context->state == STATE_INSIDE_ATTRIBUTE_VALUE_DQ)
|
||||||
if (context->state == STATE_INSIDE_ATTRIBUTE_VALUE_SQ ||
|
|
||||||
context->state == STATE_INSIDE_ATTRIBUTE_VALUE_DQ)
|
|
||||||
normalize_attribute = TRUE;
|
normalize_attribute = TRUE;
|
||||||
else
|
else
|
||||||
normalize_attribute = FALSE;
|
normalize_attribute = FALSE;
|
||||||
|
|
||||||
state = USTATE_INSIDE_TEXT;
|
|
||||||
p = text;
|
|
||||||
start = p;
|
start = p;
|
||||||
|
|
||||||
|
while (p != ucontext->text_end)
|
||||||
|
{
|
||||||
|
if (*p == '&')
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
else if (normalize_attribute && (*p == '\t' || *p == '\n'))
|
||||||
|
{
|
||||||
|
g_string_append_len (ucontext->str, start, p - start);
|
||||||
|
g_string_append_c (ucontext->str, ' ');
|
||||||
|
p = g_utf8_next_char (p);
|
||||||
|
start = p;
|
||||||
|
}
|
||||||
|
else if (*p == '\r')
|
||||||
|
{
|
||||||
|
g_string_append_len (ucontext->str, start, p - start);
|
||||||
|
g_string_append_c (ucontext->str, normalize_attribute ? ' ' : '\n');
|
||||||
|
p = g_utf8_next_char (p);
|
||||||
|
if (p != ucontext->text_end && *p == '\n')
|
||||||
|
p = g_utf8_next_char (p);
|
||||||
|
start = p;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
p = g_utf8_next_char (p);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (p != start)
|
||||||
|
g_string_append_len (ucontext->str, start, p - start);
|
||||||
|
|
||||||
|
if (p != ucontext->text_end && *p == '&')
|
||||||
|
{
|
||||||
|
p = g_utf8_next_char (p);
|
||||||
|
ucontext->state = USTATE_AFTER_AMPERSAND;
|
||||||
|
}
|
||||||
|
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
|
||||||
|
__attribute__ ((noinline))
|
||||||
|
static const gchar*
|
||||||
|
unescape_text_state_after_ampersand (UnescapeContext *ucontext,
|
||||||
|
const gchar *p,
|
||||||
|
GError **error)
|
||||||
|
{
|
||||||
|
ucontext->entity_start = NULL;
|
||||||
|
|
||||||
|
if (*p == '#')
|
||||||
|
{
|
||||||
|
p = g_utf8_next_char (p);
|
||||||
|
|
||||||
|
ucontext->entity_start = p;
|
||||||
|
ucontext->state = USTATE_AFTER_CHARREF_HASH;
|
||||||
|
}
|
||||||
|
else if (!is_name_start_char (g_utf8_get_char (p)))
|
||||||
|
{
|
||||||
|
if (*p == ';')
|
||||||
|
{
|
||||||
|
set_unescape_error (ucontext->context, error,
|
||||||
|
p, ucontext->text_end,
|
||||||
|
G_MARKUP_ERROR_PARSE,
|
||||||
|
_("Empty entity '&;' seen; valid "
|
||||||
|
"entities are: &amp; &quot; &lt; &gt; &apos;"));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
gchar buf[7];
|
||||||
|
|
||||||
|
set_unescape_error (ucontext->context, error,
|
||||||
|
p, ucontext->text_end,
|
||||||
|
G_MARKUP_ERROR_PARSE,
|
||||||
|
_("Character '%s' is not valid at "
|
||||||
|
"the start of an entity name; "
|
||||||
|
"the & character begins an entity; "
|
||||||
|
"if this ampersand isn't supposed "
|
||||||
|
"to be an entity, escape it as "
|
||||||
|
"&amp;"),
|
||||||
|
utf8_str (p, buf));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ucontext->entity_start = p;
|
||||||
|
ucontext->state = USTATE_INSIDE_ENTITY_NAME;
|
||||||
|
}
|
||||||
|
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
|
||||||
|
__attribute__ ((noinline))
|
||||||
|
static const gchar*
|
||||||
|
unescape_text_state_inside_entity_name (UnescapeContext *ucontext,
|
||||||
|
const gchar *p,
|
||||||
|
GError **error)
|
||||||
|
{
|
||||||
|
#define MAX_ENT_LEN 5
|
||||||
|
gchar buf[MAX_ENT_LEN+1] = {
|
||||||
|
'\0', '\0', '\0', '\0', '\0', '\0'
|
||||||
|
};
|
||||||
|
gchar *dest;
|
||||||
|
|
||||||
|
while (p != ucontext->text_end)
|
||||||
|
{
|
||||||
|
if (*p == ';')
|
||||||
|
break;
|
||||||
|
else if (!is_name_char (*p))
|
||||||
|
{
|
||||||
|
gchar ubuf[7];
|
||||||
|
|
||||||
|
set_unescape_error (ucontext->context, error,
|
||||||
|
p, ucontext->text_end,
|
||||||
|
G_MARKUP_ERROR_PARSE,
|
||||||
|
_("Character '%s' is not valid "
|
||||||
|
"inside an entity name"),
|
||||||
|
utf8_str (p, ubuf));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
p = g_utf8_next_char (p);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ucontext->context->state != STATE_ERROR)
|
||||||
|
{
|
||||||
|
if (p != ucontext->text_end)
|
||||||
|
{
|
||||||
|
const gchar *src;
|
||||||
|
|
||||||
|
src = ucontext->entity_start;
|
||||||
|
dest = buf;
|
||||||
|
while (src != p)
|
||||||
|
{
|
||||||
|
*dest = *src;
|
||||||
|
++dest;
|
||||||
|
++src;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* move to after semicolon */
|
||||||
|
p = g_utf8_next_char (p);
|
||||||
|
ucontext->state = USTATE_INSIDE_TEXT;
|
||||||
|
|
||||||
|
if (strcmp (buf, "lt") == 0)
|
||||||
|
g_string_append_c (ucontext->str, '<');
|
||||||
|
else if (strcmp (buf, "gt") == 0)
|
||||||
|
g_string_append_c (ucontext->str, '>');
|
||||||
|
else if (strcmp (buf, "amp") == 0)
|
||||||
|
g_string_append_c (ucontext->str, '&');
|
||||||
|
else if (strcmp (buf, "quot") == 0)
|
||||||
|
g_string_append_c (ucontext->str, '"');
|
||||||
|
else if (strcmp (buf, "apos") == 0)
|
||||||
|
g_string_append_c (ucontext->str, '\'');
|
||||||
|
else
|
||||||
|
{
|
||||||
|
set_unescape_error (ucontext->context, error,
|
||||||
|
p, ucontext->text_end,
|
||||||
|
G_MARKUP_ERROR_PARSE,
|
||||||
|
_("Entity name '%s' is not known"),
|
||||||
|
buf);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
set_unescape_error (ucontext->context, error,
|
||||||
|
/* give line number of the & */
|
||||||
|
ucontext->entity_start, ucontext->text_end,
|
||||||
|
G_MARKUP_ERROR_PARSE,
|
||||||
|
_("Entity did not end with a semicolon; "
|
||||||
|
"most likely you used an ampersand "
|
||||||
|
"character without intending to start "
|
||||||
|
"an entity - escape ampersand as &amp;"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#undef MAX_ENT_LEN
|
||||||
|
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
|
||||||
|
__attribute__ ((noinline))
|
||||||
|
static const gchar*
|
||||||
|
unescape_text_state_after_charref_hash (UnescapeContext *ucontext,
|
||||||
|
const gchar *p,
|
||||||
|
GError **error)
|
||||||
|
{
|
||||||
|
gboolean is_hex = FALSE;
|
||||||
|
const char *start;
|
||||||
|
|
||||||
|
start = ucontext->entity_start;
|
||||||
|
|
||||||
|
if (*p == 'x')
|
||||||
|
{
|
||||||
|
is_hex = TRUE;
|
||||||
|
p = g_utf8_next_char (p);
|
||||||
|
start = p;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (p != ucontext->text_end && *p != ';')
|
||||||
|
p = g_utf8_next_char (p);
|
||||||
|
|
||||||
|
if (p != ucontext->text_end)
|
||||||
|
{
|
||||||
|
g_assert (*p == ';');
|
||||||
|
|
||||||
|
/* digit is between start and p */
|
||||||
|
|
||||||
|
if (start != p)
|
||||||
|
{
|
||||||
|
gchar *digit = g_strndup (start, p - start);
|
||||||
|
gulong l;
|
||||||
|
gchar *end = NULL;
|
||||||
|
gchar *digit_end = digit + (p - start);
|
||||||
|
|
||||||
|
errno = 0;
|
||||||
|
if (is_hex)
|
||||||
|
l = strtoul (digit, &end, 16);
|
||||||
|
else
|
||||||
|
l = strtoul (digit, &end, 10);
|
||||||
|
|
||||||
|
if (end != digit_end || errno != 0)
|
||||||
|
{
|
||||||
|
set_unescape_error (ucontext->context, error,
|
||||||
|
start, ucontext->text_end,
|
||||||
|
G_MARKUP_ERROR_PARSE,
|
||||||
|
_("Failed to parse '%s', which "
|
||||||
|
"should have been a digit "
|
||||||
|
"inside a character reference "
|
||||||
|
"(&#234; for example) - perhaps "
|
||||||
|
"the digit is too large"),
|
||||||
|
digit);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* characters XML permits */
|
||||||
|
if (l == 0x9 ||
|
||||||
|
l == 0xA ||
|
||||||
|
l == 0xD ||
|
||||||
|
(l >= 0x20 && l <= 0xD7FF) ||
|
||||||
|
(l >= 0xE000 && l <= 0xFFFD) ||
|
||||||
|
(l >= 0x10000 && l <= 0x10FFFF))
|
||||||
|
{
|
||||||
|
gchar buf[7];
|
||||||
|
g_string_append (ucontext->str, char_str (l, buf));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
set_unescape_error (ucontext->context, error,
|
||||||
|
start, ucontext->text_end,
|
||||||
|
G_MARKUP_ERROR_PARSE,
|
||||||
|
_("Character reference '%s' does not encode a permitted character"),
|
||||||
|
digit);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
g_free (digit);
|
||||||
|
|
||||||
|
/* Move to next state */
|
||||||
|
p = g_utf8_next_char (p); /* past semicolon */
|
||||||
|
ucontext->state = USTATE_INSIDE_TEXT;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
set_unescape_error (ucontext->context, error,
|
||||||
|
start, ucontext->text_end,
|
||||||
|
G_MARKUP_ERROR_PARSE,
|
||||||
|
_("Empty character reference; "
|
||||||
|
"should include a digit such as "
|
||||||
|
"&#454;"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
set_unescape_error (ucontext->context, error,
|
||||||
|
start, ucontext->text_end,
|
||||||
|
G_MARKUP_ERROR_PARSE,
|
||||||
|
_("Character reference did not end with a "
|
||||||
|
"semicolon; "
|
||||||
|
"most likely you used an ampersand "
|
||||||
|
"character without intending to start "
|
||||||
|
"an entity - escape ampersand as &amp;"));
|
||||||
|
}
|
||||||
|
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
|
||||||
|
static gboolean
|
||||||
|
unescape_text (GMarkupParseContext *context,
|
||||||
|
const gchar *text,
|
||||||
|
const gchar *text_end,
|
||||||
|
GString **unescaped,
|
||||||
|
GError **error)
|
||||||
|
{
|
||||||
|
UnescapeContext ucontext;
|
||||||
|
const gchar *p;
|
||||||
|
|
||||||
|
ucontext.context = context;
|
||||||
|
ucontext.text = text;
|
||||||
|
ucontext.text_end = text_end;
|
||||||
|
ucontext.entity_start = NULL;
|
||||||
|
|
||||||
|
ucontext.str = g_string_sized_new (text_end - text);
|
||||||
|
|
||||||
|
ucontext.state = USTATE_INSIDE_TEXT;
|
||||||
|
p = text;
|
||||||
|
|
||||||
while (p != text_end && context->state != STATE_ERROR)
|
while (p != text_end && context->state != STATE_ERROR)
|
||||||
{
|
{
|
||||||
g_assert (p < text_end);
|
g_assert (p < text_end);
|
||||||
|
|
||||||
switch (state)
|
switch (ucontext.state)
|
||||||
{
|
{
|
||||||
case USTATE_INSIDE_TEXT:
|
case USTATE_INSIDE_TEXT:
|
||||||
{
|
{
|
||||||
while (p != text_end && *p != '&')
|
p = unescape_text_state_inside_text (&ucontext,
|
||||||
{
|
p,
|
||||||
if ((*p == '\t' || *p == '\n') && normalize_attribute)
|
error);
|
||||||
{
|
|
||||||
g_string_append_len (str, start, p - start);
|
|
||||||
g_string_append_c (str, ' ');
|
|
||||||
p = g_utf8_next_char (p);
|
|
||||||
start = p;
|
|
||||||
}
|
|
||||||
else if (*p == '\r')
|
|
||||||
{
|
|
||||||
g_string_append_len (str, start, p - start);
|
|
||||||
g_string_append_c (str, normalize_attribute ? ' ' : '\n');
|
|
||||||
p = g_utf8_next_char (p);
|
|
||||||
if (*p == '\n')
|
|
||||||
p = g_utf8_next_char (p);
|
|
||||||
start = p;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
p = g_utf8_next_char (p);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (p != start)
|
|
||||||
{
|
|
||||||
g_string_append_len (str, start, p - start);
|
|
||||||
|
|
||||||
start = NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (p != text_end && *p == '&')
|
|
||||||
{
|
|
||||||
p = g_utf8_next_char (p);
|
|
||||||
state = USTATE_AFTER_AMPERSAND;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case USTATE_AFTER_AMPERSAND:
|
case USTATE_AFTER_AMPERSAND:
|
||||||
{
|
{
|
||||||
if (*p == '#')
|
p = unescape_text_state_after_ampersand (&ucontext,
|
||||||
{
|
p,
|
||||||
p = g_utf8_next_char (p);
|
error);
|
||||||
|
|
||||||
start = p;
|
|
||||||
state = USTATE_AFTER_CHARREF_HASH;
|
|
||||||
}
|
|
||||||
else if (!is_name_start_char (g_utf8_get_char (p)))
|
|
||||||
{
|
|
||||||
if (*p == ';')
|
|
||||||
{
|
|
||||||
set_unescape_error (context, error,
|
|
||||||
p, text_end,
|
|
||||||
G_MARKUP_ERROR_PARSE,
|
|
||||||
_("Empty entity '&;' seen; valid "
|
|
||||||
"entities are: &amp; &quot; &lt; &gt; &apos;"));
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
gchar buf[7];
|
|
||||||
|
|
||||||
set_unescape_error (context, error,
|
|
||||||
p, text_end,
|
|
||||||
G_MARKUP_ERROR_PARSE,
|
|
||||||
_("Character '%s' is not valid at "
|
|
||||||
"the start of an entity name; "
|
|
||||||
"the & character begins an entity; "
|
|
||||||
"if this ampersand isn't supposed "
|
|
||||||
"to be an entity, escape it as "
|
|
||||||
"&amp;"),
|
|
||||||
utf8_str (p, buf));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
start = p;
|
|
||||||
state = USTATE_INSIDE_ENTITY_NAME;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
|
||||||
case USTATE_INSIDE_ENTITY_NAME:
|
case USTATE_INSIDE_ENTITY_NAME:
|
||||||
{
|
{
|
||||||
gchar buf[MAX_ENT_LEN+1] = {
|
p = unescape_text_state_inside_entity_name (&ucontext,
|
||||||
'\0', '\0', '\0', '\0', '\0', '\0'
|
p,
|
||||||
};
|
error);
|
||||||
gchar *dest;
|
|
||||||
|
|
||||||
while (p != text_end)
|
|
||||||
{
|
|
||||||
if (*p == ';')
|
|
||||||
break;
|
|
||||||
else if (!is_name_char (*p))
|
|
||||||
{
|
|
||||||
gchar ubuf[7];
|
|
||||||
|
|
||||||
set_unescape_error (context, error,
|
|
||||||
p, text_end,
|
|
||||||
G_MARKUP_ERROR_PARSE,
|
|
||||||
_("Character '%s' is not valid "
|
|
||||||
"inside an entity name"),
|
|
||||||
utf8_str (p, ubuf));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
p = g_utf8_next_char (p);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (context->state != STATE_ERROR)
|
|
||||||
{
|
|
||||||
if (p != text_end)
|
|
||||||
{
|
|
||||||
const gchar *src;
|
|
||||||
|
|
||||||
src = start;
|
|
||||||
dest = buf;
|
|
||||||
while (src != p)
|
|
||||||
{
|
|
||||||
*dest = *src;
|
|
||||||
++dest;
|
|
||||||
++src;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* move to after semicolon */
|
|
||||||
p = g_utf8_next_char (p);
|
|
||||||
start = p;
|
|
||||||
state = USTATE_INSIDE_TEXT;
|
|
||||||
|
|
||||||
if (strcmp (buf, "lt") == 0)
|
|
||||||
g_string_append_c (str, '<');
|
|
||||||
else if (strcmp (buf, "gt") == 0)
|
|
||||||
g_string_append_c (str, '>');
|
|
||||||
else if (strcmp (buf, "amp") == 0)
|
|
||||||
g_string_append_c (str, '&');
|
|
||||||
else if (strcmp (buf, "quot") == 0)
|
|
||||||
g_string_append_c (str, '"');
|
|
||||||
else if (strcmp (buf, "apos") == 0)
|
|
||||||
g_string_append_c (str, '\'');
|
|
||||||
else
|
|
||||||
{
|
|
||||||
set_unescape_error (context, error,
|
|
||||||
p, text_end,
|
|
||||||
G_MARKUP_ERROR_PARSE,
|
|
||||||
_("Entity name '%s' is not known"),
|
|
||||||
buf);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
set_unescape_error (context, error,
|
|
||||||
/* give line number of the & */
|
|
||||||
start, text_end,
|
|
||||||
G_MARKUP_ERROR_PARSE,
|
|
||||||
_("Entity did not end with a semicolon; "
|
|
||||||
"most likely you used an ampersand "
|
|
||||||
"character without intending to start "
|
|
||||||
"an entity - escape ampersand as &amp;"));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case USTATE_AFTER_CHARREF_HASH:
|
case USTATE_AFTER_CHARREF_HASH:
|
||||||
{
|
{
|
||||||
gboolean is_hex = FALSE;
|
p = unescape_text_state_after_charref_hash (&ucontext,
|
||||||
if (*p == 'x')
|
p,
|
||||||
{
|
error);
|
||||||
is_hex = TRUE;
|
|
||||||
p = g_utf8_next_char (p);
|
|
||||||
start = p;
|
|
||||||
}
|
|
||||||
|
|
||||||
while (p != text_end && *p != ';')
|
|
||||||
p = g_utf8_next_char (p);
|
|
||||||
|
|
||||||
if (p != text_end)
|
|
||||||
{
|
|
||||||
g_assert (*p == ';');
|
|
||||||
|
|
||||||
/* digit is between start and p */
|
|
||||||
|
|
||||||
if (start != p)
|
|
||||||
{
|
|
||||||
gchar *digit = g_strndup (start, p - start);
|
|
||||||
gulong l;
|
|
||||||
gchar *end = NULL;
|
|
||||||
gchar *digit_end = digit + (p - start);
|
|
||||||
|
|
||||||
errno = 0;
|
|
||||||
if (is_hex)
|
|
||||||
l = strtoul (digit, &end, 16);
|
|
||||||
else
|
|
||||||
l = strtoul (digit, &end, 10);
|
|
||||||
|
|
||||||
if (end != digit_end || errno != 0)
|
|
||||||
{
|
|
||||||
set_unescape_error (context, error,
|
|
||||||
start, text_end,
|
|
||||||
G_MARKUP_ERROR_PARSE,
|
|
||||||
_("Failed to parse '%s', which "
|
|
||||||
"should have been a digit "
|
|
||||||
"inside a character reference "
|
|
||||||
"(&#234; for example) - perhaps "
|
|
||||||
"the digit is too large"),
|
|
||||||
digit);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
/* characters XML permits */
|
|
||||||
if (l == 0x9 ||
|
|
||||||
l == 0xA ||
|
|
||||||
l == 0xD ||
|
|
||||||
(l >= 0x20 && l <= 0xD7FF) ||
|
|
||||||
(l >= 0xE000 && l <= 0xFFFD) ||
|
|
||||||
(l >= 0x10000 && l <= 0x10FFFF))
|
|
||||||
{
|
|
||||||
gchar buf[7];
|
|
||||||
g_string_append (str, char_str (l, buf));
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
set_unescape_error (context, error,
|
|
||||||
start, text_end,
|
|
||||||
G_MARKUP_ERROR_PARSE,
|
|
||||||
_("Character reference '%s' does not encode a permitted character"),
|
|
||||||
digit);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
g_free (digit);
|
|
||||||
|
|
||||||
/* Move to next state */
|
|
||||||
p = g_utf8_next_char (p); /* past semicolon */
|
|
||||||
start = p;
|
|
||||||
state = USTATE_INSIDE_TEXT;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
set_unescape_error (context, error,
|
|
||||||
start, text_end,
|
|
||||||
G_MARKUP_ERROR_PARSE,
|
|
||||||
_("Empty character reference; "
|
|
||||||
"should include a digit such as "
|
|
||||||
"&#454;"));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
set_unescape_error (context, error,
|
|
||||||
start, text_end,
|
|
||||||
G_MARKUP_ERROR_PARSE,
|
|
||||||
_("Character reference did not end with a "
|
|
||||||
"semicolon; "
|
|
||||||
"most likely you used an ampersand "
|
|
||||||
"character without intending to start "
|
|
||||||
"an entity - escape ampersand as &amp;"));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -627,7 +714,7 @@ unescape_text (GMarkupParseContext *context,
|
|||||||
|
|
||||||
if (context->state != STATE_ERROR)
|
if (context->state != STATE_ERROR)
|
||||||
{
|
{
|
||||||
switch (state)
|
switch (ucontext.state)
|
||||||
{
|
{
|
||||||
case USTATE_INSIDE_TEXT:
|
case USTATE_INSIDE_TEXT:
|
||||||
break;
|
break;
|
||||||
@ -649,31 +736,28 @@ unescape_text (GMarkupParseContext *context,
|
|||||||
|
|
||||||
if (context->state == STATE_ERROR)
|
if (context->state == STATE_ERROR)
|
||||||
{
|
{
|
||||||
g_string_free (str, TRUE);
|
g_string_free (ucontext.str, TRUE);
|
||||||
*unescaped = NULL;
|
*unescaped = NULL;
|
||||||
return FALSE;
|
return FALSE;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
*unescaped = g_string_free (str, FALSE);
|
*unescaped = ucontext.str;
|
||||||
return TRUE;
|
return TRUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
#undef MAX_ENT_LEN
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static gboolean
|
static inline gboolean
|
||||||
advance_char (GMarkupParseContext *context)
|
advance_char (GMarkupParseContext *context)
|
||||||
{
|
{
|
||||||
g_return_val_if_fail (context->iter != context->current_text_end, FALSE);
|
|
||||||
|
|
||||||
context->iter = g_utf8_next_char (context->iter);
|
context->iter = g_utf8_next_char (context->iter);
|
||||||
context->char_number += 1;
|
context->char_number += 1;
|
||||||
|
|
||||||
if (context->iter == context->current_text_end)
|
if (context->iter == context->current_text_end)
|
||||||
return FALSE;
|
{
|
||||||
|
return FALSE;
|
||||||
if (*context->iter == '\n')
|
}
|
||||||
|
else if (*context->iter == '\n')
|
||||||
{
|
{
|
||||||
context->line_number += 1;
|
context->line_number += 1;
|
||||||
context->char_number = 1;
|
context->char_number = 1;
|
||||||
@ -682,7 +766,7 @@ advance_char (GMarkupParseContext *context)
|
|||||||
return TRUE;
|
return TRUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
static gboolean
|
static inline gboolean
|
||||||
xml_isspace (char c)
|
xml_isspace (char c)
|
||||||
{
|
{
|
||||||
return c == ' ' || c == '\t' || c == '\n' || c == '\r';
|
return c == ' ' || c == '\t' || c == '\n' || c == '\r';
|
||||||
@ -716,7 +800,7 @@ add_to_partial (GMarkupParseContext *context,
|
|||||||
const gchar *text_end)
|
const gchar *text_end)
|
||||||
{
|
{
|
||||||
if (context->partial_chunk == NULL)
|
if (context->partial_chunk == NULL)
|
||||||
context->partial_chunk = g_string_new (NULL);
|
context->partial_chunk = g_string_sized_new (text_end - text_start);
|
||||||
|
|
||||||
if (text_start != text_end)
|
if (text_start != text_end)
|
||||||
g_string_append_len (context->partial_chunk, text_start,
|
g_string_append_len (context->partial_chunk, text_start,
|
||||||
@ -750,23 +834,18 @@ current_attribute (GMarkupParseContext *context)
|
|||||||
static void
|
static void
|
||||||
find_current_text_end (GMarkupParseContext *context)
|
find_current_text_end (GMarkupParseContext *context)
|
||||||
{
|
{
|
||||||
/* This function must be safe (non-segfaulting) on invalid UTF8 */
|
/* This function must be safe (non-segfaulting) on invalid UTF8.
|
||||||
|
* It assumes the string starts with a character start
|
||||||
|
*/
|
||||||
const gchar *end = context->current_text + context->current_text_len;
|
const gchar *end = context->current_text + context->current_text_len;
|
||||||
const gchar *p;
|
const gchar *p;
|
||||||
const gchar *next;
|
const gchar *next;
|
||||||
|
|
||||||
g_assert (context->current_text_len > 0);
|
g_assert (context->current_text_len > 0);
|
||||||
|
|
||||||
p = context->current_text;
|
p = g_utf8_find_prev_char (context->current_text, end);
|
||||||
next = g_utf8_find_next_char (p, end);
|
|
||||||
|
|
||||||
while (next && *next)
|
g_assert (p != NULL); /* since current_text was a char start */
|
||||||
{
|
|
||||||
if (p == next)
|
|
||||||
next++;
|
|
||||||
p = next;
|
|
||||||
next = g_utf8_find_next_char (p, end);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* p is now the start of the last character or character portion. */
|
/* p is now the start of the last character or character portion. */
|
||||||
g_assert (p != end);
|
g_assert (p != end);
|
||||||
@ -934,8 +1013,8 @@ g_markup_parse_context_parse (GMarkupParseContext *context,
|
|||||||
* we could have a trailing incomplete char)
|
* we could have a trailing incomplete char)
|
||||||
*/
|
*/
|
||||||
if (!g_utf8_validate (context->current_text,
|
if (!g_utf8_validate (context->current_text,
|
||||||
context->current_text_len,
|
context->current_text_len,
|
||||||
&first_invalid))
|
&first_invalid))
|
||||||
{
|
{
|
||||||
gint newlines = 0;
|
gint newlines = 0;
|
||||||
const gchar *p;
|
const gchar *p;
|
||||||
@ -1352,6 +1431,8 @@ g_markup_parse_context_parse (GMarkupParseContext *context,
|
|||||||
* with the partial chunk if any; set it for the current
|
* with the partial chunk if any; set it for the current
|
||||||
* attribute.
|
* attribute.
|
||||||
*/
|
*/
|
||||||
|
GString *unescaped;
|
||||||
|
|
||||||
add_to_partial (context, context->start, context->iter);
|
add_to_partial (context, context->start, context->iter);
|
||||||
|
|
||||||
g_assert (context->cur_attr >= 0);
|
g_assert (context->cur_attr >= 0);
|
||||||
@ -1360,10 +1441,11 @@ g_markup_parse_context_parse (GMarkupParseContext *context,
|
|||||||
context->partial_chunk->str,
|
context->partial_chunk->str,
|
||||||
context->partial_chunk->str +
|
context->partial_chunk->str +
|
||||||
context->partial_chunk->len,
|
context->partial_chunk->len,
|
||||||
&context->attr_values[context->cur_attr],
|
&unescaped,
|
||||||
error))
|
error))
|
||||||
{
|
{
|
||||||
/* success, advance past quote and set state. */
|
/* success, advance past quote and set state. */
|
||||||
|
context->attr_values[context->cur_attr] = g_string_free (unescaped, FALSE);
|
||||||
advance_char (context);
|
advance_char (context);
|
||||||
context->state = STATE_BETWEEN_ATTRIBUTES;
|
context->state = STATE_BETWEEN_ATTRIBUTES;
|
||||||
context->start = NULL;
|
context->start = NULL;
|
||||||
@ -1390,7 +1472,7 @@ g_markup_parse_context_parse (GMarkupParseContext *context,
|
|||||||
|
|
||||||
if (context->iter != context->current_text_end)
|
if (context->iter != context->current_text_end)
|
||||||
{
|
{
|
||||||
gchar *unescaped = NULL;
|
GString *unescaped = NULL;
|
||||||
|
|
||||||
/* The text has ended at the open angle. Call the text
|
/* The text has ended at the open angle. Call the text
|
||||||
* callback.
|
* callback.
|
||||||
@ -1407,12 +1489,12 @@ g_markup_parse_context_parse (GMarkupParseContext *context,
|
|||||||
|
|
||||||
if (context->parser->text)
|
if (context->parser->text)
|
||||||
(*context->parser->text) (context,
|
(*context->parser->text) (context,
|
||||||
unescaped,
|
unescaped->str,
|
||||||
strlen (unescaped),
|
unescaped->len,
|
||||||
context->user_data,
|
context->user_data,
|
||||||
&tmp_error);
|
&tmp_error);
|
||||||
|
|
||||||
g_free (unescaped);
|
g_string_free (unescaped, TRUE);
|
||||||
|
|
||||||
if (tmp_error == NULL)
|
if (tmp_error == NULL)
|
||||||
{
|
{
|
||||||
@ -1869,7 +1951,8 @@ g_markup_escape_text (const gchar *text,
|
|||||||
if (length < 0)
|
if (length < 0)
|
||||||
length = strlen (text);
|
length = strlen (text);
|
||||||
|
|
||||||
str = g_string_new (NULL);
|
/* prealloc at least as long as original text */
|
||||||
|
str = g_string_sized_new (length);
|
||||||
append_escaped_text (str, text, length);
|
append_escaped_text (str, text, length);
|
||||||
|
|
||||||
return g_string_free (str, FALSE);
|
return g_string_free (str, FALSE);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user