Optimizations; don't scan the entire text in find_current_text_end(),

2004-11-28 Matthias Clasen <mclasen@redhat.com> * glib/gmarkup.c: Optimizations; don't scan the entire text in find_current_text_end(), split unescape_text() into multiple functions. (#159001, Havoc Pennington)
2025-11-21 09:57:30 +01:00 · 2004-11-28 05:40:10 +00:00
parent c77ae3942c
commit 86c78552ec
6 changed files with 410 additions and 297 deletions
--- a/6
+++ b/6
@@ -1,3 +1,9 @@
 2004-11-28  Matthias Clasen  <mclasen@redhat.com>
 	* glib/gmarkup.c: Optimizations; don't scan the entire text
 	in find_current_text_end(), split unescape_text() into multiple
 	functions.  (#159001, Havoc Pennington)
 2004-11-27  Matthias Clasen  <mclasen@redhat.com>
 	* glib/gspawn.c (g_spawn_async_with_pipes): Update the @flags
--- a/ChangeLog.pre-2-10
+++ b/ChangeLog.pre-2-10
@@ -1,3 +1,9 @@
 2004-11-28  Matthias Clasen  <mclasen@redhat.com>
 	* glib/gmarkup.c: Optimizations; don't scan the entire text
 	in find_current_text_end(), split unescape_text() into multiple
 	functions.  (#159001, Havoc Pennington)
 2004-11-27  Matthias Clasen  <mclasen@redhat.com>
 	* glib/gspawn.c (g_spawn_async_with_pipes): Update the @flags
--- a/ChangeLog.pre-2-12
+++ b/ChangeLog.pre-2-12
@@ -1,3 +1,9 @@
 2004-11-28  Matthias Clasen  <mclasen@redhat.com>
 	* glib/gmarkup.c: Optimizations; don't scan the entire text
 	in find_current_text_end(), split unescape_text() into multiple
 	functions.  (#159001, Havoc Pennington)
 2004-11-27  Matthias Clasen  <mclasen@redhat.com>
 	* glib/gspawn.c (g_spawn_async_with_pipes): Update the @flags
--- a/ChangeLog.pre-2-6
+++ b/ChangeLog.pre-2-6
@@ -1,3 +1,9 @@
 2004-11-28  Matthias Clasen  <mclasen@redhat.com>
 	* glib/gmarkup.c: Optimizations; don't scan the entire text
 	in find_current_text_end(), split unescape_text() into multiple
 	functions.  (#159001, Havoc Pennington)
 2004-11-27  Matthias Clasen  <mclasen@redhat.com>
 	* glib/gspawn.c (g_spawn_async_with_pipes): Update the @flags
--- a/ChangeLog.pre-2-8
+++ b/ChangeLog.pre-2-8
@@ -1,3 +1,9 @@
 2004-11-28  Matthias Clasen  <mclasen@redhat.com>
 	* glib/gmarkup.c: Optimizations; don't scan the entire text
 	in find_current_text_end(), split unescape_text() into multiple
 	functions.  (#159001, Havoc Pennington)
 2004-11-27  Matthias Clasen  <mclasen@redhat.com>
 	* glib/gspawn.c (g_spawn_async_with_pipes): Update the @flags
--- a/glib/gmarkup.c
+++ b/glib/gmarkup.c
@@ -235,12 +235,24 @@ set_error (GMarkupParseContext *context,
  g_propagate_error (error, tmp_error);
 }
 /* To make these faster, we first use the ascii-only tests, then check
 * for the usual non-alnum name-end chars, and only then call the
 * expensive unicode stuff. Nobody uses non-ascii in XML tag/attribute
 * names, so this is a reasonable hack that virtually always avoids
 * the guniprop call.
 */
 #define IS_COMMON_NAME_END_CHAR(c) \
  ((c) == '=' || (c) == '/' || (c) == '>' || (c) == ' ')
 static gboolean
 is_name_start_char (gunichar c)
 {
-  if (g_unichar_isalpha (c) ||
+  if (g_ascii_isalpha (c) ||
-      c == '_' ||
+      (!IS_COMMON_NAME_END_CHAR (c) &&
-      c == ':')
+       (g_unichar_isalpha (c) ||
        c == '_' ||
        c == ':')))
    return TRUE;
  else
    return FALSE;
@@ -249,11 +261,13 @@ is_name_start_char (gunichar c)
 static gboolean
 is_name_char (gunichar c)
 {
-  if (g_unichar_isalnum (c) ||
+  if (g_ascii_isalnum (c) ||
-      c == '.' ||
+      (!IS_COMMON_NAME_END_CHAR (c) &&
-      c == '-' ||
+       (g_unichar_isalnum (c) ||
-      c == '_' ||
+        c == '.' ||
-      c == ':')
+        c == '-' ||
        c == '_' ||
        c == ':')))
    return TRUE;
  else
    return FALSE;
@@ -326,296 +340,369 @@ typedef enum
  USTATE_AFTER_CHARREF_HASH
 } UnescapeState;
-static gboolean
+typedef struct
 unescape_text (GMarkupParseContext *context,
               const gchar         *text,
               const gchar         *text_end,
               gchar              **unescaped,
               GError             **error)
 {
-#define MAX_ENT_LEN 5
+  GMarkupParseContext *context;
  GString *str;
  const gchar *p;
  UnescapeState state;
  const gchar *text;
  const gchar *text_end;
  const gchar *entity_start;
 } UnescapeContext;
 static const gchar*
 unescape_text_state_inside_text (UnescapeContext *ucontext,
                                 const gchar     *p,
                                 GError         **error)
 {
  const gchar *start;
  gboolean normalize_attribute;
-  str = g_string_new (NULL);
+  if (ucontext->context->state == STATE_INSIDE_ATTRIBUTE_VALUE_SQ ||
-
+      ucontext->context->state == STATE_INSIDE_ATTRIBUTE_VALUE_DQ)
  if (context->state == STATE_INSIDE_ATTRIBUTE_VALUE_SQ ||
      context->state == STATE_INSIDE_ATTRIBUTE_VALUE_DQ)
    normalize_attribute = TRUE;
  else
    normalize_attribute = FALSE;
  state = USTATE_INSIDE_TEXT;
  p = text;
  start = p;
  while (p != ucontext->text_end)
    {
      if (*p == '&')
        {
          break;
        }
      else if (normalize_attribute && (*p == '\t' || *p == '\n'))
        {
          g_string_append_len (ucontext->str, start, p - start);
          g_string_append_c (ucontext->str, ' ');
          p = g_utf8_next_char (p);
          start = p;
        }
      else if (*p == '\r')
        {
          g_string_append_len (ucontext->str, start, p - start);
          g_string_append_c (ucontext->str, normalize_attribute ? ' ' : '\n');
          p = g_utf8_next_char (p);
          if (p != ucontext->text_end && *p == '\n')
            p = g_utf8_next_char (p);
          start = p;
        }
      else
        p = g_utf8_next_char (p);
    }
  if (p != start)
    g_string_append_len (ucontext->str, start, p - start);
  if (p != ucontext->text_end && *p == '&')
    {
      p = g_utf8_next_char (p);
      ucontext->state = USTATE_AFTER_AMPERSAND;
    }
  return p;
 }
 __attribute__ ((noinline))
 static const gchar*
 unescape_text_state_after_ampersand (UnescapeContext *ucontext,
                                     const gchar     *p,
                                     GError         **error)
 {
  ucontext->entity_start = NULL;
  if (*p == '#')
    {
      p = g_utf8_next_char (p);
      ucontext->entity_start = p;
      ucontext->state = USTATE_AFTER_CHARREF_HASH;
    }
  else if (!is_name_start_char (g_utf8_get_char (p)))
    {
      if (*p == ';')
        {
          set_unescape_error (ucontext->context, error,
                              p, ucontext->text_end,
                              G_MARKUP_ERROR_PARSE,
                              _("Empty entity '&;' seen; valid "
                                "entities are: &amp; &quot; &lt; &gt; &apos;"));
        }
      else
        {
          gchar buf[7];
          set_unescape_error (ucontext->context, error,
                              p, ucontext->text_end,
                              G_MARKUP_ERROR_PARSE,
                              _("Character '%s' is not valid at "
                                "the start of an entity name; "
                                "the & character begins an entity; "
                                "if this ampersand isn't supposed "
                                "to be an entity, escape it as "
                                "&amp;"),
                              utf8_str (p, buf));
        }
    }
  else
    {
      ucontext->entity_start = p;
      ucontext->state = USTATE_INSIDE_ENTITY_NAME;
    }
  return p;
 }
 __attribute__ ((noinline))
 static const gchar*
 unescape_text_state_inside_entity_name (UnescapeContext *ucontext,
                                        const gchar     *p,
                                        GError         **error)
 {
 #define MAX_ENT_LEN 5
  gchar buf[MAX_ENT_LEN+1] = {
    '\0', '\0', '\0', '\0', '\0', '\0'
  };
  gchar *dest;
  while (p != ucontext->text_end)
    {
      if (*p == ';')
        break;
      else if (!is_name_char (*p))
        {
          gchar ubuf[7];
          set_unescape_error (ucontext->context, error,
                              p, ucontext->text_end,
                              G_MARKUP_ERROR_PARSE,
                              _("Character '%s' is not valid "
                                "inside an entity name"),
                              utf8_str (p, ubuf));
          break;
        }
      p = g_utf8_next_char (p);
    }
  if (ucontext->context->state != STATE_ERROR)
    {
      if (p != ucontext->text_end)
        {
          const gchar *src;
          src = ucontext->entity_start;
          dest = buf;
          while (src != p)
            {
              *dest = *src;
              ++dest;
              ++src;
            }
          /* move to after semicolon */
          p = g_utf8_next_char (p);
          ucontext->state = USTATE_INSIDE_TEXT;
          if (strcmp (buf, "lt") == 0)
            g_string_append_c (ucontext->str, '<');
          else if (strcmp (buf, "gt") == 0)
            g_string_append_c (ucontext->str, '>');
          else if (strcmp (buf, "amp") == 0)
            g_string_append_c (ucontext->str, '&');
          else if (strcmp (buf, "quot") == 0)
            g_string_append_c (ucontext->str, '"');
          else if (strcmp (buf, "apos") == 0)
            g_string_append_c (ucontext->str, '\'');
          else
            {
              set_unescape_error (ucontext->context, error,
                                  p, ucontext->text_end,
                                  G_MARKUP_ERROR_PARSE,
                                  _("Entity name '%s' is not known"),
                                  buf);
            }
        }
      else
        {
          set_unescape_error (ucontext->context, error,
                              /* give line number of the & */
                              ucontext->entity_start, ucontext->text_end,
                              G_MARKUP_ERROR_PARSE,
                              _("Entity did not end with a semicolon; "
                                "most likely you used an ampersand "
                                "character without intending to start "
                                "an entity - escape ampersand as &amp;"));
        }
    }
 #undef MAX_ENT_LEN
  return p;
 }
 __attribute__ ((noinline))
 static const gchar*
 unescape_text_state_after_charref_hash (UnescapeContext *ucontext,
                                        const gchar     *p,
                                        GError         **error)
 {
  gboolean is_hex = FALSE;
  const char *start;
  start = ucontext->entity_start;
  if (*p == 'x')
    {
      is_hex = TRUE;
      p = g_utf8_next_char (p);
      start = p;
    }
  while (p != ucontext->text_end && *p != ';')
    p = g_utf8_next_char (p);
  if (p != ucontext->text_end)
    {
      g_assert (*p == ';');
      /* digit is between start and p */
      if (start != p)
        {
          gchar *digit = g_strndup (start, p - start);
          gulong l;
          gchar *end = NULL;
          gchar *digit_end = digit + (p - start);
          errno = 0;
          if (is_hex)
            l = strtoul (digit, &end, 16);
          else
            l = strtoul (digit, &end, 10);
          if (end != digit_end || errno != 0)
            {
              set_unescape_error (ucontext->context, error,
                                  start, ucontext->text_end,
                                  G_MARKUP_ERROR_PARSE,
                                  _("Failed to parse '%s', which "
                                    "should have been a digit "
                                    "inside a character reference "
                                    "(&#234; for example) - perhaps "
                                    "the digit is too large"),
                                  digit);
            }
          else
            {
              /* characters XML permits */
              if (l == 0x9 ||
                  l == 0xA ||
                  l == 0xD ||
                  (l >= 0x20 && l <= 0xD7FF) ||
                  (l >= 0xE000 && l <= 0xFFFD) ||
                  (l >= 0x10000 && l <= 0x10FFFF))
                {
                  gchar buf[7];
                  g_string_append (ucontext->str, char_str (l, buf));
                }
              else
                {
                  set_unescape_error (ucontext->context, error,
                                      start, ucontext->text_end,
                                      G_MARKUP_ERROR_PARSE,
                                      _("Character reference '%s' does not encode a permitted character"),
                                      digit);
                }
            }
          g_free (digit);
          /* Move to next state */
          p = g_utf8_next_char (p); /* past semicolon */
          ucontext->state = USTATE_INSIDE_TEXT;
        }
      else
        {
          set_unescape_error (ucontext->context, error,
                              start, ucontext->text_end,
                              G_MARKUP_ERROR_PARSE,
                              _("Empty character reference; "
                                "should include a digit such as "
                                "&#454;"));
        }
    }
  else
    {
      set_unescape_error (ucontext->context, error,
                          start, ucontext->text_end,
                          G_MARKUP_ERROR_PARSE,
                          _("Character reference did not end with a "
                            "semicolon; "
                            "most likely you used an ampersand "
                            "character without intending to start "
                            "an entity - escape ampersand as &amp;"));
    }
  return p;
 }
 static gboolean
 unescape_text (GMarkupParseContext *context,
               const gchar         *text,
               const gchar         *text_end,
               GString            **unescaped,
               GError             **error)
 {
  UnescapeContext ucontext;
  const gchar *p;
  ucontext.context = context;
  ucontext.text = text;
  ucontext.text_end = text_end;
  ucontext.entity_start = NULL;
  ucontext.str = g_string_sized_new (text_end - text);
  ucontext.state = USTATE_INSIDE_TEXT;
  p = text;
  while (p != text_end && context->state != STATE_ERROR)
    {
      g_assert (p < text_end);
-      switch (state)
+      switch (ucontext.state)
        {
        case USTATE_INSIDE_TEXT:
          {
-            while (p != text_end && *p != '&')
+            p = unescape_text_state_inside_text (&ucontext,
-	      {
+                                                 p,
-		if ((*p == '\t' || *p == '\n') && normalize_attribute)
+                                                 error);
 		  {
 		    g_string_append_len (str, start, p - start);
 		    g_string_append_c (str, ' ');
 		    p = g_utf8_next_char (p);
 		    start = p;
 		  }
 		else if (*p == '\r')
 		  {
 		    g_string_append_len (str, start, p - start);
 		    g_string_append_c (str, normalize_attribute ? ' ' : '\n');
 		    p = g_utf8_next_char (p);
 		    if (*p == '\n')
 		      p = g_utf8_next_char (p);
 		    start = p;
 		  }
 		else
 		  p = g_utf8_next_char (p);
 	      }
            if (p != start)
              {
                g_string_append_len (str, start, p - start);
                start = NULL;
              }
            if (p != text_end && *p == '&')
              {
                p = g_utf8_next_char (p);
                state = USTATE_AFTER_AMPERSAND;
              }
          }
          break;
        case USTATE_AFTER_AMPERSAND:
          {
-            if (*p == '#')
+            p = unescape_text_state_after_ampersand (&ucontext,
-              {
+                                                     p,
-                p = g_utf8_next_char (p);
+                                                     error);
                start = p;
                state = USTATE_AFTER_CHARREF_HASH;
              }
            else if (!is_name_start_char (g_utf8_get_char (p)))
              {
                if (*p == ';')
                  {
                    set_unescape_error (context, error,
                                        p, text_end,
                                        G_MARKUP_ERROR_PARSE,
                                        _("Empty entity '&;' seen; valid "
                                          "entities are: &amp; &quot; &lt; &gt; &apos;"));
                  }
                else
                  {
                    gchar buf[7];
                    set_unescape_error (context, error,
                                        p, text_end,
                                        G_MARKUP_ERROR_PARSE,
                                        _("Character '%s' is not valid at "
                                          "the start of an entity name; "
                                          "the & character begins an entity; "
                                          "if this ampersand isn't supposed "
                                          "to be an entity, escape it as "
                                          "&amp;"),
                                        utf8_str (p, buf));
                  }
              }
            else
              {
                start = p;
                state = USTATE_INSIDE_ENTITY_NAME;
              }
          }
          break;
        case USTATE_INSIDE_ENTITY_NAME:
          {
-            gchar buf[MAX_ENT_LEN+1] = {
+            p = unescape_text_state_inside_entity_name (&ucontext,
-              '\0', '\0', '\0', '\0', '\0', '\0'
+                                                        p,
-            };
+                                                        error);
            gchar *dest;
            while (p != text_end)
              {
                if (*p == ';')
                  break;
                else if (!is_name_char (*p))
                  {
                    gchar ubuf[7];
                    set_unescape_error (context, error,
                                        p, text_end,
                                        G_MARKUP_ERROR_PARSE,
                                        _("Character '%s' is not valid "
                                          "inside an entity name"),
                                        utf8_str (p, ubuf));
                    break;
                  }
                p = g_utf8_next_char (p);
              }
            if (context->state != STATE_ERROR)
              {
                if (p != text_end)
                  {
                    const gchar *src;
                    src = start;
                    dest = buf;
                    while (src != p)
                      {
                        *dest = *src;
                        ++dest;
                        ++src;
                      }
                    /* move to after semicolon */
                    p = g_utf8_next_char (p);
                    start = p;
                    state = USTATE_INSIDE_TEXT;
                    if (strcmp (buf, "lt") == 0)
                      g_string_append_c (str, '<');
                    else if (strcmp (buf, "gt") == 0)
                      g_string_append_c (str, '>');
                    else if (strcmp (buf, "amp") == 0)
                      g_string_append_c (str, '&');
                    else if (strcmp (buf, "quot") == 0)
                      g_string_append_c (str, '"');
                    else if (strcmp (buf, "apos") == 0)
                      g_string_append_c (str, '\'');
                    else
                      {
                        set_unescape_error (context, error,
                                            p, text_end,
                                            G_MARKUP_ERROR_PARSE,
                                            _("Entity name '%s' is not known"),
                                            buf);
                      }
                  }
                else
                  {
                    set_unescape_error (context, error,
                                        /* give line number of the & */
                                        start, text_end,
                                        G_MARKUP_ERROR_PARSE,
                                        _("Entity did not end with a semicolon; "
                                          "most likely you used an ampersand "
                                          "character without intending to start "
                                          "an entity - escape ampersand as &amp;"));
                  }
              }
          }
          break;
        case USTATE_AFTER_CHARREF_HASH:
          {
-            gboolean is_hex = FALSE;
+            p = unescape_text_state_after_charref_hash (&ucontext,
-            if (*p == 'x')
+                                                        p,
-              {
+                                                        error);
                is_hex = TRUE;
                p = g_utf8_next_char (p);
                start = p;
              }
            while (p != text_end && *p != ';')
              p = g_utf8_next_char (p);
            if (p != text_end)
              {
                g_assert (*p == ';');
                /* digit is between start and p */
                if (start != p)
                  {
                    gchar *digit = g_strndup (start, p - start);
                    gulong l;
                    gchar *end = NULL;
                    gchar *digit_end = digit + (p - start);
                    errno = 0;
                    if (is_hex)
                      l = strtoul (digit, &end, 16);
                    else
                      l = strtoul (digit, &end, 10);
                    if (end != digit_end || errno != 0)
                      {
                        set_unescape_error (context, error,
                                            start, text_end,
                                            G_MARKUP_ERROR_PARSE,
                                            _("Failed to parse '%s', which "
                                              "should have been a digit "
                                              "inside a character reference "
                                              "(&#234; for example) - perhaps "
                                              "the digit is too large"),
                                            digit);
                      }
                    else
                      {
                        /* characters XML permits */
                        if (l == 0x9 ||
                            l == 0xA ||
                            l == 0xD ||
                            (l >= 0x20 && l <= 0xD7FF) ||
                            (l >= 0xE000 && l <= 0xFFFD) ||
                            (l >= 0x10000 && l <= 0x10FFFF))
                          {
                            gchar buf[7];
                            g_string_append (str, char_str (l, buf));
                          }
                        else
                          {
                            set_unescape_error (context, error,
                                                start, text_end,
                                                G_MARKUP_ERROR_PARSE,
                                                _("Character reference '%s' does not encode a permitted character"),
                                                digit);
                          }
                      }
                    g_free (digit);
                    /* Move to next state */
                    p = g_utf8_next_char (p); /* past semicolon */
                    start = p;
                    state = USTATE_INSIDE_TEXT;
                  }
                else
                  {
                    set_unescape_error (context, error,
                                        start, text_end,
                                        G_MARKUP_ERROR_PARSE,
                                        _("Empty character reference; "
                                          "should include a digit such as "
                                          "&#454;"));
                  }
              }
            else
              {
                set_unescape_error (context, error,
                                    start, text_end,
                                    G_MARKUP_ERROR_PARSE,
                                    _("Character reference did not end with a "
                                      "semicolon; "
                                      "most likely you used an ampersand "
                                      "character without intending to start "
                                      "an entity - escape ampersand as &amp;"));
              }
          }
          break;
@@ -627,7 +714,7 @@ unescape_text (GMarkupParseContext *context,
  if (context->state != STATE_ERROR) 
    {
-      switch (state) 
+      switch (ucontext.state) 
 	{
 	case USTATE_INSIDE_TEXT:
 	  break;
@@ -649,31 +736,28 @@ unescape_text (GMarkupParseContext *context,
  if (context->state == STATE_ERROR)
    {
-      g_string_free (str, TRUE);
+      g_string_free (ucontext.str, TRUE);
      *unescaped = NULL;
      return FALSE;
    }
  else
    {
-      *unescaped = g_string_free (str, FALSE);
+      *unescaped = ucontext.str;
      return TRUE;
    }
 #undef MAX_ENT_LEN
 }
-static gboolean
+static inline gboolean
 advance_char (GMarkupParseContext *context)
 {  
  g_return_val_if_fail (context->iter != context->current_text_end, FALSE);
  context->iter = g_utf8_next_char (context->iter);
  context->char_number += 1;
  if (context->iter == context->current_text_end)
-    return FALSE;
+    {
-
+      return FALSE;
-  if (*context->iter == '\n')
+    }
  else if (*context->iter == '\n')
    {
      context->line_number += 1;
      context->char_number = 1;
@@ -682,7 +766,7 @@ advance_char (GMarkupParseContext *context)
  return TRUE;
 }
-static gboolean
+static inline gboolean
 xml_isspace (char c)
 {
  return c == ' ' || c == '\t' || c == '\n' || c == '\r';
@@ -716,7 +800,7 @@ add_to_partial (GMarkupParseContext *context,
                const gchar         *text_end)
 {
  if (context->partial_chunk == NULL)
-    context->partial_chunk = g_string_new (NULL);
+    context->partial_chunk = g_string_sized_new (text_end - text_start);
  if (text_start != text_end)
    g_string_append_len (context->partial_chunk, text_start,
@@ -750,23 +834,18 @@ current_attribute (GMarkupParseContext *context)
 static void
 find_current_text_end (GMarkupParseContext *context)
 {
-  /* This function must be safe (non-segfaulting) on invalid UTF8 */
+  /* This function must be safe (non-segfaulting) on invalid UTF8.
   * It assumes the string starts with a character start
   */
  const gchar *end = context->current_text + context->current_text_len;
  const gchar *p;
  const gchar *next;
  g_assert (context->current_text_len > 0);
-  p = context->current_text;
+  p = g_utf8_find_prev_char (context->current_text, end);
  next = g_utf8_find_next_char (p, end);
-  while (next && *next)
+  g_assert (p != NULL); /* since current_text was a char start */
    {
      if (p == next)
 	next++;
      p = next;
      next = g_utf8_find_next_char (p, end);
    }
  /* p is now the start of the last character or character portion. */
  g_assert (p != end);
@@ -934,8 +1013,8 @@ g_markup_parse_context_parse (GMarkupParseContext *context,
   * we could have a trailing incomplete char)
   */
  if (!g_utf8_validate (context->current_text,
-                        context->current_text_len,
+			context->current_text_len,
-                        &first_invalid))
+			&first_invalid))
    {
      gint newlines = 0;
      const gchar *p;
@@ -1352,6 +1431,8 @@ g_markup_parse_context_parse (GMarkupParseContext *context,
               * with the partial chunk if any; set it for the current
               * attribute.
               */
              GString *unescaped;
              add_to_partial (context, context->start, context->iter);
              g_assert (context->cur_attr >= 0);
@@ -1360,10 +1441,11 @@ g_markup_parse_context_parse (GMarkupParseContext *context,
                                 context->partial_chunk->str,
                                 context->partial_chunk->str +
                                 context->partial_chunk->len,
-                                 &context->attr_values[context->cur_attr],
+                                 &unescaped,
                                 error))
                {
                  /* success, advance past quote and set state. */
                  context->attr_values[context->cur_attr] = g_string_free (unescaped, FALSE);
                  advance_char (context);
                  context->state = STATE_BETWEEN_ATTRIBUTES;
                  context->start = NULL;
@@ -1390,7 +1472,7 @@ g_markup_parse_context_parse (GMarkupParseContext *context,
          if (context->iter != context->current_text_end)
            {
-              gchar *unescaped = NULL;
+              GString *unescaped = NULL;
              /* The text has ended at the open angle. Call the text
               * callback.
@@ -1407,12 +1489,12 @@ g_markup_parse_context_parse (GMarkupParseContext *context,
                  if (context->parser->text)
                    (*context->parser->text) (context,
-                                              unescaped,
+                                              unescaped->str,
-                                              strlen (unescaped),
+                                              unescaped->len,
                                              context->user_data,
                                              &tmp_error);
-                  g_free (unescaped);
+                  g_string_free (unescaped, TRUE);
                  if (tmp_error == NULL)
                    {
@@ -1869,7 +1951,8 @@ g_markup_escape_text (const gchar *text,
  if (length < 0)
    length = strlen (text);
-  str = g_string_new (NULL);
+  /* prealloc at least as long as original text */
  str = g_string_sized_new (length);
  append_escaped_text (str, text, length);
  return g_string_free (str, FALSE);