From 28f781501e4c9a7f83b459d83f14c396fce0c981 Mon Sep 17 00:00:00 2001 From: Matthias Clasen Date: Thu, 9 Aug 2007 02:06:04 +0000 Subject: [PATCH] Handle restricted characters by converting them to numeric character 2007-08-08 Matthias Clasen * glib/gmarkup.c (append_escaped_text): Handle restricted characters by converting them to numeric character entities. (#464145, Andreas Monitzer) * tests/markup-escape-test.c: Add tests for restricted characters and numeric character entities. svn path=/trunk/; revision=5684 --- ChangeLog | 9 +++++++++ glib/gmarkup.c | 19 +++++++++++++++---- tests/markup-escape-test.c | 37 +++++++++++++++++++++++++++++++++++++ 3 files changed, 61 insertions(+), 4 deletions(-) diff --git a/ChangeLog b/ChangeLog index b9458323f..fa10f1973 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,12 @@ +2007-08-08 Matthias Clasen + + * glib/gmarkup.c (append_escaped_text): Handle restricted + characters by converting them to numeric character + entities. (#464145, Andreas Monitzer) + + * tests/markup-escape-test.c: Add tests for restricted + characters and numeric character entities. 
+ 2007-08-08 Matthias Clasen * glib/glib.symbols: diff --git a/glib/gmarkup.c b/glib/gmarkup.c index e0179f394..4c941590f 100644 --- a/glib/gmarkup.c +++ b/glib/gmarkup.c @@ -955,7 +955,7 @@ g_markup_parse_context_parse (GMarkupParseContext *context, set_error (context, error, G_MARKUP_ERROR_BAD_UTF8, - _("Invalid UTF-8 encoded text")); + _("Invalid UTF-8 encoded text - overlong sequence")); } goto finished; @@ -983,7 +983,7 @@ g_markup_parse_context_parse (GMarkupParseContext *context, set_error (context, error, G_MARKUP_ERROR_BAD_UTF8, - _("Invalid UTF-8 encoded text")); + _("Invalid UTF-8 encoded text - not a start char")); goto finished; } @@ -1019,7 +1019,9 @@ g_markup_parse_context_parse (GMarkupParseContext *context, set_error (context, error, G_MARKUP_ERROR_BAD_UTF8, - _("Invalid UTF-8 encoded text")); + _("Invalid UTF-8 encoded text - not valid '%s'"), + g_strndup (context->current_text, + context->current_text_len)); goto finished; } @@ -1900,6 +1902,7 @@ append_escaped_text (GString *str, { const gchar *p; const gchar *end; + gunichar c; p = text; end = text + length; @@ -1932,7 +1935,15 @@ append_escaped_text (GString *str, break; default: - g_string_append_len (str, p, next - p); + c = g_utf8_get_char (p); + if ((0x1 <= c && c <= 0x8) || + (0xb <= c && c <= 0xc) || + (0xe <= c && c <= 0x1f) || + (0x7f <= c && c <= 0x84) || + (0x86 <= c && c <= 0x9f)) + g_string_append_printf (str, "&#x%x;", c); + else + g_string_append_len (str, p, next - p); break; } diff --git a/tests/markup-escape-test.c b/tests/markup-escape-test.c index 667d4dc96..27341623b 100644 --- a/tests/markup-escape-test.c +++ b/tests/markup-escape-test.c @@ -26,6 +26,24 @@ test (const gchar *original, g_free (result); } +static void +test_unichar (gunichar c, + gboolean entity) +{ + gint len; + gchar outbuf[7], expected[12]; + + len = g_unichar_to_utf8 (c, outbuf); + outbuf[len] = 0; + + if (entity) + g_snprintf (expected, 12, "&#x%x;", c); + else + strcpy (expected, outbuf); + + test 
(outbuf, expected); +} + static void test_format (const gchar *format, const gchar *expected, @@ -67,6 +85,25 @@ int main (int argc, char **argv) test ("A&&", "A&amp;&amp;"); test ("A&&A", "A&amp;&amp;A"); test ("A&A&A", "A&amp;A&amp;A"); + test ("A&#23;A", "A&amp;#23;A"); + test ("A&#xa;A", "A&amp;#xa;A"); + test_unichar (0x1, TRUE); + test_unichar (0x8, TRUE); + test_unichar (0x9, FALSE); + test_unichar (0xa, FALSE); + test_unichar (0xb, TRUE); + test_unichar (0xc, TRUE); + test_unichar (0xd, FALSE); + test_unichar (0xe, TRUE); + test_unichar (0x1f, TRUE); + test_unichar (0x20, FALSE); + test_unichar (0x7e, FALSE); + test_unichar (0x7f, TRUE); + test_unichar (0x84, TRUE); + test_unichar (0x85, FALSE); + test_unichar (0x86, TRUE); + test_unichar (0x9f, TRUE); + test_unichar (0xa0, FALSE); /* Tests for g_markup_printf_escaped() */ test_format ("A", "A");