New module to parse a simple markup language

2000-10-24  Havoc Pennington  <hp@pobox.com>

        * gmarkup.h, gmarkup.c: New module to parse a simple
	markup language

	* Makefile.am: add gmarkup.h, gmarkup.c

	* tests/Makefile.am: add markup-test

	* gstring.h (g_string_new_len): new function to create a string
	with a length
	(g_string_new): avoid a gratuitous realloc
This commit is contained in:
Havoc Pennington
2000-10-27 02:46:04 +00:00
committed by Havoc Pennington
parent 7ea09e4589
commit 32ef70d4b2
59 changed files with 4229 additions and 2 deletions

View File

@@ -18,6 +18,7 @@ TESTS = \
gio-test \
hash-test \
list-test \
markup-test \
node-test \
queue-test \
rand-test \
@@ -43,6 +44,7 @@ dirname_test_LDADD = $(progs_LDADD)
gio_test_LDADD = $(progs_LDADD)
hash_test_LDADD = $(progs_LDADD)
list_test_LDADD = $(progs_LDADD)
markup_test_LDADD = $(progs_LDADD)
node_test_LDADD = $(progs_LDADD)
queue_test_LDADD = $(progs_LDADD)
rand_test_LDADD = $(progs_LDADD)

202
tests/markup-test.c Normal file
View File

@@ -0,0 +1,202 @@
#include <stdio.h>
#include <glib.h>
/* Current element nesting level: incremented by start_element_handler ()
 * and decremented by end_element_handler (). */
static int depth = 0;

/*
 * indent:
 * @extra: number of indentation units to emit in addition to the current
 *         nesting depth.
 *
 * Writes (depth + @extra) indentation units to stdout.  @extra was
 * previously ignored, so callers asking for a deeper level (attribute
 * lines) were printed at the same level as their element.  With
 * @extra == 0 the output is unchanged.
 */
static void
indent (int extra)
{
  int units = depth + extra;
  int i;

  for (i = 0; i < units; i++)
    fputs (" ", stdout);
}
/*
 * Start-tag callback.
 *
 * Prints an "ELEMENT '<name>'" line, then one name="value" line per
 * attribute, and finally increases the nesting depth so that whatever is
 * reported next is printed one level deeper.
 *
 * @attribute_names is walked until its NULL entry; @attribute_values is
 * advanced in lock step with it.  @context, @user_data and @error are not
 * used.
 */
static void
start_element_handler (GMarkupParseContext *context,
                       const gchar         *element_name,
                       const gchar        **attribute_names,
                       const gchar        **attribute_values,
                       gpointer             user_data,
                       GError             **error)
{
  const gchar **name;
  const gchar **value;

  indent (0);
  printf ("ELEMENT '%s'\n", element_name);

  for (name = attribute_names, value = attribute_values;
       *name != NULL;
       ++name, ++value)
    {
      indent (1);
      printf ("%s=\"%s\"\n", *name, *value);
    }

  ++depth;
}
/*
 * End-tag callback.
 *
 * Leaves the element's nesting level before printing, so the
 * "END '<name>'" line is written at the same depth as the matching
 * ELEMENT line.  @context, @user_data and @error are not used.
 */
static void
end_element_handler (GMarkupParseContext *context,
                     const gchar         *element_name,
                     gpointer             user_data,
                     GError             **error)
{
  depth -= 1;

  indent (0);
  printf ("END '%s'\n", element_name);
}
/*
 * Character-data callback: prints "TEXT '<run>'" at the current depth.
 *
 * The run is delimited by @text_len, so it is printed with an explicit
 * precision instead of a bare "%s"; that way printf never reads past the
 * bytes handed to the callback if the buffer is not NUL-terminated.
 * @context, @user_data and @error are not used.
 */
static void
text_handler (GMarkupParseContext *context,
              const gchar         *text,
              gint                 text_len,
              gpointer             user_data,
              GError             **error)
{
  indent (0);
  printf ("TEXT '%.*s'\n", (int) text_len, text);
}
/*
 * Passthrough callback: prints "PASS '<run>'" at the current depth.
 *
 * The run is delimited by @text_len, so it is printed with an explicit
 * precision instead of a bare "%s"; that way printf never reads past the
 * bytes handed to the callback if the buffer is not NUL-terminated.
 * @context, @user_data and @error are not used.
 */
static void
passthrough_handler (GMarkupParseContext *context,
                     const gchar         *passthrough_text,
                     gint                 text_len,
                     gpointer             user_data,
                     GError             **error)
{
  indent (0);
  printf ("PASS '%.*s'\n", (int) text_len, passthrough_text);
}
/*
 * Error callback: writes the error's message to stderr on its own line.
 * @context and @user_data are not used.
 */
static void
error_handler (GMarkupParseContext *context,
               GError              *error,
               gpointer             user_data)
{
  const gchar *message = error->message;

  fprintf (stderr, " %s\n", message);
}
/* Callback table passed to every g_markup_parse_context_new () call in this
 * file.  The initializer is positional, so the order of the entries below
 * must follow the member order of GMarkupParser. */
static GMarkupParser parser = {
/* prints ELEMENT lines and the element's attributes */
start_element_handler,
/* prints END lines */
end_element_handler,
/* prints TEXT lines */
text_handler,
/* prints PASS lines */
passthrough_handler,
/* writes the error message to stderr */
error_handler
};
/*
 * test_in_chunks:
 * @contents:   buffer holding the document to parse.
 * @length:     number of bytes in @contents.
 * @chunk_size: maximum number of bytes handed to the parser per call.
 *
 * Parses @contents with a fresh parse context, feeding it at most
 * @chunk_size bytes at a time, then finishes the parse.
 *
 * Returns 0 if every chunk and the final end-of-parse step succeed,
 * 1 otherwise.  The context is freed on every path.
 */
static int
test_in_chunks (const gchar *contents,
                gint         length,
                gint         chunk_size)
{
  GMarkupParseContext *context;
  int status = 1;
  int offset;

  context = g_markup_parse_context_new (&parser, 0, NULL, NULL);

  for (offset = 0; offset < length; )
    {
      int remaining = length - offset;
      int this_chunk = (remaining < chunk_size) ? remaining : chunk_size;

      if (!g_markup_parse_context_parse (context,
                                         contents + offset,
                                         this_chunk,
                                         NULL))
        goto out;

      offset += this_chunk;
    }

  if (g_markup_parse_context_end_parse (context, NULL))
    status = 0;

out:
  g_markup_parse_context_free (context);
  return status;
}
static int
test_file (const gchar *filename)
{
gchar *contents;
gint length;
GError *error;
GMarkupParseContext *context;
error = NULL;
if (!g_file_get_contents (filename,
&contents,
&length,
&error))
{
fprintf (stderr, "%s\n", error->message);
g_error_free (error);
return 1;
}
context = g_markup_parse_context_new (&parser, 0, NULL, NULL);
if (!g_markup_parse_context_parse (context, contents, length, NULL))
{
g_markup_parse_context_free (context);
return 1;
}
if (!g_markup_parse_context_end_parse (context, NULL))
{
g_markup_parse_context_free (context);
return 1;
}
g_markup_parse_context_free (context);
/* A byte at a time */
if (test_in_chunks (contents, length, 1) != 0)
return 1;
/* 2 bytes */
if (test_in_chunks (contents, length, 2) != 0)
return 1;
/*5 bytes */
if (test_in_chunks (contents, length, 5) != 0)
return 1;
/* 12 bytes */
if (test_in_chunks (contents, length, 12) != 0)
return 1;
/* 1024 bytes */
if (test_in_chunks (contents, length, 1024) != 0)
return 1;
return 0;
}
/*
 * Entry point: expects the markup file to test as the first command-line
 * argument.  Returns test_file ()'s result for that file, or 1 after
 * printing a usage hint to stderr when no argument was given.
 */
int
main (int   argc,
      char *argv[])
{
  if (argc <= 1)
    {
      fprintf (stderr, "Give a markup file on the command line\n");
      return 1;
    }

  return test_file (argv[1]);
}

View File

View File

@@ -0,0 +1,2 @@
<foo>
</|foo>

View File

@@ -0,0 +1,4 @@
<foo>
<bar>
</foo>
</bar>

View File

@@ -0,0 +1 @@
</foo>

View File

@@ -0,0 +1 @@
</foo|>

View File

@@ -0,0 +1,2 @@
<foo>
<

View File

@@ -0,0 +1,3 @@
<foo>
<bar>
</bar>

View File

@@ -0,0 +1 @@
<foo/

View File

@@ -0,0 +1 @@
<fo

View File

@@ -0,0 +1 @@
<foo bar

View File

@@ -0,0 +1 @@
<foo

View File

@@ -0,0 +1 @@
<EFBFBD>ν

View File

@@ -0,0 +1 @@
<foo bar=

View File

@@ -0,0 +1 @@
<foo bar="fdsf

View File

@@ -0,0 +1 @@
<foo>

View File

@@ -0,0 +1,2 @@
<foo>
<fo

View File

@@ -0,0 +1 @@
<!-- dfklsjdf;kljsdf;ljk document ends here

View File

@@ -0,0 +1 @@
<? document ending unexpectedly

View File

@@ -0,0 +1 @@
<foo>&;</foo>

View File

@@ -0,0 +1 @@
<foo>&|;</foo>

View File

@@ -0,0 +1 @@
<foo>&am|;</foo>

View File

@@ -0,0 +1 @@
<foo>&bar;</foo>

View File

@@ -0,0 +1,49 @@
<foobar>
Παν語
This is a list of ways to say hello in various languages. Its purpose is to illustrate a number of scripts.
(Converted into UTF-8)
---------------------------------------------------------
Arabic السلام عليكم
Czech (česky) Dobrý den
Danish (Dansk) Hej, Goddag
English Hello
Esperanto Saluton
Estonian Tere, Tervist
FORTRAN PROGRAM
Finnish (Suomi) Hei
French (Français) Bonjour, Salut
German (Deutsch Nord) Guten Tag
German (Deutsch Süd) Grüß Gott
Greek (Ελληνικά) Γειά σας
Hebrew שלום
Hindi नमस्ते, <20><>मस्कार।
Italiano Ciao, Buon giorno
Maltese Ċaw, Saħħa
Nederlands, Vlaams Hallo, Dag
Norwegian (Norsk) Hei, God dag
Polish Dzień dobry, Hej
Russian (Русский) Здравствуйте!
Slovak Dobrý deň
Spanish (Español) ¡Hola!
Swedish (Svenska) Hej, Goddag
Thai (ภาษาไทย) สวัสดีครับ, สวัสดีค่ะ
Turkish (Türkçe) Merhaba
Vietnamese (Tiếng Việt) Xin Chào
Yiddish (ײַדישע) דאָס הײַזעלע
Japanese (日本語) こんにちは, コンニチハ
Chinese (中文,普通话,汉语) 你好
Cantonese (粵語,廣東話) 早晨, 你好
Korean (한글) 안녕하세요, 안녕하십니까
Difference among chinese characters in GB, JIS, KSC, BIG5:
GB -- 元气 开发
JIS -- 元気 開発
KSC -- 元氣 開發
BIG5 -- 元氣 開發
</foobar>

View File

@@ -0,0 +1 @@
<foo>&sdfkljsdsdfsdfsdfsdf</foo>

View File

@@ -0,0 +1 @@
<foo>&#34592348345343453453455645765736575865767;</foo>

View File

@@ -0,0 +1 @@
<foo>&#x10;</foo>

View File

@@ -0,0 +1 @@
<foo>&#;</foo>

View File

@@ -0,0 +1 @@
<foo>&#234234</foo>

View File

@@ -0,0 +1 @@
foo

View File

@@ -0,0 +1,2 @@
<|foo>
</|foo>

View File

@@ -0,0 +1,2 @@
<foo|>
</foo>

View File

@@ -0,0 +1,2 @@
<foo bar}"baz">
</foo>

View File

@@ -0,0 +1,2 @@
<foo/}>
</foo>

View File

@@ -0,0 +1,2 @@
<foo bar={baz">
</foo>

View File

@@ -0,0 +1,9 @@
<!-- Comment -->
<?PI ?>
<foobar>
<e1>Hi &amp; this is some text inside an element Two 'E' chars as character refs: &#69; &#x45; and some 'J': &#74; &#x4A;</e1>
<e2:foo> Text <childfree/> with some <nested>nested elements</nested> and entities &quot;&amp; &lt; &gt;&gt; &apos; and whitespace </e2:foo>
<tag ab="fo&lt;o" bar="foo" baz="blah">This element has attributes</tag>
<nochildren a="b" xyz="qrs"/>
</foobar>

View File

@@ -0,0 +1,49 @@
<foobar>
Παν語
This is a list of ways to say hello in various languages. Its purpose is to illustrate a number of scripts.
(Converted into UTF-8)
---------------------------------------------------------
Arabic السلام عليكم
Czech (česky) Dobrý den
Danish (Dansk) Hej, Goddag
English Hello
Esperanto Saluton
Estonian Tere, Tervist
FORTRAN PROGRAM
Finnish (Suomi) Hei
French (Français) Bonjour, Salut
German (Deutsch Nord) Guten Tag
German (Deutsch Süd) Grüß Gott
Greek (Ελληνικά) Γειά σας
Hebrew שלום
Hindi नमस्ते, नमस्कार।
Italiano Ciao, Buon giorno
Maltese Ċaw, Saħħa
Nederlands, Vlaams Hallo, Dag
Norwegian (Norsk) Hei, God dag
Polish Dzień dobry, Hej
Russian (Русский) Здравствуйте!
Slovak Dobrý deň
Spanish (Español) ¡Hola!
Swedish (Svenska) Hej, Goddag
Thai (ภาษาไทย) สวัสดีครับ, สวัสดีค่ะ
Turkish (Türkçe) Merhaba
Vietnamese (Tiếng Việt) Xin Chào
Yiddish (ײַדישע) דאָס הײַזעלע
Japanese (日本語) こんにちは, コンニチハ
Chinese (中文,普通话,汉语) 你好
Cantonese (粵語,廣東話) 早晨, 你好
Korean (한글) 안녕하세요, 안녕하십니까
Difference among chinese characters in GB, JIS, KSC, BIG5:
GB -- 元气 开发
JIS -- 元気 開発
KSC -- 元氣 開發
BIG5 -- 元氣 開發
</foobar>

19
tests/run-markup-tests.sh Executable file
View File

@@ -0,0 +1,19 @@
#! /bin/sh
# Print "Test failed: " followed by all arguments, then abort the whole
# run with exit status 1.
fail ()
{
  message="Test failed: $*"
  echo "$message"
  exit 1
}
# Every fail-*.gmarkup fixture must make markup-test exit non-zero.
# The file name is quoted so a path containing whitespace is passed to
# markup-test as a single argument.
for I in markups/fail-*.gmarkup; do
  echo "Parsing $I, should fail"
  ./markup-test "$I" > /dev/null && fail "failed to generate error on $I"
done

# Every valid-*.gmarkup fixture must make markup-test exit zero.
for I in markups/valid-*.gmarkup; do
  echo "Parsing $I, should succeed"
  ./markup-test "$I" > /dev/null || fail "failed on $I"
done

echo "All tests passed."