From 1b101a38730f151b70f0ada607dbd8e43778f980 Mon Sep 17 00:00:00 2001 From: Mikhail Zabaluev Date: Mon, 15 Mar 2010 03:37:11 +0200 Subject: [PATCH] Added performance tests for UTF-8 decoding functions https://bugzilla.gnome.org/show_bug.cgi?id=619418 --- glib/tests/Makefile.am | 4 + glib/tests/utf8-performance.c | 202 ++++++++++++++++++++++++++++++++++ 2 files changed, 206 insertions(+) create mode 100644 glib/tests/utf8-performance.c diff --git a/glib/tests/Makefile.am b/glib/tests/Makefile.am index 39816e59e..513e3cf19 100644 --- a/glib/tests/Makefile.am +++ b/glib/tests/Makefile.am @@ -73,6 +73,10 @@ gvariant_LDADD = $(progs_ldadd) TEST_PROGS += mem-overflow mem_overflow_LDADD = $(progs_ldadd) +TEST_PROGS += utf8-performance +utf8_performance_SOURCES = utf8-performance.c +utf8_performance_LDADD = $(progs_ldadd) + TEST_PROGS += utils utils_LDADD = $(progs_ldadd) diff --git a/glib/tests/utf8-performance.c b/glib/tests/utf8-performance.c new file mode 100644 index 000000000..03b1442d5 --- /dev/null +++ b/glib/tests/utf8-performance.c @@ -0,0 +1,202 @@ +/* GLIB - Library of useful routines for C programming + * + * Copyright (C) 2010 Mikhail Zabaluev + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. 
+ */
+
+#include <string.h>
+
+#include <glib.h>
+
+/* Number of passes every grind function makes over its input string. */
+#define NUM_ITERATIONS 500000
+
+static const char str_ascii[] =
+  "The quick brown fox jumps over the lazy dog";
+
+static const char str_latin1[] =
+  "Zwölf Boxkämpfer jagen Viktor quer über den großen Sylter Deich";
+
+/* Energizing GOELRO-talk in Russian, used by KDE */
+static const char str_cyrillic[] =
+  "Широкая электрификация южных губерний даст мощный толчок подъёму "
+  "сельского хозяйства.";
+
+/* First sentence from the Wikipedia article:
+ * http://zh.wikipedia.org/w/index.php?title=%E6%B1%89%E5%AD%97&oldid=13053137 */
+static const char str_chinese[] =
+  "漢字,亦稱中文字、中国字,在台灣又被稱為國字,是漢字文化圈廣泛使用的一種文字,屬於表意文字的詞素音節文字";
+
+/* Signature shared by all benchmark bodies.
+ *
+ * The first argument is the NUL-terminated string to decode, the second
+ * its length in bytes (the caller obtains it with strlen()).  The return
+ * value is not inspected by the caller in this file. */
+typedef int (* GrindFunc) (const char *, gsize);
+
+/* Decodes the string front to back, NUM_ITERATIONS times, with
+ * g_utf8_get_char() / g_utf8_next_char().  Stops at the NUL terminator;
+ * len is unused.  Returns the sum of the decoded code points (presumably
+ * so that the decoding work has an observable result). */
+static int
+grind_get_char (const char *str, gsize len)
+{
+  gunichar acc = 0;
+  int i;
+
+  for (i = 0; i < NUM_ITERATIONS; i++)
+    {
+      const char *p = str;
+
+      while (*p)
+        {
+          acc += g_utf8_get_char (p);
+          p = g_utf8_next_char (p);
+        }
+    }
+
+  return acc;
+}
+
+/* Same forward walk as grind_get_char(), but every character is decoded
+ * with g_utf8_get_char_validated() and a maximum length of -1.  len is
+ * unused.  Returns the sum of the values the decoder produced. */
+static int
+grind_get_char_validated (const char *str, gsize len)
+{
+  gunichar acc = 0;
+  int i;
+
+  for (i = 0; i < NUM_ITERATIONS; i++)
+    {
+      const char *p = str;
+
+      while (*p)
+        {
+          acc += g_utf8_get_char_validated (p, -1);
+          p = g_utf8_next_char (p);
+        }
+    }
+
+  return acc;
+}
+
+/* Converts the whole string with g_utf8_to_ucs4(), NUM_ITERATIONS times,
+ * passing -1 as the length and freeing each result immediately.  len is
+ * unused.  Always returns 0. */
+static int
+grind_utf8_to_ucs4 (const char *str, gsize len)
+{
+  int i;
+
+  for (i = 0; i < NUM_ITERATIONS; i++)
+    {
+      gunichar *ustr;
+
+      ustr = g_utf8_to_ucs4 (str, -1, NULL, NULL, NULL);
+      g_free (ustr);
+    }
+
+  return 0;
+}
+
+/* Decodes the string back to front, NUM_ITERATIONS times: starts at
+ * str + len and steps with g_utf8_prev_char() until the first byte has
+ * been decoded.  Returns the sum of the decoded code points. */
+static int
+grind_get_char_backwards (const char *str, gsize len)
+{
+  gunichar acc = 0;
+  int i;
+
+  for (i = 0; i < NUM_ITERATIONS; i++)
+    {
+      const char *p = str + len;
+
+      /* Test before stepping: with an empty string (len == 0) p already
+       * equals str, and stepping back would read before the buffer.
+       * For non-empty input this runs exactly the same iterations as a
+       * do/while would. */
+      while (p != str)
+        {
+          p = g_utf8_prev_char (p);
+          acc += g_utf8_get_char (p);
+        }
+    }
+
+  return acc;
+}
+
+/* Like grind_utf8_to_ucs4(), but passes the explicit byte length len to
+ * g_utf8_to_ucs4() instead of -1.  Always returns 0. */
+static int
+grind_utf8_to_ucs4_sized (const char *str, gsize len)
+{
+  int i;
+
+  for (i = 0; i < NUM_ITERATIONS; i++)
+    {
+      gunichar *ustr;
+
+      ustr = g_utf8_to_ucs4 (str, len, NULL, NULL, NULL);
+      g_free (ustr);
+    }
+
+  return 0;
+}
+
+/* Converts the whole string with g_utf8_to_ucs4_fast(), NUM_ITERATIONS
+ * times, passing -1 as the length and freeing each result immediately.
+ * len is unused.  Always returns 0. */
+static int
+grind_utf8_to_ucs4_fast (const char *str, gsize len)
+{
+  int i;
+
+  for (i = 0; i < NUM_ITERATIONS; i++)
+    {
+      gunichar *ustr;
+
+      ustr = g_utf8_to_ucs4_fast (str, -1, NULL);
+      g_free (ustr);
+    }
+
+  return 0;
+}
+
+/* Like grind_utf8_to_ucs4_fast(), but passes the explicit byte length
+ * len to g_utf8_to_ucs4_fast() instead of -1.  Always returns 0. */
+static int
+grind_utf8_to_ucs4_fast_sized (const char *str, gsize len)
+{
+  int i;
+
+  for (i = 0; i < NUM_ITERATIONS; i++)
+    {
+      gunichar *ustr;
+
+      ustr = g_utf8_to_ucs4_fast (str, len, NULL);
+      g_free (ustr);
+    }
+
+  return 0;
+}
+
+/* Times one grind function on one string and reports the throughput.
+ *
+ * grind_func: benchmark body to run; its return value is ignored.
+ * str:        NUL-terminated input; its strlen() is handed to grind_func.
+ * label:      name of the input, printed in front of the figure.
+ *
+ * The figure reported through g_test_maximized_result() is
+ * strlen (str) * NUM_ITERATIONS bytes divided by the elapsed time and
+ * scaled by 1.0e-6, labelled "MB/s". */
+static void
+perform_for (GrindFunc grind_func, const char *str, const char *label)
+{
+  gsize len;
+  gulong bytes_ground;
+  gdouble time_elapsed;
+  gdouble result;
+
+  len = strlen (str);
+  bytes_ground = (gulong) len * NUM_ITERATIONS;
+
+  g_test_timer_start ();
+
+  grind_func (str, len);
+
+  time_elapsed = g_test_timer_elapsed ();
+
+  result = ((gdouble) bytes_ground / time_elapsed) * 1.0e-6;
+
+  g_test_maximized_result (result, "%-9s %6.1f MB/s", label, result);
+}
+
+/* One registered benchmark: the test path and the function it times.
+ *
+ * The address of a table entry, which is an object pointer, is what
+ * travels through the gconstpointer test-data argument.  ISO C does not
+ * define conversions between function pointers and void pointers, so
+ * the function pointer itself is never passed that way. */
+typedef struct
+{
+  const char *path;
+  GrindFunc   grind_func;
+} PerfCase;
+
+static const PerfCase perf_cases[] =
+{
+  { "/utf8/perf/get_char",                grind_get_char },
+  { "/utf8/perf/get_char-backwards",      grind_get_char_backwards },
+  { "/utf8/perf/get_char_validated",      grind_get_char_validated },
+  { "/utf8/perf/utf8_to_ucs4",            grind_utf8_to_ucs4 },
+  { "/utf8/perf/utf8_to_ucs4-sized",      grind_utf8_to_ucs4_sized },
+  { "/utf8/perf/utf8_to_ucs4_fast",       grind_utf8_to_ucs4_fast },
+  { "/utf8/perf/utf8_to_ucs4_fast-sized", grind_utf8_to_ucs4_fast_sized }
+};
+
+/* Test body shared by all registered benchmarks.
+ *
+ * data points to the PerfCase being run.  Does nothing unless
+ * g_test_perf() returns true; otherwise runs the case's grind function
+ * over each of the four sample strings in turn. */
+static void
+perform (gconstpointer data)
+{
+  const PerfCase *perf_case = data;
+
+  if (!g_test_perf ())
+    return;
+
+  perform_for (perf_case->grind_func, str_ascii, "ASCII:");
+  perform_for (perf_case->grind_func, str_latin1, "Latin-1:");
+  perform_for (perf_case->grind_func, str_cyrillic, "Cyrillic:");
+  perform_for (perf_case->grind_func, str_chinese, "Chinese:");
+}
+
+/* Registers every entry of perf_cases, in table order, under its test
+ * path and runs the test suite.  Returns the result of g_test_run(). */
+int
+main (int argc, char **argv)
+{
+  gsize i;
+
+  g_test_init (&argc, &argv, NULL);
+
+  for (i = 0; i < G_N_ELEMENTS (perf_cases); i++)
+    g_test_add_data_func (perf_cases[i].path, &perf_cases[i], perform);
+
+  return g_test_run ();
+}