Unit tests for g_utf8_pointer_to_offset and g_utf8_offset_to_pointer.

2005-11-17  Matthias Clasen  <mclasen@redhat.com>

	* tests/Makefile.am:
	* tests/utf8-pointer.c: Unit tests for g_utf8_pointer_to_offset
	and g_utf8_offset_to_pointer.

	* glib/gutf8.c (g_utf8_pointer_to_offset)
	(g_utf8_offset_to_pointer): Handle negative offsets, and use
	"stutter stepping" for going backwards. (#320638, Larry
	Ewing)
This commit is contained in:
Matthias Clasen 2005-11-17 15:50:31 +00:00 committed by Matthias Clasen
parent 92ea153ae9
commit 1ee0917984
6 changed files with 139 additions and 9 deletions

View File

@ -1,5 +1,14 @@
2005-11-17 Matthias Clasen <mclasen@redhat.com>
* tests/Makefile.am:
* tests/utf8-pointer.c: Unit tests for g_utf8_pointer_to_offset
and g_utf8_offset_to_pointer.
* glib/gutf8.c (g_utf8_pointer_to_offset)
(g_utf8_offset_to_pointer): Handle negative offsets, and use
"stutter stepping" for going backwards. (#320638, Larry
Ewing)
* glib/gbacktrace.c:
* glib/gdate.c:
* glib/gthread.c: const correctness fixes, found

View File

@ -1,5 +1,14 @@
2005-11-17 Matthias Clasen <mclasen@redhat.com>
* tests/Makefile.am:
* tests/utf8-pointer.c: Unit tests for g_utf8_pointer_to_offset
and g_utf8_offset_to_pointer.
* glib/gutf8.c (g_utf8_pointer_to_offset)
(g_utf8_offset_to_pointer): Handle negative offsets, and use
"stutter stepping" for going backwards. (#320638, Larry
Ewing)
* glib/gbacktrace.c:
* glib/gdate.c:
* glib/gthread.c: const correctness fixes, found

View File

@ -1,5 +1,14 @@
2005-11-17 Matthias Clasen <mclasen@redhat.com>
* tests/Makefile.am:
* tests/utf8-pointer.c: Unit tests for g_utf8_pointer_to_offset
and g_utf8_offset_to_pointer.
* glib/gutf8.c (g_utf8_pointer_to_offset)
(g_utf8_offset_to_pointer): Handle negative offsets, and use
"stutter stepping" for going backwards. (#320638, Larry
Ewing)
* glib/gbacktrace.c:
* glib/gdate.c:
* glib/gthread.c: const correctness fixes, found

View File

@ -284,6 +284,9 @@ g_utf8_get_char (const gchar *p)
* Converts from an integer character offset to a pointer to a position
* within the string.
*
* Since 2.10, this function allows passing a negative @offset to
* step backwards.
*
* Return value: the resulting pointer
**/
gchar *
@ -291,9 +294,29 @@ g_utf8_offset_to_pointer (const gchar *str,
glong offset)
{
const gchar *s = str;
while (offset--)
s = g_utf8_next_char (s);
if (offset > 0)
while (offset--)
s = g_utf8_next_char (s);
else
{
const char *s1;
/* This nice technique for fast backwards stepping
* through a UTF-8 string was dubbed "stutter stepping"
* by its inventor, Larry Ewing.
*/
while (offset)
{
s1 = s;
s += offset;
while ((*s & 0xc0) == 0x80)
s--;
offset += g_utf8_pointer_to_offset (s, s1);
}
}
return (gchar *)s;
}
@ -304,6 +327,9 @@ g_utf8_offset_to_pointer (const gchar *str,
*
* Converts from a pointer to a position within a string to an integer
* character offset.
*
* Since 2.10, this function allows @pos to be before @str, and returns
* a negative offset in this case.
*
* Return value: the resulting character offset
**/
@ -313,13 +339,16 @@ g_utf8_pointer_to_offset (const gchar *str,
{
const gchar *s = str;
glong offset = 0;
while (s < pos)
{
s = g_utf8_next_char (s);
offset++;
}
if (pos < str)
offset = - g_utf8_pointer_to_offset (pos, str);
else
while (s < pos)
{
s = g_utf8_next_char (s);
offset++;
}
return offset;
}

View File

@ -103,6 +103,7 @@ test_programs = \
unicode-caseconv \
unicode-encoding \
utf8-validate \
utf8-pointer \
uri-test
test_scripts = run-markup-tests.sh run-collate-tests.sh
@ -161,6 +162,7 @@ unicode_encoding_LDADD = $(progs_ldadd)
unicode_caseconv_LDADD = $(progs_ldadd)
unicode_collate_LDADD = $(progs_ldadd)
utf8_validate_LDADD = $(progs_ldadd)
utf8_pointer_LDADD = $(progs_ldadd)
uri_test_LDADD = $(progs_ldadd)
lib_LTLIBRARIES = libmoduletestplugin_a.la libmoduletestplugin_b.la

72
tests/utf8-pointer.c Normal file
View File

@ -0,0 +1,72 @@
/* GLIB - Library of useful routines for C programming
* Copyright (C) 1995-1997 Peter Mattis, Spencer Kimball and Josh MacDonald
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*/
/*
* Modified by the GLib Team and others 1997-2000. See the AUTHORS
* file for a list of people on the GLib Team. See the ChangeLog
* files for a list of changes. These files are distributed with
* GLib at ftp://ftp.gtk.org/pub/gtk/.
*/
#include <glib.h>
/* Test conversions between offsets and pointers.
 *
 * Records a pointer to the start of every character in @string, then, for
 * every ordered pair of characters (i, j), checks that
 * g_utf8_offset_to_pointer() and g_utf8_pointer_to_offset() agree on the
 * distance j - i.  That distance is negative whenever j precedes i, so the
 * backwards-stepping paths are exercised as well as the forward ones.
 *
 * @string must be valid, nul-terminated UTF-8; this is asserted up front.
 */
static void test_utf8 (gchar *string)
{
  gint num_chars;
  gchar **p;
  gint i, j;

  g_assert (g_utf8_validate (string, -1, NULL));

  num_chars = g_utf8_strlen (string, -1);

  /* An empty string has no character starts to record.  Without this guard
   * the zero-sized allocation below would yield NULL and the p[0] store
   * would write through a NULL pointer.
   */
  if (num_chars == 0)
    return;

  /* g_new() checks the element-count multiplication for overflow. */
  p = g_new (gchar *, num_chars);

  /* p[k] points at the first byte of the k-th character. */
  p[0] = string;
  for (i = 1; i < num_chars; i++)
    p[i] = g_utf8_next_char (p[i - 1]);

  for (i = 0; i < num_chars; i++)
    for (j = 0; j < num_chars; j++)
      {
        g_assert (g_utf8_offset_to_pointer (p[i], j - i) == p[j]);
        g_assert (g_utf8_pointer_to_offset (p[i], p[j]) == j - i);
      }

  g_free (p);
}
/* Sample text handed to test_utf8() by main().
 *
 * The adjacent string literals are concatenated by the compiler into one
 * nul-terminated string.  Most of it is plain ASCII, interspersed with runs
 * of accented Latin letters and euro signs, i.e. characters outside the
 * ASCII range -- presumably so that multi-byte UTF-8 sequences are stepped
 * over in both directions (assumes this file is stored as UTF-8; test_utf8()
 * asserts validity at run time).
 */
gchar *longline = "asdasdas dsaf asfd as fdasdf asfd asdf as dfas dfasdf a"
"asd fasdf asdf asdf asd fasfd as fdasfd asdf as fdççççççççças ffsd asfd as fdASASASAs As"
"Asfdsf sdfg sdfg dsfg dfg sdfgsdfgsdfgsdfg sdfgsdfg sdfg sdfg sdf gsdfg sdfg sd"
"asd fasdf asdf asdf asd fasfd as fdaèèèèèèè òòòòòòòòòòòòsfd asdf as fdas ffsd asfd as fdASASASAs D"
"Asfdsf sdfg sdfg dsfg dfg sdfgsdfgsdfgsdfg sdfgsdfg sdfgùùùùùùùùùùùùùù sdfg sdf gsdfg sdfg sd"
"asd fasdf asdf asdf asd fasfd as fdasfd asd@@@@@@@f as fdas ffsd asfd as fdASASASAs D "
"Asfdsf sdfg sdfg dsfg dfg sdfgsdfgsdfgsdfg sdfgsdf€€€€€€€€€€€€€€€€€€g sdfg sdfg sdf gsdfg sdfg sd"
"asd fasdf asdf asdf asd fasfd as fdasfd asdf as fdas ffsd asfd as fdASASASAs D"
"Asfdsf sdfg sdfg dsfg dfg sdfgsdfgsdfgsdfg sdfgsdfg sdfg sdfg sdf gsdfg sdfg sd\n\nlalala\n";
/* Program entry point: runs the offset/pointer round-trip checks on the
 * sample text.  The command-line arguments are not consulted.  Returns 0
 * once test_utf8() has returned.
 */
int
main (int   argc,
      char *argv[])
{
  /* Arguments are intentionally unused. */
  (void) argc;
  (void) argv;

  test_utf8 (longline);

  return 0;
}