/*
 * glib/tests/convert-test.c
 * Matthias Clasen 052f3e6bd9 Skip a test if iconv doesn't support the encoding
 * svn path=/trunk/; revision=7180
 * 2008-07-14 18:17:20 +00:00
 *
 * 663 lines
 * 18 KiB
 * C
 */

/* GLIB - Library of useful routines for C programming
* Copyright (C) 1995-1997 Peter Mattis, Spencer Kimball and Josh MacDonald
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*/
/*
* Modified by the GLib Team and others 1997-2000. See the AUTHORS
* file for a list of people on the GLib Team. See the ChangeLog
* files for a list of changes. These files are distributed with
* GLib at ftp://ftp.gtk.org/pub/gtk/.
*/
#undef G_DISABLE_ASSERT
#undef G_LOG_DOMAIN
#include <string.h>
#include <glib.h>
/* Bug 311337 */
static void
test_iconv_state (void)
{
gchar *in = "\xf4\xe5\xf8\xe5\xed";
gchar *expected = "\xd7\xa4\xd7\x95\xd7\xa8\xd7\x95\xd7\x9d";
gchar *out;
gsize bytes_read = 0;
gsize bytes_written = 0;
GError *error = NULL;
out = g_convert (in, -1, "UTF-8", "CP1255",
&bytes_read, &bytes_written, &error);
if (error && error->code == G_CONVERT_ERROR_NO_CONVERSION)
return; /* silently skip if CP1255 is not supported, see bug 467707 */
g_assert (error == NULL);
g_assert (bytes_read == 5);
g_assert (bytes_written == 10);
g_assert (strcmp (out, expected) == 0);
g_free (out);
}
/* some tests involving "vulgar fraction one half" */
static void
test_one_half (void)
{
gchar *in = "\xc2\xbd";
gchar *out;
gsize bytes_read = 0;
gsize bytes_written = 0;
GError *error = NULL;
out = g_convert (in, -1,
"ISO-8859-1", "UTF-8",
&bytes_read, &bytes_written,
&error);
g_assert (error == NULL);
g_assert (bytes_read == 2);
g_assert (bytes_written == 1);
g_assert (strcmp (out, "\xbd") == 0);
g_free (out);
out = g_convert (in, -1,
"ISO-8859-15", "UTF-8",
&bytes_read, &bytes_written,
&error);
g_assert (error && error->code == G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
g_assert (bytes_read == 0);
g_assert (bytes_written == 0);
g_assert (out == NULL);
g_clear_error (&error);
g_free (out);
out = g_convert_with_fallback (in, -1,
"ISO8859-15", "UTF-8",
"a",
&bytes_read, &bytes_written,
&error);
g_assert (error == NULL);
g_assert (bytes_read == 2);
g_assert (bytes_written == 1);
g_assert (strcmp (out, "a") == 0);
g_free (out);
}
static void
test_byte_order (void)
{
gchar in_be[4] = { 0xfe, 0xff, 0x03, 0x93}; /* capital gamma */
gchar in_le[4] = { 0xff, 0xfe, 0x93, 0x03};
gchar *expected = "\xce\x93";
gchar *out;
gsize bytes_read = 0;
gsize bytes_written = 0;
GError *error = NULL;
out = g_convert (in_be, sizeof (in_be),
"UTF-8", "UTF-16",
&bytes_read, &bytes_written,
&error);
g_assert (error == NULL);
g_assert (bytes_read == 4);
g_assert (bytes_written == 2);
g_assert (strcmp (out, expected) == 0);
g_free (out);
out = g_convert (in_le, sizeof (in_le),
"UTF-8", "UTF-16",
&bytes_read, &bytes_written,
&error);
g_assert (error == NULL);
g_assert (bytes_read == 4);
g_assert (bytes_written == 2);
g_assert (strcmp (out, expected) == 0);
g_free (out);
}
/*
 * check_utf8_to_ucs4:
 * @utf8: UTF-8 input handed to the converters
 * @utf8_len: number of bytes of @utf8 to convert
 * @ucs4: expected output; compared up to and including index @ucs4_len
 * @ucs4_len: expected number of UCS-4 characters written
 * @error_pos: 0 if the conversion is expected to succeed, otherwise the
 *   expected value of items_read where the conversion stops
 *
 * Exercises g_utf8_to_ucs4_fast() (only when no error is expected) and
 * g_utf8_to_ucs4() in three ways: with the explicit length, with
 * length -1 (only when @utf8_len equals strlen (@utf8)), and with NULL
 * items_read/items_written pointers.
 */
static void
check_utf8_to_ucs4 (const char     *utf8,
                    glong           utf8_len,
                    const gunichar *ucs4,
                    glong           ucs4_len,
                    glong           error_pos)
{
  gunichar *result, *result2, *result3;
  glong items_read, items_read2;
  glong items_written, items_written2;
  GError *error, *error2, *error3;
  glong i;

  if (!error_pos)
    {
      /* check the fast conversion */
      result = g_utf8_to_ucs4_fast (utf8, utf8_len, &items_written);

      g_assert (items_written == ucs4_len);
      g_assert (result);
      for (i = 0; i <= items_written; i++)
        g_assert (result[i] == ucs4[i]);

      g_free (result);
    }

  error = NULL;
  result = g_utf8_to_ucs4 (utf8, utf8_len, &items_read, &items_written, &error);

  if ((gsize) utf8_len == strlen (utf8))
    {
      /* check that len == -1 yields identical results */
      error2 = NULL;
      result2 = g_utf8_to_ucs4 (utf8, -1, &items_read2, &items_written2, &error2);

      g_assert (error || items_read2 == items_read);
      /* compare against the explicit-length run, not against itself */
      g_assert (error || items_written2 == items_written);
      g_assert (!!result == !!result2);
      g_assert (!!error == !!error2);
      if (result)
        for (i = 0; i <= items_written; i++)
          g_assert (result[i] == result2[i]);

      g_free (result2);
      if (error2)
        g_error_free (error2);
    }

  error3 = NULL;
  result3 = g_utf8_to_ucs4 (utf8, utf8_len, NULL, NULL, &error3);

  if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT)
    {
      /* Partial input: the run that was given items_read succeeds and
       * stops at error_pos; only the NULL-pointer run reports an error. */
      g_assert (error == NULL);
      g_assert (items_read == error_pos);
      g_assert (items_written == ucs4_len);
      g_assert (result);
      for (i = 0; i <= items_written; i++)
        g_assert (result[i] == ucs4[i]);

      /* error3 is set in this branch and would otherwise leak */
      g_error_free (error3);
    }
  else if (error_pos)
    {
      g_assert (error != NULL);
      g_assert (result == NULL);
      g_assert (items_read == error_pos);
      g_error_free (error);

      g_assert (error3 != NULL);
      g_assert (result3 == NULL);
      g_error_free (error3);
    }
  else
    {
      g_assert (error == NULL);
      g_assert (items_read == utf8_len);
      g_assert (items_written == ucs4_len);
      g_assert (result);
      for (i = 0; i <= items_written; i++)
        g_assert (result[i] == ucs4[i]);

      g_assert (error3 == NULL);
      g_assert (result3);
      for (i = 0; i <= ucs4_len; i++)
        g_assert (result3[i] == ucs4[i]);
    }

  g_free (result);
  g_free (result3);
}
/*
 * check_ucs4_to_utf8:
 * @ucs4: UCS-4 input handed to g_ucs4_to_utf8()
 * @ucs4_len: number of characters of @ucs4 to convert
 * @utf8: expected UTF-8 output (only read when no error is expected)
 * @utf8_len: expected number of bytes written
 * @error_pos: 0 if the conversion is expected to succeed, otherwise the
 *   expected value of items_read where the conversion fails
 *
 * Runs the conversion with the explicit length, with length -1 (only when
 * @ucs4 has a zero at index @ucs4_len) and with NULL
 * items_read/items_written pointers, then checks the outcomes.
 */
static void
check_ucs4_to_utf8 (const gunichar *ucs4,
                    glong           ucs4_len,
                    const char     *utf8,
                    glong           utf8_len,
                    glong           error_pos)
{
  gchar *with_len, *with_nul, *without_counts;
  glong read_len, read_nul;
  glong written_len, written_nul;
  GError *err_len = NULL;
  GError *err_nul = NULL;
  GError *err_without_counts = NULL;

  with_len = g_ucs4_to_utf8 (ucs4, ucs4_len, &read_len, &written_len, &err_len);

  if (ucs4[ucs4_len] == 0)
    {
      /* check that len == -1 yields identical results */
      with_nul = g_ucs4_to_utf8 (ucs4, -1, &read_nul, &written_nul, &err_nul);

      g_assert (err_len || read_nul == read_len);
      g_assert (err_len || written_nul == written_len);
      g_assert (!!with_len == !!with_nul);
      g_assert (!!err_len == !!err_nul);
      if (with_len != NULL)
        g_assert (strcmp (with_len, with_nul) == 0);

      g_free (with_nul);
      if (err_nul != NULL)
        g_error_free (err_nul);
    }

  without_counts = g_ucs4_to_utf8 (ucs4, ucs4_len, NULL, NULL, &err_without_counts);

  if (!error_pos)
    {
      /* success expected: both runs must reproduce the reference string */
      g_assert (err_len == NULL);
      g_assert (read_len == ucs4_len);
      g_assert (written_len == utf8_len);
      g_assert (with_len);
      g_assert (strcmp (with_len, utf8) == 0);

      g_assert (err_without_counts == NULL);
      g_assert (without_counts);
      g_assert (strcmp (without_counts, utf8) == 0);
    }
  else
    {
      /* failure expected: both runs must report an error and no output */
      g_assert (err_len != NULL);
      g_assert (with_len == NULL);
      g_assert (read_len == error_pos);
      g_error_free (err_len);

      g_assert (err_without_counts != NULL);
      g_assert (without_counts == NULL);
      g_error_free (err_without_counts);
    }

  g_free (with_len);
  g_free (without_counts);
}
/*
 * check_utf8_to_utf16:
 * @utf8: UTF-8 input handed to g_utf8_to_utf16()
 * @utf8_len: number of bytes of @utf8 to convert
 * @utf16: expected output; compared up to and including index @utf16_len
 * @utf16_len: expected number of UTF-16 code units written
 * @error_pos: 0 if the conversion is expected to succeed, otherwise the
 *   expected value of items_read where the conversion stops
 *
 * Runs the conversion with the explicit length, with length -1 (only when
 * @utf8_len equals strlen (@utf8)) and with NULL items_read/items_written
 * pointers, then checks the outcomes.
 */
static void
check_utf8_to_utf16 (const char      *utf8,
                     glong            utf8_len,
                     const gunichar2 *utf16,
                     glong            utf16_len,
                     glong            error_pos)
{
  gunichar2 *result, *result2, *result3;
  glong items_read, items_read2;
  glong items_written, items_written2;
  GError *error, *error2, *error3;
  glong i;

  error = NULL;
  result = g_utf8_to_utf16 (utf8, utf8_len, &items_read, &items_written, &error);

  if ((gsize) utf8_len == strlen (utf8))
    {
      /* check that len == -1 yields identical results */
      error2 = NULL;
      result2 = g_utf8_to_utf16 (utf8, -1, &items_read2, &items_written2, &error2);

      g_assert (error || items_read2 == items_read);
      /* compare against the explicit-length run, not against itself */
      g_assert (error || items_written2 == items_written);
      g_assert (!!result == !!result2);
      g_assert (!!error == !!error2);
      if (result)
        for (i = 0; i <= items_written; i++)
          g_assert (result[i] == result2[i]);

      g_free (result2);
      if (error2)
        g_error_free (error2);
    }

  error3 = NULL;
  result3 = g_utf8_to_utf16 (utf8, utf8_len, NULL, NULL, &error3);

  if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT)
    {
      /* Partial input: the run that was given items_read succeeds and
       * stops at error_pos; only the NULL-pointer run reports an error. */
      g_assert (error == NULL);
      g_assert (items_read == error_pos);
      g_assert (items_written == utf16_len);
      g_assert (result);
      for (i = 0; i <= items_written; i++)
        g_assert (result[i] == utf16[i]);

      /* error3 is set in this branch and would otherwise leak */
      g_error_free (error3);
    }
  else if (error_pos)
    {
      g_assert (error != NULL);
      g_assert (result == NULL);
      g_assert (items_read == error_pos);
      g_error_free (error);

      g_assert (error3 != NULL);
      g_assert (result3 == NULL);
      g_error_free (error3);
    }
  else
    {
      g_assert (error == NULL);
      g_assert (items_read == utf8_len);
      g_assert (items_written == utf16_len);
      g_assert (result);
      for (i = 0; i <= items_written; i++)
        g_assert (result[i] == utf16[i]);

      g_assert (error3 == NULL);
      g_assert (result3);
      for (i = 0; i <= utf16_len; i++)
        g_assert (result3[i] == utf16[i]);
    }

  g_free (result);
  g_free (result3);
}
/*
 * check_utf16_to_utf8:
 * @utf16: UTF-16 input handed to g_utf16_to_utf8()
 * @utf16_len: number of code units of @utf16 to convert
 * @utf8: expected UTF-8 output (only read when a result is expected)
 * @utf8_len: expected number of bytes written
 * @error_pos: 0 if the conversion is expected to succeed, otherwise the
 *   expected value of items_read where the conversion stops
 *
 * Runs the conversion with the explicit length, with length -1 (only when
 * @utf16 has a zero at index @utf16_len) and with NULL
 * items_read/items_written pointers, then checks the outcomes.
 */
static void
check_utf16_to_utf8 (const gunichar2 *utf16,
                     glong            utf16_len,
                     const char      *utf8,
                     glong            utf8_len,
                     glong            error_pos)
{
  gchar *result, *result2, *result3;
  glong items_read, items_read2;
  glong items_written, items_written2;
  GError *error, *error2, *error3;

  error = NULL;
  result = g_utf16_to_utf8 (utf16, utf16_len, &items_read, &items_written, &error);

  if (utf16[utf16_len] == 0)
    {
      /* check that len == -1 yields identical results */
      error2 = NULL;
      result2 = g_utf16_to_utf8 (utf16, -1, &items_read2, &items_written2, &error2);

      g_assert (error || items_read2 == items_read);
      g_assert (error || items_written2 == items_written);
      g_assert (!!result == !!result2);
      g_assert (!!error == !!error2);
      if (result)
        g_assert (strcmp (result, result2) == 0);

      g_free (result2);
      if (error2)
        g_error_free (error2);
    }

  error3 = NULL;
  result3 = g_utf16_to_utf8 (utf16, utf16_len, NULL, NULL, &error3);

  if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT)
    {
      /* Partial input: the run that was given items_read succeeds and
       * stops one code unit short of the end; only the NULL-pointer run
       * reports an error. */
      g_assert (error == NULL);
      g_assert (items_read == error_pos);
      g_assert (items_read + 1 == utf16_len);
      g_assert (items_written == utf8_len);
      g_assert (result);
      g_assert (strcmp (result, utf8) == 0);

      /* error3 is set in this branch and would otherwise leak */
      g_error_free (error3);
    }
  else if (error_pos)
    {
      g_assert (error != NULL);
      g_assert (result == NULL);
      g_assert (items_read == error_pos);
      g_error_free (error);

      g_assert (error3 != NULL);
      g_assert (result3 == NULL);
      g_error_free (error3);
    }
  else
    {
      g_assert (error == NULL);
      g_assert (items_read == utf16_len);
      g_assert (items_written == utf8_len);
      g_assert (result);
      g_assert (strcmp (result, utf8) == 0);

      g_assert (error3 == NULL);
      g_assert (result3);
      g_assert (strcmp (result3, utf8) == 0);
    }

  g_free (result);
  g_free (result3);
}
/*
 * check_ucs4_to_utf16:
 * @ucs4: UCS-4 input handed to g_ucs4_to_utf16()
 * @ucs4_len: number of characters of @ucs4 to convert
 * @utf16: expected output; compared up to and including index @utf16_len
 * @utf16_len: expected number of UTF-16 code units written
 * @error_pos: 0 if the conversion is expected to succeed, otherwise the
 *   expected value of items_read where the conversion fails
 *
 * Runs the conversion with the explicit length, with length -1 (only when
 * @ucs4 has a zero at index @ucs4_len) and with NULL
 * items_read/items_written pointers, then checks the outcomes.
 */
static void
check_ucs4_to_utf16 (const gunichar  *ucs4,
                     glong            ucs4_len,
                     const gunichar2 *utf16,
                     glong            utf16_len,
                     glong            error_pos)
{
  gunichar2 *result, *result2, *result3;
  glong items_read, items_read2;
  glong items_written, items_written2;
  GError *error, *error2, *error3;
  glong i;

  error = NULL;
  result = g_ucs4_to_utf16 (ucs4, ucs4_len, &items_read, &items_written, &error);

  if (ucs4[ucs4_len] == 0)
    {
      /* check that len == -1 yields identical results */
      error2 = NULL;
      result2 = g_ucs4_to_utf16 (ucs4, -1, &items_read2, &items_written2, &error2);

      g_assert (error || items_read2 == items_read);
      g_assert (error || items_written2 == items_written);
      g_assert (!!result == !!result2);
      g_assert (!!error == !!error2);
      /* Bound the comparison by what was actually written (already
       * asserted equal for both runs), not by the expected length. */
      if (result)
        for (i = 0; i <= items_written; i++)
          g_assert (result[i] == result2[i]);

      g_free (result2);
      if (error2)
        g_error_free (error2);
    }

  /* Pass the explicit length here, as the other check_* helpers do, so
   * this run does not depend on a terminator at ucs4[ucs4_len]. */
  error3 = NULL;
  result3 = g_ucs4_to_utf16 (ucs4, ucs4_len, NULL, NULL, &error3);

  if (error_pos)
    {
      g_assert (error != NULL);
      g_assert (result == NULL);
      g_assert (items_read == error_pos);
      g_error_free (error);

      g_assert (error3 != NULL);
      g_assert (result3 == NULL);
      g_error_free (error3);
    }
  else
    {
      g_assert (error == NULL);
      g_assert (items_read == ucs4_len);
      g_assert (items_written == utf16_len);
      g_assert (result);
      for (i = 0; i <= utf16_len; i++)
        g_assert (result[i] == utf16[i]);

      g_assert (error3 == NULL);
      g_assert (result3);
      for (i = 0; i <= utf16_len; i++)
        g_assert (result3[i] == utf16[i]);
    }

  g_free (result);
  g_free (result3);
}
/*
 * check_utf16_to_ucs4:
 * @utf16: UTF-16 input handed to g_utf16_to_ucs4()
 * @utf16_len: number of code units of @utf16 to convert
 * @ucs4: expected output; compared up to and including index @ucs4_len
 * @ucs4_len: expected number of UCS-4 characters written
 * @error_pos: 0 if the conversion is expected to succeed, otherwise the
 *   expected value of items_read where the conversion stops
 *
 * Runs the conversion with the explicit length, with length -1 (only when
 * @utf16 has a zero at index @utf16_len) and with NULL
 * items_read/items_written pointers, then checks the outcomes.
 */
static void
check_utf16_to_ucs4 (const gunichar2 *utf16,
                     glong            utf16_len,
                     const gunichar  *ucs4,
                     glong            ucs4_len,
                     glong            error_pos)
{
  gunichar *result, *result2, *result3;
  glong items_read, items_read2;
  glong items_written, items_written2;
  GError *error, *error2, *error3;
  glong i;

  error = NULL;
  result = g_utf16_to_ucs4 (utf16, utf16_len, &items_read, &items_written, &error);

  if (utf16[utf16_len] == 0)
    {
      /* check that len == -1 yields identical results */
      error2 = NULL;
      result2 = g_utf16_to_ucs4 (utf16, -1, &items_read2, &items_written2, &error2);

      g_assert (error || items_read2 == items_read);
      /* compare against the explicit-length run, not against itself */
      g_assert (error || items_written2 == items_written);
      g_assert (!!result == !!result2);
      g_assert (!!error == !!error2);
      if (result)
        for (i = 0; i <= items_written; i++)
          g_assert (result[i] == result2[i]);

      g_free (result2);
      if (error2)
        g_error_free (error2);
    }

  error3 = NULL;
  result3 = g_utf16_to_ucs4 (utf16, utf16_len, NULL, NULL, &error3);

  if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT)
    {
      /* Partial input: the run that was given items_read succeeds and
       * stops one code unit short of the end; only the NULL-pointer run
       * reports an error. */
      g_assert (error == NULL);
      g_assert (items_read == error_pos);
      g_assert (items_read + 1 == utf16_len);
      g_assert (items_written == ucs4_len);
      g_assert (result);
      for (i = 0; i <= items_written; i++)
        g_assert (result[i] == ucs4[i]);

      /* error3 is set in this branch and would otherwise leak */
      g_error_free (error3);
    }
  else if (error_pos)
    {
      g_assert (error != NULL);
      g_assert (result == NULL);
      g_assert (items_read == error_pos);
      g_error_free (error);

      g_assert (error3 != NULL);
      g_assert (result3 == NULL);
      g_error_free (error3);
    }
  else
    {
      g_assert (error == NULL);
      g_assert (items_read == utf16_len);
      g_assert (items_written == ucs4_len);
      g_assert (result);
      for (i = 0; i <= ucs4_len; i++)
        g_assert (result[i] == ucs4[i]);

      g_assert (error3 == NULL);
      g_assert (result3);
      for (i = 0; i <= ucs4_len; i++)
        g_assert (result3[i] == ucs4[i]);
    }

  g_free (result);
  g_free (result3);
}
/* Drives the check_* helpers with matching UTF-8 / UCS-4 / UTF-16 data.
 *
 * The two scratch arrays are reused from case to case and only the leading
 * elements a case needs are overwritten, so the order of the statements
 * below matters.
 */
static void
test_unicode_conversions (void)
{
  const char *utf8_text;
  gunichar ucs4_buf[100];
  gunichar2 utf16_buf[100];

  /* plain ASCII: every direction must round-trip */
  utf8_text = "abc";
  ucs4_buf[0] = 0x61;
  ucs4_buf[1] = 0x62;
  ucs4_buf[2] = 0x63;
  ucs4_buf[3] = 0;
  utf16_buf[0] = 0x61;
  utf16_buf[1] = 0x62;
  utf16_buf[2] = 0x63;
  utf16_buf[3] = 0;

  check_utf8_to_ucs4 (utf8_text, 3, ucs4_buf, 3, 0);
  check_ucs4_to_utf8 (ucs4_buf, 3, utf8_text, 3, 0);
  check_utf8_to_utf16 (utf8_text, 3, utf16_buf, 3, 0);
  check_utf16_to_utf8 (utf16_buf, 3, utf8_text, 3, 0);
  check_ucs4_to_utf16 (ucs4_buf, 3, utf16_buf, 3, 0);
  check_utf16_to_ucs4 (utf16_buf, 3, ucs4_buf, 3, 0);

  /* three characters that each take two bytes in UTF-8 */
  utf8_text = "\316\261\316\262\316\263";
  ucs4_buf[0] = 0x03b1;
  ucs4_buf[1] = 0x03b2;
  ucs4_buf[2] = 0x03b3;
  ucs4_buf[3] = 0;
  utf16_buf[0] = 0x03b1;
  utf16_buf[1] = 0x03b2;
  utf16_buf[2] = 0x03b3;
  utf16_buf[3] = 0;

  check_utf8_to_ucs4 (utf8_text, 6, ucs4_buf, 3, 0);
  check_ucs4_to_utf8 (ucs4_buf, 3, utf8_text, 6, 0);
  check_utf8_to_utf16 (utf8_text, 6, utf16_buf, 3, 0);
  check_utf16_to_utf8 (utf16_buf, 3, utf8_text, 6, 0);
  check_ucs4_to_utf16 (ucs4_buf, 3, utf16_buf, 3, 0);
  check_utf16_to_ucs4 (utf16_buf, 3, ucs4_buf, 3, 0);

  /* partial utf8 character */
  utf8_text = "abc\316";
  ucs4_buf[0] = 0x61;
  ucs4_buf[1] = 0x62;
  ucs4_buf[2] = 0x63;
  ucs4_buf[3] = 0;
  utf16_buf[0] = 0x61;
  utf16_buf[1] = 0x62;
  utf16_buf[2] = 0x63;
  utf16_buf[3] = 0;

  check_utf8_to_ucs4 (utf8_text, 4, ucs4_buf, 3, 3);
  check_utf8_to_utf16 (utf8_text, 4, utf16_buf, 3, 3);

  /* invalid utf8 */
  utf8_text = "abc\316\316";
  ucs4_buf[0] = 0;
  utf16_buf[0] = 0;

  check_utf8_to_ucs4 (utf8_text, 5, ucs4_buf, 0, 3);
  check_utf8_to_utf16 (utf8_text, 5, utf16_buf, 0, 3);

  /* partial utf16 character */
  utf8_text = "ab";
  ucs4_buf[0] = 0x61;
  ucs4_buf[1] = 0x62;
  ucs4_buf[2] = 0;
  utf16_buf[0] = 0x61;
  utf16_buf[1] = 0x62;
  utf16_buf[2] = 0xd801;
  utf16_buf[3] = 0;

  check_utf16_to_utf8 (utf16_buf, 3, utf8_text, 2, 2);
  check_utf16_to_ucs4 (utf16_buf, 3, ucs4_buf, 2, 2);

  /* invalid utf16 */
  utf8_text = NULL;
  ucs4_buf[0] = 0;
  utf16_buf[0] = 0x61;
  utf16_buf[1] = 0x62;
  utf16_buf[2] = 0xdc01;
  utf16_buf[3] = 0;

  check_utf16_to_utf8 (utf16_buf, 3, utf8_text, 0, 2);
  check_utf16_to_ucs4 (utf16_buf, 3, ucs4_buf, 0, 2);

  /* invalid ucs4 */
  utf8_text = NULL;
  ucs4_buf[0] = 0x61;
  ucs4_buf[1] = 0x62;
  ucs4_buf[2] = 0x80000000;
  ucs4_buf[3] = 0;
  utf16_buf[0] = 0;

  check_ucs4_to_utf8 (ucs4_buf, 3, utf8_text, 0, 2);
  check_ucs4_to_utf16 (ucs4_buf, 3, utf16_buf, 0, 2);
}
/* Test driver: runs the four test functions in sequence and returns 0.
 * The command-line arguments are not used. */
int
main (int   argc,
      char *argv[])
{
  (void) argc;
  (void) argv;

  test_iconv_state ();
  test_one_half ();
  test_byte_order ();
  test_unicode_conversions ();

  return 0;
}