| 
									
										
										
										
											2002-07-04 15:19:30 +00:00
										 |  |  | #undef G_DISABLE_ASSERT
 | 
					
						
							|  |  |  | #undef G_LOG_DOMAIN
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2001-01-05 21:22:47 +00:00
										 |  |  | #include <stdarg.h>
 | 
					
						
							|  |  |  | #include <stdio.h>
 | 
					
						
							|  |  |  | #include <stdlib.h>
 | 
					
						
							|  |  |  | #include <string.h>
 | 
					
						
							|  |  |  | #include <glib.h>
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static gint exit_status = 0; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
											  
											
												Changes for 64-bit cleanliness, loosely based on patch from Mark Murnane.
Wed Jun 20 12:00:54 2001  Owen Taylor  <otaylor@redhat.com>
        Changes for 64-bit cleanliness, loosely based on patch
	from Mark Murnane.
	* gconvert.c (g_convert/g_convert_with_fallback): Remove
	workarounds for since-fixed GNU libc bugs. Minor
	doc fix.
	* gconvert.[ch]: Change gint to gsize/gssize as
	appropriate.
	* gconvert.c (g_locale/filename_to/from_utf8): Fix incorrect
	computation of bytes_read / bytes_written.
	* gfileutils.[ch] (g_file_get_contents): Make length
	out parameter 'gsize *len'.
	* ghook.c (g_hook_compare_ids): Don't compare a
	and b as 'a - b'.
	* gmacros.h (GSIZE_TO_POINTER): Add GPOINTER_TO_SIZE,
	GSIZE_TO_POINTER.
	* gmain.c (g_timeout_prepare): Rewrite to avoid
	overflows. (Fixes bug when system clock skews
	backwards more than 24 days.)
	* gmarkup.[ch]: Make lengths passed to callbacks
	gsize, length for g_markup_parse_context_parse(),
	g_markup_escape_text() gssize.
	* gmessages.[ch] (g_printf_string_upper_bound): Change
	return value to gsize.
	* gmessages.c (printf_string_upper_bound): Remove
	a ridiculous use of 'inline' on a 300 line function.
	* gstring.[ch]: Represent size of string as a gsize,
	not gint. Make parameters to functions take gsize,
	or gssize where -1 is allowed.
	* gstring.c (g_string_erase): Make
	g_string_erase (string, pos, -1) a synonym for
	g_string_truncate for consistency with other G*
	APIs.
	* gstrfuncs.[ch]: Make all functions taking a string
	length, take a gsize, or gssize if -1 is allowed.
	(g_strstr_len, g_strrstr_len). Also fix some boundary
	conditions in g_str[r]str[_len].
	* gutf8.c tests/unicode-encoding.c: Make parameters that
	are byte lengths gsize, gssize as appropriate. Make
	character offsets, other counts, glong.
	* gasyncqueue.c gcompletion.c
          timeloop.c timeloop-basic.c gutils.c gspawn.c.
	Small 64 bit cleanliness fixups.
	* glist.c (g_list_sort2, g_list_sort_real): Fix functions
	that should have been static.
	* gdate.c (g_date_fill_parse_tokens): Fix extra
	declaration that was shadowing another.
	* tests/module-test.c: Include string.h
Mon Jun 18 15:43:29 2001  Owen Taylor  <otaylor@redhat.com>
	* gutf8.c (g_get_charset): Make argument
	G_CONST_RETURN char **.
											
										 
											2001-06-23 13:55:09 +00:00
										 |  |  | static void | 
					
						
							| 
									
										
										
										
											2001-01-05 21:22:47 +00:00
										 |  |  | croak (char *format, ...) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |   va_list va; | 
					
						
							|  |  |  |    | 
					
						
							|  |  |  |   va_start (va, format); | 
					
						
							|  |  |  |   vfprintf (stderr, format, va); | 
					
						
							|  |  |  |   va_end (va); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   exit (1); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
											  
											
												Changes for 64-bit cleanliness, loosely based on patch from Mark Murnane.
Wed Jun 20 12:00:54 2001  Owen Taylor  <otaylor@redhat.com>
        Changes for 64-bit cleanliness, loosely based on patch
	from Mark Murnane.
	* gconvert.c (g_convert/g_convert_with_fallback): Remove
	workarounds for since-fixed GNU libc bugs. Minor
	doc fix.
	* gconvert.[ch]: Change gint to gsize/gssize as
	appropriate.
	* gconvert.c (g_locale/filename_to/from_utf8): Fix incorrect
	computation of bytes_read / bytes_written.
	* gfileutils.[ch] (g_file_get_contents): Make length
	out parameter 'gsize *len'.
	* ghook.c (g_hook_compare_ids): Don't compare a
	and b as 'a - b'.
	* gmacros.h (GSIZE_TO_POINTER): Add GPOINTER_TO_SIZE,
	GSIZE_TO_POINTER.
	* gmain.c (g_timeout_prepare): Rewrite to avoid
	overflows. (Fixes bug when system clock skews
	backwards more than 24 days.)
	* gmarkup.[ch]: Make lengths passed to callbacks
	gsize, length for g_markup_parse_context_parse(),
	g_markup_escape_text() gssize.
	* gmessages.[ch] (g_printf_string_upper_bound): Change
	return value to gsize.
	* gmessages.c (printf_string_upper_bound): Remove
	a ridiculous use of 'inline' on a 300 line function.
	* gstring.[ch]: Represent size of string as a gsize,
	not gint. Make parameters to functions take gsize,
	or gssize where -1 is allowed.
	* gstring.c (g_string_erase): Make
	g_string_erase (string, pos, -1) a synonym for
	g_string_truncate for consistency with other G*
	APIs.
	* gstrfuncs.[ch]: Make all functions taking a string
	length, take a gsize, or gssize if -1 is allowed.
	(g_strstr_len, g_strrstr_len). Also fix some boundary
	conditions in g_str[r]str[_len].
	* gutf8.c tests/unicode-encoding.c: Make parameters that
	are byte lengths gsize, gssize as appropriate. Make
	character offsets, other counts, glong.
	* gasyncqueue.c gcompletion.c
          timeloop.c timeloop-basic.c gutils.c gspawn.c.
	Small 64 bit cleanliness fixups.
	* glist.c (g_list_sort2, g_list_sort_real): Fix functions
	that should have been static.
	* gdate.c (g_date_fill_parse_tokens): Fix extra
	declaration that was shadowing another.
	* tests/module-test.c: Include string.h
Mon Jun 18 15:43:29 2001  Owen Taylor  <otaylor@redhat.com>
	* gutf8.c (g_get_charset): Make argument
	G_CONST_RETURN char **.
											
										 
											2001-06-23 13:55:09 +00:00
										 |  |  | static void | 
					
						
							| 
									
										
										
										
											2001-01-05 21:22:47 +00:00
										 |  |  | fail (char *format, ...) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |   va_list va; | 
					
						
							|  |  |  |    | 
					
						
							|  |  |  |   va_start (va, format); | 
					
						
							|  |  |  |   vfprintf (stderr, format, va); | 
					
						
							|  |  |  |   va_end (va); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   exit_status |= 1; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | typedef enum | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |   VALID, | 
					
						
							|  |  |  |   INCOMPLETE, | 
					
						
							|  |  |  |   NOTUNICODE, | 
					
						
							|  |  |  |   OVERLONG, | 
					
						
							|  |  |  |   MALFORMED | 
					
						
							|  |  |  | } Status; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static gboolean | 
					
						
							|  |  |  | ucs4_equal (gunichar *a, gunichar *b) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |   while (*a && *b && (*a == *b)) | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |       a++; | 
					
						
							|  |  |  |       b++; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return (*a == *b); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static gboolean | 
					
						
							|  |  |  | utf16_equal (gunichar2 *a, gunichar2 *b) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |   while (*a && *b && (*a == *b)) | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |       a++; | 
					
						
							|  |  |  |       b++; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return (*a == *b); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static gint | 
					
						
							|  |  |  | utf16_count (gunichar2 *a) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |   gint result = 0; | 
					
						
							|  |  |  |    | 
					
						
							|  |  |  |   while (a[result]) | 
					
						
							|  |  |  |     result++; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return result; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | process (gint      line, | 
					
						
							|  |  |  | 	 gchar    *utf8, | 
					
						
							|  |  |  | 	 Status    status, | 
					
						
							|  |  |  | 	 gunichar *ucs4, | 
					
						
							|  |  |  | 	 gint      ucs4_len) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |   const gchar *end; | 
					
						
							|  |  |  |   gboolean is_valid = g_utf8_validate (utf8, -1, &end); | 
					
						
							|  |  |  |   GError *error = NULL; | 
					
						
							| 
									
										
											  
											
												Changes for 64-bit cleanliness, loosely based on patch from Mark Murnane.
Wed Jun 20 12:00:54 2001  Owen Taylor  <otaylor@redhat.com>
        Changes for 64-bit cleanliness, loosely based on patch
	from Mark Murnane.
	* gconvert.c (g_convert/g_convert_with_fallback): Remove
	workarounds for since-fixed GNU libc bugs. Minor
	doc fix.
	* gconvert.[ch]: Change gint to gsize/gssize as
	appropriate.
	* gconvert.c (g_locale/filename_to/from_utf8): Fix incorrect
	computation of bytes_read / bytes_written.
	* gfileutils.[ch] (g_file_get_contents): Make length
	out parameter 'gsize *len'.
	* ghook.c (g_hook_compare_ids): Don't compare a
	and b as 'a - b'.
	* gmacros.h (GSIZE_TO_POINTER): Add GPOINTER_TO_SIZE,
	GSIZE_TO_POINTER.
	* gmain.c (g_timeout_prepare): Rewrite to avoid
	overflows. (Fixes bug when system clock skews
	backwards more than 24 days.)
	* gmarkup.[ch]: Make lengths passed to callbacks
	gsize, length for g_markup_parse-context_parse(),
	g_markup_escape_text() gssize.
	* gmessages.[ch] (g_printf_string_upper_bound): Change
	return value to gsize.
	* gmessages.c (printf_string_upper_bound): Remove
	a ridiculous use of 'inline' on a 300 line function.
	* gstring.[ch]: Represent size of string as a gsize,
	not gint. Make parameters to functions take gsize,
	or gssize where -1 is allowed.
	* gstring.c (g_string_erase): Make
	g_string_erase (string, pos, -1) a synonym for
	g_string_truncate for consistency with other G*
	APIs.
	* gstrfuncs.[ch]: Make all functions taking a string
	length, take a gsize, or gssize if -1 is allowed.
	(g_strstr_len, g_strrstr_len). Also fix some boundary
	conditions in g_str[r]str[_len].
	* gutf8.c tests/unicode-encoding.c: Make parameters that
	are byte lengths gsize, gssize as appropriate. Make
	character offsets, other counts, glong.
	* gasyncqueue.c gcompletion.c
          timeloop.c timeloop-basic.c gutils.c gspawn.c.
	Small 64 bit cleanliness fixups.
	* glist.c (g_list_sort2, g_list_sort_real): Fix functions
	that should have been static.
	* gdate.c (g_date_fill_parse_tokens): Fix extra
	declaration that was shadowing another.
	* tests/module-test.c: Include string.h
Mon Jun 18 15:43:29 2001  Owen Taylor  <otaylor@redhat.com>
	* gutf8.c (g_get_charset): Make argument
	G_CONST_RETURN char **.
											
										 
											2001-06-23 13:55:09 +00:00
										 |  |  |   glong items_read, items_written; | 
					
						
							| 
									
										
										
										
											2001-01-05 21:22:47 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |   switch (status) | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |     case VALID: | 
					
						
							|  |  |  |       if (!is_valid) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 	  fail ("line %d: valid but g_utf8_validate returned FALSE\n", line); | 
					
						
							|  |  |  | 	  return; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  |       break; | 
					
						
							|  |  |  |     case NOTUNICODE: | 
					
						
							|  |  |  |     case INCOMPLETE: | 
					
						
							|  |  |  |     case OVERLONG: | 
					
						
							|  |  |  |     case MALFORMED: | 
					
						
							|  |  |  |       if (is_valid) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 	  fail ("line %d: invalid but g_utf8_validate returned TRUE\n", line); | 
					
						
							|  |  |  | 	  return; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  |       break; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if (status == INCOMPLETE) | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |       gunichar *ucs4_result;       | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       ucs4_result = g_utf8_to_ucs4 (utf8, -1, NULL, NULL, &error); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       if (!error || !g_error_matches (error, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT)) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 	  fail ("line %d: incomplete input not properly detected\n", line); | 
					
						
							|  |  |  | 	  return; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  |       g_clear_error (&error); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       ucs4_result = g_utf8_to_ucs4 (utf8, -1, &items_read, NULL, &error); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       if (!ucs4_result || items_read == strlen (utf8)) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 	  fail ("line %d: incomplete input not properly detected\n", line); | 
					
						
							|  |  |  | 	  return; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       g_free (ucs4_result); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if (status == VALID || status == NOTUNICODE) | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |       gunichar *ucs4_result; | 
					
						
							|  |  |  |       gchar *utf8_result; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       ucs4_result = g_utf8_to_ucs4 (utf8, -1, &items_read, &items_written, &error); | 
					
						
							|  |  |  |       if (!ucs4_result) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 	  fail ("line %d: conversion to ucs4 failed: %s\n", line, error->message); | 
					
						
							|  |  |  | 	  return; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  |        | 
					
						
							|  |  |  |       if (!ucs4_equal (ucs4_result, ucs4) || | 
					
						
							|  |  |  | 	  items_read != strlen (utf8) || | 
					
						
							|  |  |  | 	  items_written != ucs4_len) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 	  fail ("line %d: results of conversion to ucs4 do not match expected.\n", line); | 
					
						
							|  |  |  | 	  return; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       g_free (ucs4_result); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       ucs4_result = g_utf8_to_ucs4_fast (utf8, -1, &items_written); | 
					
						
							|  |  |  |        | 
					
						
							|  |  |  |       if (!ucs4_equal (ucs4_result, ucs4) || | 
					
						
							|  |  |  | 	  items_written != ucs4_len) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 	  fail ("line %d: results of conversion to ucs4 do not match expected.\n", line); | 
					
						
							|  |  |  | 	  return; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       utf8_result = g_ucs4_to_utf8 (ucs4_result, -1, &items_read, &items_written, &error); | 
					
						
							|  |  |  |       if (!utf8_result) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 	  fail ("line %d: conversion back to utf8 failed: %s", line, error->message); | 
					
						
							|  |  |  | 	  return; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       if (strcmp (utf8_result, utf8) != 0 || | 
					
						
							|  |  |  | 	  items_read != ucs4_len || | 
					
						
							|  |  |  | 	  items_written != strlen (utf8)) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 	  fail ("line %d: conversion back to utf8 did not match original\n", line); | 
					
						
							|  |  |  | 	  return; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       g_free (utf8_result); | 
					
						
							|  |  |  |       g_free (ucs4_result); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if (status == VALID) | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |       gunichar2 *utf16_expected_tmp; | 
					
						
							|  |  |  |       gunichar2 *utf16_expected; | 
					
						
							|  |  |  |       gunichar2 *utf16_from_utf8; | 
					
						
							|  |  |  |       gunichar2 *utf16_from_ucs4; | 
					
						
							|  |  |  |       gunichar *ucs4_result; | 
					
						
							| 
									
										
											  
											
												Changes for 64-bit cleanliness, loosely based on patch from Mark Murnane.
Wed Jun 20 12:00:54 2001  Owen Taylor  <otaylor@redhat.com>
        Changes for 64-bit cleanliness, loosely based on patch
	from Mark Murnane.
	* gconvert.c (g_convert/g_convert_with_fallback): Remove
	workarounds for since-fixed GNU libc bugs. Minor
	doc fix.
	* gconvert.[ch]: Change gint to gsize/gssize as
	appropriate.
	* gconvert.c (g_locale/filename_to/from_utf8): Fix incorrect
	computation of bytes_read / bytes_written.
	* gfileutils.[ch] (g_file_get_contents): Make length
	out parameter 'gsize *len'.
	* ghook.c (g_hook_compare_ids): Don't compare a
	and b as 'a - b'.
	* gmacros.h (GSIZE_TO_POINTER): Add GPOINTER_TO_SIZE,
	GSIZE_TO_POINTER.
	* gmain.c (g_timeout_prepare): Rewrite to avoid
	overflows. (Fixes bug when system clock skews
	backwards more than 24 days.)
	* gmarkup.[ch]: Make lengths passed to callbacks
	gsize, length for g_markup_parse-context_parse(),
	g_markup_escape_text() gssize.
	* gmessages.[ch] (g_printf_string_upper_bound): Change
	return value to gsize.
	* gmessages.c (printf_string_upper_bound): Remove
	a ridiculous use of 'inline' on a 300 line function.
	* gstring.[ch]: Represent size of string as a gsize,
	not gint. Make parameters to functions take gsize,
	or gssize where -1 is allowed.
	* gstring.c (g_string_erase): Make
	g_string_erase (string, pos, -1) a synonym for
	g_string_truncate for consistency with other G*
	APIs.
	* gstrfuncs.[ch]: Make all functions taking a string
	length, take a gsize, or gssize if -1 is allowed.
	(g_strstr_len, g_strrstr_len). Also fix some boundary
	conditions in g_str[r]str[_len].
	* gutf8.c tests/unicode-encoding.c: Make parameters that
	are byte lengths gsize, gssize as appropriate. Make
	character offsets, other counts, glong.
	* gasyncqueue.c gcompletion.c
          timeloop.c timeloop-basic.c gutils.c gspawn.c.
	Small 64 bit cleanliness fixups.
	* glist.c (g_list_sort2, g_list_sort_real): Fix functions
	that should have been static.
	* gdate.c (g_date_fill_parse_tokens): Fix extra
	declaration that was shadowing another.
	* tests/module-test.c: Include string.h
Mon Jun 18 15:43:29 2001  Owen Taylor  <otaylor@redhat.com>
	* gutf8.c (g_get_charset): Make argument
	G_CONST_RETURN char **.
											
										 
											2001-06-23 13:55:09 +00:00
										 |  |  |       gsize bytes_written; | 
					
						
							| 
									
										
										
										
											2001-01-05 21:22:47 +00:00
										 |  |  |       gint n_chars; | 
					
						
							|  |  |  |       gchar *utf8_result; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-01-24 15:25:11 +00:00
										 |  |  | #if G_BYTE_ORDER == G_LITTLE_ENDIAN
 | 
					
						
							| 
									
										
										
										
											2001-01-06 03:09:46 +00:00
										 |  |  | #define TARGET "UTF-16LE"
 | 
					
						
							|  |  |  | #else
 | 
					
						
							|  |  |  | #define TARGET "UTF-16"
 | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       if (!(utf16_expected_tmp = (gunichar2 *)g_convert (utf8, -1, TARGET, "UTF-8", | 
					
						
							| 
									
										
										
										
											2001-01-05 21:22:47 +00:00
										 |  |  | 							 NULL, &bytes_written, NULL))) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 	  fail ("line %d: could not convert to UTF-16 via g_convert\n", line); | 
					
						
							|  |  |  | 	  return; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       /* zero-terminate and remove BOM
 | 
					
						
							|  |  |  |        */ | 
					
						
							|  |  |  |       n_chars = bytes_written / 2; | 
					
						
							|  |  |  |       if (utf16_expected_tmp[0] == 0xfeff) /* BOM */ | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 	  n_chars--; | 
					
						
							|  |  |  | 	  utf16_expected = g_new (gunichar2, n_chars + 1); | 
					
						
							|  |  |  | 	  memcpy (utf16_expected, utf16_expected_tmp + 1, sizeof(gunichar2) * n_chars); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  |       else if (utf16_expected_tmp[0] == 0xfffe) /* ANTI-BOM */ | 
					
						
							|  |  |  | 	{ | 
					
						
							| 
									
										
										
										
											2001-01-06 03:09:46 +00:00
										 |  |  | 	  fail ("line %d: conversion via iconv to \"UTF-16\" is not native-endian\n", line); | 
					
						
							| 
									
										
										
										
											2001-01-05 21:22:47 +00:00
										 |  |  | 	  return; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  |       else | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 	  utf16_expected = g_new (gunichar2, n_chars + 1); | 
					
						
							|  |  |  | 	  memcpy (utf16_expected, utf16_expected_tmp, sizeof(gunichar2) * n_chars); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       utf16_expected[n_chars] = '\0'; | 
					
						
							|  |  |  |        | 
					
						
							|  |  |  |       if (!(utf16_from_utf8 = g_utf8_to_utf16 (utf8, -1, &items_read, &items_written, &error))) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 	  fail ("line %d: conversion to ucs16 failed: %s\n", line, error->message); | 
					
						
							|  |  |  | 	  return; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       if (items_read != strlen (utf8) || | 
					
						
							|  |  |  | 	  utf16_count (utf16_from_utf8) != items_written) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 	  fail ("line %d: length error in conversion to ucs16\n", line); | 
					
						
							|  |  |  | 	  return; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       if (!(utf16_from_ucs4 = g_ucs4_to_utf16 (ucs4, -1, &items_read, &items_written, &error))) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 	  fail ("line %d: conversion to ucs16 failed: %s\n", line, error->message); | 
					
						
							|  |  |  | 	  return; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       if (items_read != ucs4_len || | 
					
						
							|  |  |  | 	  utf16_count (utf16_from_ucs4) != items_written) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 	  fail ("line %d: length error in conversion to ucs16\n", line); | 
					
						
							|  |  |  | 	  return; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       if (!utf16_equal (utf16_from_utf8, utf16_expected) || | 
					
						
							|  |  |  | 	  !utf16_equal (utf16_from_ucs4, utf16_expected)) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 	  fail ("line %d: results of conversion to ucs16 do not match\n", line); | 
					
						
							|  |  |  | 	  return; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       if (!(utf8_result = g_utf16_to_utf8 (utf16_from_utf8, -1, &items_read, &items_written, &error))) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 	  fail ("line %d: conversion back to utf8 failed: %s\n", line, error->message); | 
					
						
							|  |  |  | 	  return; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       if (items_read != utf16_count (utf16_from_utf8) || | 
					
						
							|  |  |  | 	  items_written != strlen (utf8)) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 	  fail ("line %d: length error in conversion from ucs16 to utf8\n", line); | 
					
						
							|  |  |  | 	  return; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       if (!(ucs4_result = g_utf16_to_ucs4 (utf16_from_ucs4, -1, &items_read, &items_written, &error))) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 	  fail ("line %d: conversion back to utf8/ucs4 failed\n", line); | 
					
						
							|  |  |  | 	  return; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       if (items_read != utf16_count (utf16_from_utf8) || | 
					
						
							|  |  |  | 	  items_written != ucs4_len) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 	  fail ("line %d: length error in conversion from ucs16 to ucs4\n", line); | 
					
						
							|  |  |  | 	  return; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       if (strcmp (utf8, utf8_result) != 0 || | 
					
						
							|  |  |  | 	  !ucs4_equal (ucs4, ucs4_result)) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 	  fail ("line %d: conversion back to utf8/ucs4 did not match original\n", line); | 
					
						
							|  |  |  | 	  return; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  |        | 
					
						
							|  |  |  |       g_free (utf16_expected_tmp); | 
					
						
							|  |  |  |       g_free (utf16_expected); | 
					
						
							|  |  |  |       g_free (utf16_from_utf8); | 
					
						
							|  |  |  |       g_free (utf16_from_ucs4); | 
					
						
							|  |  |  |       g_free (utf8_result); | 
					
						
							|  |  |  |       g_free (ucs4_result); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | int | 
					
						
							|  |  |  | main (int argc, char **argv) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |   gchar *srcdir = getenv ("srcdir"); | 
					
						
							|  |  |  |   gchar *testfile; | 
					
						
							|  |  |  |   gchar *contents; | 
					
						
							|  |  |  |   GError *error = NULL; | 
					
						
							|  |  |  |   gchar *p, *end; | 
					
						
							|  |  |  |   char *tmp; | 
					
						
							|  |  |  |   gint state = 0; | 
					
						
							|  |  |  |   gint line = 1; | 
					
						
							|  |  |  |   gint start_line = 0;		/* Quiet GCC */ | 
					
						
							|  |  |  |   gchar *utf8 = NULL;		/* Quiet GCC */ | 
					
						
							|  |  |  |   GArray *ucs4; | 
					
						
							|  |  |  |   Status status = VALID;	/* Quiet GCC */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if (!srcdir) | 
					
						
							|  |  |  |     srcdir = "."; | 
					
						
							|  |  |  |    | 
					
						
							| 
									
										
										
										
											2001-01-06 03:09:46 +00:00
										 |  |  |   testfile = g_strconcat (srcdir, G_DIR_SEPARATOR_S "utf8.txt", NULL); | 
					
						
							| 
									
										
										
										
											2001-01-05 21:22:47 +00:00
										 |  |  |    | 
					
						
							|  |  |  |   g_file_get_contents (testfile, &contents, NULL, &error); | 
					
						
							|  |  |  |   if (error) | 
					
						
							|  |  |  |     croak ("Cannot open utf8.txt: %s", error->message); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   ucs4 = g_array_new (TRUE, FALSE, sizeof(gunichar)); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   p = contents; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   /* Loop over lines */ | 
					
						
							|  |  |  |   while (*p) | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |       while (*p && (*p == ' ' || *p == '\t')) | 
					
						
							|  |  |  | 	p++; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       end = p; | 
					
						
							| 
									
										
										
										
											2001-01-06 03:09:46 +00:00
										 |  |  |       while (*end && (*end != '\r' && *end != '\n')) | 
					
						
							| 
									
										
										
										
											2001-01-05 21:22:47 +00:00
										 |  |  | 	end++; | 
					
						
							|  |  |  |        | 
					
						
							| 
									
										
										
										
											2001-01-06 03:09:46 +00:00
										 |  |  |       if (!*p || *p == '#' || *p == '\r' || *p == '\n') | 
					
						
							| 
									
										
										
										
											2001-01-05 21:22:47 +00:00
										 |  |  | 	goto next_line; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       tmp = g_strstrip (g_strndup (p, end - p)); | 
					
						
							|  |  |  |        | 
					
						
							|  |  |  |       switch (state) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 	case 0: | 
					
						
							|  |  |  | 	  /* UTF-8 string */ | 
					
						
							|  |  |  | 	  start_line = line; | 
					
						
							|  |  |  | 	  utf8 = tmp; | 
					
						
							|  |  |  | 	  tmp = NULL; | 
					
						
							|  |  |  | 	  break; | 
					
						
							|  |  |  | 	   | 
					
						
							|  |  |  | 	case 1: | 
					
						
							|  |  |  | 	  /* Status */ | 
					
						
							|  |  |  | 	  if (!strcmp (tmp, "VALID")) | 
					
						
							|  |  |  | 	    status = VALID; | 
					
						
							|  |  |  | 	  else if (!strcmp (tmp, "INCOMPLETE")) | 
					
						
							|  |  |  | 	    status = INCOMPLETE; | 
					
						
							|  |  |  | 	  else if (!strcmp (tmp, "NOTUNICODE")) | 
					
						
							|  |  |  | 	    status = NOTUNICODE; | 
					
						
							|  |  |  | 	  else if (!strcmp (tmp, "OVERLONG")) | 
					
						
							|  |  |  | 	    status = OVERLONG; | 
					
						
							|  |  |  | 	  else if (!strcmp (tmp, "MALFORMED")) | 
					
						
							|  |  |  | 	    status = MALFORMED; | 
					
						
							|  |  |  | 	  else | 
					
						
							|  |  |  | 	    croak ("Invalid status on line %d\n", line); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	  if (status != VALID && status != NOTUNICODE) | 
					
						
							|  |  |  | 	    state++;		/* No UCS-4 data */ | 
					
						
							|  |  |  | 	   | 
					
						
							|  |  |  | 	  break; | 
					
						
							|  |  |  | 	   | 
					
						
							|  |  |  | 	case 2: | 
					
						
							|  |  |  | 	  /* UCS-4 version */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	  p = strtok (tmp, " \t"); | 
					
						
							|  |  |  | 	  while (p) | 
					
						
							|  |  |  | 	    { | 
					
						
							|  |  |  | 	      gchar *endptr; | 
					
						
							|  |  |  | 	       | 
					
						
							|  |  |  | 	      gunichar ch = strtoul (p, &endptr, 16); | 
					
						
							|  |  |  | 	      if (*endptr != '\0') | 
					
						
							|  |  |  | 		croak ("Invalid UCS-4 character on line %d\n", line); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	      g_array_append_val (ucs4, ch); | 
					
						
							|  |  |  | 	       | 
					
						
							|  |  |  | 	      p = strtok (NULL, " \t"); | 
					
						
							|  |  |  | 	    } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	  break; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       g_free (tmp); | 
					
						
							|  |  |  |       state = (state + 1) % 3; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       if (state == 0) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 	  process (start_line, utf8, status, (gunichar *)ucs4->data, ucs4->len); | 
					
						
							|  |  |  | 	  g_array_set_size (ucs4, 0); | 
					
						
							|  |  |  | 	  g_free (utf8); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  |        | 
					
						
							|  |  |  |     next_line: | 
					
						
							|  |  |  |       p = end; | 
					
						
							| 
									
										
										
										
											2001-01-06 03:09:46 +00:00
										 |  |  |       if (*p && *p == '\r') | 
					
						
							|  |  |  | 	p++; | 
					
						
							| 
									
										
										
										
											2001-01-05 21:22:47 +00:00
										 |  |  |       if (*p && *p == '\n') | 
					
						
							|  |  |  | 	p++; | 
					
						
							|  |  |  |        | 
					
						
							|  |  |  |       line++; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-07-31 16:51:52 +00:00
										 |  |  |   return exit_status; | 
					
						
							| 
									
										
										
										
											2001-01-05 21:22:47 +00:00
										 |  |  | } |