mirror of
				https://gitlab.gnome.org/GNOME/glib.git
				synced 2025-11-01 08:52:18 +01:00 
			
		
		
		
	Havoc Pennington's implementation of convenient character set conversion
Sun Sep 10 12:37:40 2000 Owen Taylor <otaylor@redhat.com> * glib.h gconvert.c (g_convert): Havoc Pennington's implementation of convenient character set conversion using iconv, with the addition of GError. We probably need a fallback that just does conversions between, say UTF-8,16,32 and ISO-8859-1 for targets without iconv at all. Also add g_convert_with_fallback() to take care of conversions where we accept some loss going to the target encoding.
This commit is contained in:
		
							
								
								
									
										11
									
								
								ChangeLog
									
									
									
									
									
								
							
							
						
						
									
										11
									
								
								ChangeLog
									
									
									
									
									
								
							| @@ -1,3 +1,14 @@ | ||||
| Sun Sep 10 12:37:40 2000  Owen Taylor  <otaylor@redhat.com> | ||||
|  | ||||
| 	* glib.h gconvert.c (g_convert): Havoc Pennington's implementation  | ||||
| 	of convenient character set conversion using iconv, with | ||||
| 	the addition of GError. We probably need a fallback that | ||||
| 	just does conversions between, say UTF-8,16,32 and ISO-8859-1 | ||||
| 	for targets without iconv at all. | ||||
|  | ||||
| 	Also add g_convert_with_fallback() to take care of conversions  | ||||
| 	where we accept some loss going to the target encoding. | ||||
| 	 | ||||
| 2000-09-10  Havoc Pennington  <hp@redhat.com> | ||||
|  | ||||
| 	* gutf8.c (g_utf8_validate): Add this function. | ||||
|   | ||||
| @@ -1,3 +1,14 @@ | ||||
| Sun Sep 10 12:37:40 2000  Owen Taylor  <otaylor@redhat.com> | ||||
|  | ||||
| 	* glib.h gconvert.c (g_convert): Havoc Pennington's implementation  | ||||
| 	of convenient character set conversion using iconv, with | ||||
| 	the addition of GError. We probably need a fallback that | ||||
| 	just does conversions between, say UTF-8,16,32 and ISO-8859-1 | ||||
| 	for targets without iconv at all. | ||||
|  | ||||
| 	Also add g_convert_with_fallback() to take care of conversions  | ||||
| 	where we accept some loss going to the target encoding. | ||||
| 	 | ||||
| 2000-09-10  Havoc Pennington  <hp@redhat.com> | ||||
|  | ||||
| 	* gutf8.c (g_utf8_validate): Add this function. | ||||
|   | ||||
| @@ -1,3 +1,14 @@ | ||||
| Sun Sep 10 12:37:40 2000  Owen Taylor  <otaylor@redhat.com> | ||||
|  | ||||
| 	* glib.h gconvert.c (g_convert): Havoc Pennington's implementation  | ||||
| 	of convenient character set conversion using iconv, with | ||||
| 	the addition of GError. We probably need a fallback that | ||||
| 	just does conversions between, say UTF-8,16,32 and ISO-8859-1 | ||||
| 	for targets without iconv at all. | ||||
|  | ||||
| 	Also add g_convert_with_fallback() to take care of conversions  | ||||
| 	where we accept some loss going to the target encoding. | ||||
| 	 | ||||
| 2000-09-10  Havoc Pennington  <hp@redhat.com> | ||||
|  | ||||
| 	* gutf8.c (g_utf8_validate): Add this function. | ||||
|   | ||||
| @@ -1,3 +1,14 @@ | ||||
| Sun Sep 10 12:37:40 2000  Owen Taylor  <otaylor@redhat.com> | ||||
|  | ||||
| 	* glib.h gconvert.c (g_convert): Havoc Pennington's implementation  | ||||
| 	of convenient character set conversion using iconv, with | ||||
| 	the addition of GError. We probably need a fallback that | ||||
| 	just does conversions between, say UTF-8,16,32 and ISO-8859-1 | ||||
| 	for targets without iconv at all. | ||||
|  | ||||
| 	Also add g_convert_with_fallback() to take care of conversions  | ||||
| 	where we accept some loss going to the target encoding. | ||||
| 	 | ||||
| 2000-09-10  Havoc Pennington  <hp@redhat.com> | ||||
|  | ||||
| 	* gutf8.c (g_utf8_validate): Add this function. | ||||
|   | ||||
| @@ -1,3 +1,14 @@ | ||||
| Sun Sep 10 12:37:40 2000  Owen Taylor  <otaylor@redhat.com> | ||||
|  | ||||
| 	* glib.h gconvert.c (g_convert): Havoc Pennington's implementation  | ||||
| 	of convenient character set conversion using iconv, with | ||||
| 	the addition of GError. We probably need a fallback that | ||||
| 	just does conversions between, say UTF-8,16,32 and ISO-8859-1 | ||||
| 	for targets without iconv at all. | ||||
|  | ||||
| 	Also add g_convert_with_fallback() to take care of conversions  | ||||
| 	where we accept some loss going to the target encoding. | ||||
| 	 | ||||
| 2000-09-10  Havoc Pennington  <hp@redhat.com> | ||||
|  | ||||
| 	* gutf8.c (g_utf8_validate): Add this function. | ||||
|   | ||||
| @@ -1,3 +1,14 @@ | ||||
| Sun Sep 10 12:37:40 2000  Owen Taylor  <otaylor@redhat.com> | ||||
|  | ||||
| 	* glib.h gconvert.c (g_convert): Havoc Pennington's implementation  | ||||
| 	of convenient character set conversion using iconv, with | ||||
| 	the addition of GError. We probably need a fallback that | ||||
| 	just does conversions between, say UTF-8,16,32 and ISO-8859-1 | ||||
| 	for targets without iconv at all. | ||||
|  | ||||
| 	Also add g_convert_with_fallback() to take care of conversions  | ||||
| 	where we accept some loss going to the target encoding. | ||||
| 	 | ||||
| 2000-09-10  Havoc Pennington  <hp@redhat.com> | ||||
|  | ||||
| 	* gutf8.c (g_utf8_validate): Add this function. | ||||
|   | ||||
| @@ -1,3 +1,14 @@ | ||||
| Sun Sep 10 12:37:40 2000  Owen Taylor  <otaylor@redhat.com> | ||||
|  | ||||
| 	* glib.h gconvert.c (g_convert): Havoc Pennington's implementation  | ||||
| 	of convenient character set conversion using iconv, with | ||||
| 	the addition of GError. We probably need a fallback that | ||||
| 	just does conversions between, say UTF-8,16,32 and ISO-8859-1 | ||||
| 	for targets without iconv at all. | ||||
|  | ||||
| 	Also add g_convert_with_fallback() to take care of conversions  | ||||
| 	where we accept some loss going to the target encoding. | ||||
| 	 | ||||
| 2000-09-10  Havoc Pennington  <hp@redhat.com> | ||||
|  | ||||
| 	* gutf8.c (g_utf8_validate): Add this function. | ||||
|   | ||||
| @@ -1,3 +1,14 @@ | ||||
| Sun Sep 10 12:37:40 2000  Owen Taylor  <otaylor@redhat.com> | ||||
|  | ||||
| 	* glib.h gconvert.c (g_convert): Havoc Pennington's implementation  | ||||
| 	of convenient character set conversion using iconv, with | ||||
| 	the addition of GError. We probably need a fallback that | ||||
| 	just does conversions between, say UTF-8,16,32 and ISO-8859-1 | ||||
| 	for targets without iconv at all. | ||||
|  | ||||
| 	Also add g_convert_with_fallback() to take care of conversions  | ||||
| 	where we accept some loss going to the target encoding. | ||||
| 	 | ||||
| 2000-09-10  Havoc Pennington  <hp@redhat.com> | ||||
|  | ||||
| 	* gutf8.c (g_utf8_validate): Add this function. | ||||
|   | ||||
| @@ -45,6 +45,7 @@ libglib_1_3_la_SOURCES = 	\ | ||||
| 	gbacktrace.c		\ | ||||
| 	gcache.c		\ | ||||
| 	gcompletion.c		\ | ||||
| 	gconvert.c		\ | ||||
| 	gdataset.c		\ | ||||
| 	gdate.c         	\ | ||||
| 	gerror.c		\ | ||||
|   | ||||
							
								
								
									
										396
									
								
								gconvert.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										396
									
								
								gconvert.c
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,396 @@ | ||||
| /* GLIB - Library of useful routines for C programming | ||||
|  * | ||||
|  * gconvert.c: Convert between character sets using iconv | ||||
|  * Copyright Red Hat Inc., 2000 | ||||
|  * Authors: Havoc Pennington <hp@redhat.com>, Owen Taylor <otaylor@redhat.com> | ||||
|  * | ||||
|  * This library is free software; you can redistribute it and/or | ||||
|  * modify it under the terms of the GNU Lesser General Public | ||||
|  * License as published by the Free Software Foundation; either | ||||
|  * version 2 of the License, or (at your option) any later version. | ||||
|  * | ||||
|  * This library is distributed in the hope that it will be useful, | ||||
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | ||||
|  * Lesser General Public License for more details. | ||||
|  * | ||||
|  * You should have received a copy of the GNU Lesser General Public | ||||
|  * License along with this library; if not, write to the | ||||
|  * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||||
|  * Boston, MA 02111-1307, USA. | ||||
|  */ | ||||
|  | ||||
| #include <iconv.h> | ||||
| #include <errno.h> | ||||
| #include <string.h> | ||||
|  | ||||
| #include "glib.h" | ||||
|  | ||||
| /** | ||||
|  * g_convert_error_quark: | ||||
|  * | ||||
|  * Returns the error domain used for #GConvertError codes. The quark is | ||||
|  * obtained from the static string "g_convert_error" on the first call | ||||
|  * and cached in a function-local static for later calls. | ||||
|  * | ||||
|  * Return value: the #GQuark identifying the conversion error domain. | ||||
|  **/ | ||||
| GQuark | ||||
| g_convert_error_quark (void) | ||||
| { | ||||
|   static GQuark quark = 0; | ||||
|  | ||||
|   if (quark == 0) | ||||
|     quark = g_quark_from_static_string ("g_convert_error"); | ||||
|  | ||||
|   return quark; | ||||
| } | ||||
|  | ||||
| /* Open an iconv descriptor that converts from_codeset into to_codeset. | ||||
|  * | ||||
|  * On failure the (iconv_t) -1 value from iconv_open() is returned as-is | ||||
|  * and @error is filled in: G_CONVERT_ERROR_NO_CONVERSION when errno is | ||||
|  * EINVAL, G_CONVERT_ERROR_OTHER (with the strerror() text) otherwise. | ||||
|  */ | ||||
| static iconv_t | ||||
| open_converter (const gchar *to_codeset, | ||||
|                 const gchar *from_codeset, | ||||
|                 GError     **error) | ||||
| { | ||||
|   iconv_t converter; | ||||
|  | ||||
|   converter = iconv_open (to_codeset, from_codeset); | ||||
|   if (converter != (iconv_t) -1) | ||||
|     return converter; | ||||
|  | ||||
|   /* iconv_open() failed; translate errno into a GError. */ | ||||
|   if (errno != EINVAL) | ||||
|     { | ||||
|       g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_OTHER, | ||||
|                    "Could not open converter from `%s' to `%s': %s", | ||||
|                    from_codeset, to_codeset, strerror (errno)); | ||||
|     } | ||||
|   else | ||||
|     { | ||||
|       g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_NO_CONVERSION, | ||||
|                    "Conversion from character set `%s' to `%s' is not supported", | ||||
|                    from_codeset, to_codeset); | ||||
|     } | ||||
|  | ||||
|   return converter; | ||||
| } | ||||
|  | ||||
| /** | ||||
|  * g_convert: | ||||
|  * @str:           the string to convert | ||||
|  * @len:           the length of @str in bytes; if negative, strlen() | ||||
|  *                 of @str is used instead | ||||
|  * @to_codeset:    name of the character set to convert @str into | ||||
|  * @from_codeset:  name of the character set @str is encoded in | ||||
|  * @bytes_read:    location to store the number of bytes of the input | ||||
|  *                 string that were consumed, or %NULL. Even if the | ||||
|  *                 conversion was successful, this may be less than | ||||
|  *                 @len if there was a partial character at the end | ||||
|  *                 of the input. If the error | ||||
|  *                 G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value | ||||
|  *                 stored will be the byte offset after the last valid | ||||
|  *                 input sequence. Set to 0 if no converter could be | ||||
|  *                 opened. | ||||
|  * @bytes_written: location to store the number of bytes written to | ||||
|  *                 the output, not counting the terminating NUL, or | ||||
|  *                 %NULL. Set to 0 if no converter could be opened. | ||||
|  * @error:         location to store the error that occurred, or %NULL | ||||
|  *                 to ignore errors. Any of the errors in | ||||
|  *                 #GConvertError may occur. | ||||
|  * | ||||
|  * Convert a string from one character set to another using iconv. | ||||
|  * | ||||
|  * Return value: If the conversion was successful, a newly allocated | ||||
|  *               NUL-terminated string, which must be freed with | ||||
|  *               g_free. Otherwise %NULL and @error will be set. | ||||
|  **/ | ||||
| gchar* | ||||
| g_convert (const gchar *str, | ||||
|            gint         len, | ||||
|            const gchar *to_codeset, | ||||
|            const gchar *from_codeset, | ||||
|            gint        *bytes_read, | ||||
|            gint        *bytes_written, | ||||
|            GError     **error) | ||||
| { | ||||
|   gchar *result; | ||||
|   gchar *out_cursor; | ||||
|   const gchar *in_cursor; | ||||
|   size_t in_left; | ||||
|   size_t out_left; | ||||
|   size_t capacity; | ||||
|   iconv_t converter; | ||||
|   gboolean failed = FALSE; | ||||
|  | ||||
|   g_return_val_if_fail (str != NULL, NULL); | ||||
|   g_return_val_if_fail (to_codeset != NULL, NULL); | ||||
|   g_return_val_if_fail (from_codeset != NULL, NULL); | ||||
|  | ||||
|   converter = open_converter (to_codeset, from_codeset, error); | ||||
|   if (converter == (iconv_t) -1) | ||||
|     { | ||||
|       /* Nothing was consumed or produced. */ | ||||
|       if (bytes_read) | ||||
|         *bytes_read = 0; | ||||
|       if (bytes_written) | ||||
|         *bytes_written = 0; | ||||
|  | ||||
|       return NULL; | ||||
|     } | ||||
|  | ||||
|   if (len < 0) | ||||
|     len = strlen (str); | ||||
|  | ||||
|   in_cursor = str; | ||||
|   in_left = len; | ||||
|   capacity = len + 1;        /* one byte is always kept for the NUL */ | ||||
|   out_left = capacity - 1; | ||||
|   result = g_malloc (capacity); | ||||
|   out_cursor = result; | ||||
|  | ||||
|   /* Keep calling iconv() for as long as it fails only because the | ||||
|    * output buffer is too small; any other outcome ends the loop. | ||||
|    */ | ||||
|   while (iconv (converter, &in_cursor, &in_left, | ||||
|                 &out_cursor, &out_left) == (size_t) -1) | ||||
|     { | ||||
|       if (errno == E2BIG) | ||||
|         { | ||||
|           /* Double the buffer and resume where we stopped. */ | ||||
|           size_t used = out_cursor - result; | ||||
|  | ||||
|           capacity *= 2; | ||||
|           result = g_realloc (result, capacity); | ||||
|           out_cursor = result + used; | ||||
|           out_left = capacity - used - 1; | ||||
|           continue; | ||||
|         } | ||||
|  | ||||
|       if (errno == EILSEQ) | ||||
|         { | ||||
|           g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE, | ||||
|                        "Invalid byte sequence in conversion input"); | ||||
|           failed = TRUE; | ||||
|         } | ||||
|       else if (errno != EINVAL) | ||||
|         { | ||||
|           g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_OTHER, | ||||
|                        "Error during conversion: %s", | ||||
|                        strerror (errno)); | ||||
|           failed = TRUE; | ||||
|         } | ||||
|       /* EINVAL means incomplete trailing input: not reported as an error. */ | ||||
|  | ||||
|       break; | ||||
|     } | ||||
|  | ||||
|   *out_cursor = '\0'; | ||||
|  | ||||
|   iconv_close (converter); | ||||
|  | ||||
|   if (bytes_read) | ||||
|     *bytes_read = in_cursor - str; | ||||
|  | ||||
|   if (bytes_written) | ||||
|     *bytes_written = out_cursor - result;   /* excludes the '\0' */ | ||||
|  | ||||
|   if (!failed) | ||||
|     return result; | ||||
|  | ||||
|   g_free (result); | ||||
|   return NULL; | ||||
| } | ||||
|  | ||||
| /** | ||||
|  * g_convert_with_fallback: | ||||
|  * @str:           the string to convert | ||||
|  * @len:           the length of @str in bytes; if negative, strlen() | ||||
|  *                 of @str is used instead | ||||
|  * @to_codeset:    name of the character set to convert @str into | ||||
|  * @from_codeset:  name of the character set @str is encoded in | ||||
|  * @fallback:      UTF-8 string to use in place of characters not | ||||
|  *                 present in the target encoding. (This string must | ||||
|  *                 itself be representable in the target encoding.) | ||||
|  *                 If %NULL, characters not in the target encoding | ||||
|  *                 will be represented as Unicode escapes \x{XXXX} | ||||
|  *                 or \x{XXXXXX}. | ||||
|  * @bytes_read:    location to store the number of bytes of the input | ||||
|  *                 string that were consumed, or %NULL. Even if the | ||||
|  *                 conversion was successful, this may be less than | ||||
|  *                 @len if there was a partial character at the end | ||||
|  *                 of the input. If the error | ||||
|  *                 G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value | ||||
|  *                 stored will be the byte offset after the last valid | ||||
|  *                 input sequence. | ||||
|  * @bytes_written: location to store the number of bytes written to | ||||
|  *                 the output, not counting the terminating NUL, or | ||||
|  *                 %NULL. | ||||
|  * @error:         location to store the error that occurred, or %NULL | ||||
|  *                 to ignore errors. Any of the errors in | ||||
|  *                 #GConvertError may occur. | ||||
|  * | ||||
|  * Convert a string from one character set to another, possibly | ||||
|  * including fallback sequences for characters not representable | ||||
|  * in the output. Note that it is not guaranteed that the specification | ||||
|  * for the fallback sequences in @fallback will be honored. Some | ||||
|  * systems may do an approximate conversion from @from_codeset | ||||
|  * to @to_codeset in their iconv() functions, in which case GLib | ||||
|  * will simply return that approximate conversion. | ||||
|  * | ||||
|  * Return value: If the conversion was successful, a newly allocated | ||||
|  *               NUL-terminated string, which must be freed with | ||||
|  *               g_free. Otherwise %NULL and @error will be set. | ||||
|  **/ | ||||
| gchar* | ||||
| g_convert_with_fallback (const gchar *str, | ||||
|                          gint         len, | ||||
|                          const gchar *to_codeset, | ||||
|                          const gchar *from_codeset, | ||||
|                          gchar       *fallback, | ||||
|                          gint        *bytes_read, | ||||
|                          gint        *bytes_written, | ||||
|                          GError     **error) | ||||
| { | ||||
|   gchar *utf8; | ||||
|   gchar *dest; | ||||
|   gchar *outp; | ||||
|   const gchar *insert_str = NULL; | ||||
|   const gchar *p; | ||||
|   gint utf8_len = 0; | ||||
|   size_t inbytes_remaining; | ||||
|   const gchar *save_p = NULL; | ||||
|   size_t save_inbytes = 0; | ||||
|   size_t outbytes_remaining; | ||||
|   size_t err; | ||||
|   iconv_t cd; | ||||
|   size_t outbuf_size; | ||||
|   gboolean have_error = FALSE; | ||||
|   gboolean done = FALSE; | ||||
|   GError *local_error = NULL; | ||||
|  | ||||
|   g_return_val_if_fail (str != NULL, NULL); | ||||
|   g_return_val_if_fail (to_codeset != NULL, NULL); | ||||
|   g_return_val_if_fail (from_codeset != NULL, NULL); | ||||
|  | ||||
|   if (len < 0) | ||||
|     len = strlen (str); | ||||
|  | ||||
|   /* Try an exact conversion; we only proceed if this fails | ||||
|    * due to an illegal sequence in the input string. | ||||
|    */ | ||||
|   dest = g_convert (str, len, to_codeset, from_codeset, | ||||
|                     bytes_read, bytes_written, &local_error); | ||||
|   if (!local_error) | ||||
|     return dest; | ||||
|  | ||||
|   if (!g_error_matches (local_error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE)) | ||||
|     { | ||||
|       g_propagate_error (error, local_error); | ||||
|       return NULL; | ||||
|     } | ||||
|  | ||||
|   g_error_free (local_error); | ||||
|  | ||||
|   /* No go; to proceed, we need a converter from "UTF-8" to | ||||
|    * to_codeset, and the string as UTF-8. | ||||
|    */ | ||||
|   cd = open_converter (to_codeset, "UTF-8", error); | ||||
|   if (cd == (iconv_t) -1) | ||||
|     { | ||||
|       if (bytes_read) | ||||
|         *bytes_read = 0; | ||||
|  | ||||
|       if (bytes_written) | ||||
|         *bytes_written = 0; | ||||
|  | ||||
|       return NULL; | ||||
|     } | ||||
|  | ||||
|   /* g_convert() reports the output length through a gint, so receive it | ||||
|    * in a gint and widen afterwards rather than aliasing a size_t. | ||||
|    */ | ||||
|   utf8 = g_convert (str, len, "UTF-8", from_codeset, | ||||
|                     bytes_read, &utf8_len, error); | ||||
|   if (!utf8) | ||||
|     { | ||||
|       /* Do not leak the converter opened above. */ | ||||
|       iconv_close (cd); | ||||
|  | ||||
|       if (bytes_written) | ||||
|         *bytes_written = 0; | ||||
|  | ||||
|       return NULL; | ||||
|     } | ||||
|  | ||||
|   /* Now the heart of the code. We loop through the UTF-8 string, and | ||||
|    * whenever we hit an offending character, we form fallback, convert | ||||
|    * the fallback to the target codeset, and then go back to | ||||
|    * converting the original string after finishing with the fallback. | ||||
|    * | ||||
|    * The variables save_p and save_inbytes store the input state | ||||
|    * for the original string while we are converting the fallback | ||||
|    */ | ||||
|   p = utf8; | ||||
|   inbytes_remaining = utf8_len; | ||||
|   outbuf_size = len + 1; /* + 1 for nul */ | ||||
|   outbytes_remaining = outbuf_size - 1; /* -1 for nul */ | ||||
|   outp = dest = g_malloc (outbuf_size); | ||||
|  | ||||
|   while (!done && !have_error) | ||||
|     { | ||||
|       err = iconv (cd, &p, &inbytes_remaining, &outp, &outbytes_remaining); | ||||
|  | ||||
|       if (err == (size_t) -1) | ||||
|         { | ||||
|           switch (errno) | ||||
|             { | ||||
|             case EINVAL: | ||||
|               /* utf8 came out of g_convert(), so it should never end | ||||
|                * in a partial character. | ||||
|                */ | ||||
|               g_assert_not_reached(); | ||||
|               break; | ||||
|             case E2BIG: | ||||
|               { | ||||
|                 /* Output buffer full: double it and retry. */ | ||||
|                 size_t used = outp - dest; | ||||
|                 outbuf_size *= 2; | ||||
|                 dest = g_realloc (dest, outbuf_size); | ||||
|  | ||||
|                 outp = dest + used; | ||||
|                 outbytes_remaining = outbuf_size - used - 1; /* -1 for nul */ | ||||
|  | ||||
|                 break; | ||||
|               } | ||||
|             case EILSEQ: | ||||
|               if (save_p) | ||||
|                 { | ||||
|                   /* Error converting fallback string - fatal | ||||
|                    */ | ||||
|                   g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE, | ||||
|                                "Cannot convert fallback '%s' to codeset '%s'", | ||||
|                                insert_str, to_codeset); | ||||
|                   have_error = TRUE; | ||||
|                   break; | ||||
|                 } | ||||
|               else | ||||
|                 { | ||||
|                   if (!fallback) | ||||
|                     { | ||||
|                       gunichar ch = g_utf8_get_char (p); | ||||
|                       insert_str = g_strdup_printf ("\\x{%0*X}", | ||||
|                                                     (ch < 0x10000) ? 4 : 6, | ||||
|                                                     ch); | ||||
|                     } | ||||
|                   else | ||||
|                     insert_str = fallback; | ||||
|  | ||||
|                   /* Remember where to resume in utf8, then switch the | ||||
|                    * input over to the replacement text. | ||||
|                    */ | ||||
|                   save_p = g_utf8_next_char (p); | ||||
|                   save_inbytes = inbytes_remaining - (save_p - p); | ||||
|                   p = insert_str; | ||||
|                   inbytes_remaining = strlen (p); | ||||
|                 } | ||||
|               break; | ||||
|             default: | ||||
|               g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_OTHER, | ||||
|                            "Error during conversion: %s", | ||||
|                            strerror (errno)); | ||||
|               have_error = TRUE; | ||||
|               break; | ||||
|             } | ||||
|         } | ||||
|       else | ||||
|         { | ||||
|           if (save_p) | ||||
|             { | ||||
|               /* Finished the replacement text; go back to utf8. */ | ||||
|               if (!fallback) | ||||
|                 g_free ((gchar *)insert_str); | ||||
|               p = save_p; | ||||
|               inbytes_remaining = save_inbytes; | ||||
|               save_p = NULL; | ||||
|             } | ||||
|           else | ||||
|             done = TRUE; | ||||
|         } | ||||
|     } | ||||
|  | ||||
|   /* Cleanup | ||||
|    */ | ||||
|   *outp = '\0'; | ||||
|  | ||||
|   iconv_close (cd); | ||||
|  | ||||
|   /* Measure against the output buffer, not the caller's input string. */ | ||||
|   if (bytes_written) | ||||
|     *bytes_written = outp - dest;	/* Doesn't include '\0' */ | ||||
|  | ||||
|   g_free (utf8); | ||||
|  | ||||
|   if (have_error) | ||||
|     { | ||||
|       /* An escape string generated above is still owned here if the | ||||
|        * failure happened while it was being converted. | ||||
|        */ | ||||
|       if (save_p && !fallback) | ||||
|         g_free ((gchar *)insert_str); | ||||
|       g_free (dest); | ||||
|       return NULL; | ||||
|     } | ||||
|   else | ||||
|     return dest; | ||||
| } | ||||
							
								
								
									
										26
									
								
								glib.h
									
									
									
									
									
								
							
							
						
						
									
										26
									
								
								glib.h
									
									
									
									
									
								
							| @@ -3403,6 +3403,32 @@ guint           g_thread_pool_get_num_unused_threads (void); | ||||
| /* Stop all currently unused threads, but leave the limit untouched */ | ||||
| void            g_thread_pool_stop_unused_threads    (void); | ||||
|  | ||||
| typedef enum  | ||||
| { | ||||
|   G_CONVERT_ERROR_NO_CONVERSION, | ||||
|   G_CONVERT_ERROR_ILLEGAL_SEQUENCE, | ||||
|   G_CONVERT_ERROR_OTHER | ||||
| } GConvertError; | ||||
|  | ||||
| #define G_CONVERT_ERROR g_convert_error_quark() | ||||
| GQuark g_convert_error_quark(); | ||||
|  | ||||
| gchar* g_convert               (const gchar  *str, | ||||
| 				gint          len, | ||||
| 				const gchar  *to_codeset, | ||||
| 				const gchar  *from_codeset, | ||||
| 				gint         *bytes_read, | ||||
| 				gint         *bytes_written, | ||||
| 				GError      **error); | ||||
| gchar* g_convert_with_fallback (const gchar  *str, | ||||
| 				gint          len, | ||||
| 				const gchar  *to_codeset, | ||||
| 				const gchar  *from_codeset, | ||||
| 				gchar        *fallback, | ||||
| 				gint         *bytes_read, | ||||
| 				gint         *bytes_written, | ||||
| 				GError      **error); | ||||
|  | ||||
| #ifdef __cplusplus | ||||
| } | ||||
| #endif /* __cplusplus */ | ||||
|   | ||||
| @@ -45,6 +45,7 @@ libglib_1_3_la_SOURCES = 	\ | ||||
| 	gbacktrace.c		\ | ||||
| 	gcache.c		\ | ||||
| 	gcompletion.c		\ | ||||
| 	gconvert.c		\ | ||||
| 	gdataset.c		\ | ||||
| 	gdate.c         	\ | ||||
| 	gerror.c		\ | ||||
|   | ||||
							
								
								
									
										396
									
								
								glib/gconvert.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										396
									
								
								glib/gconvert.c
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,396 @@ | ||||
| /* GLIB - Library of useful routines for C programming | ||||
|  * | ||||
|  * gconvert.c: Convert between character sets using iconv | ||||
|  * Copyright Red Hat Inc., 2000 | ||||
|  * Authors: Havoc Pennington <hp@redhat.com>, Owen Taylor <otaylor@redhat.com> | ||||
|  * | ||||
|  * This library is free software; you can redistribute it and/or | ||||
|  * modify it under the terms of the GNU Lesser General Public | ||||
|  * License as published by the Free Software Foundation; either | ||||
|  * version 2 of the License, or (at your option) any later version. | ||||
|  * | ||||
|  * This library is distributed in the hope that it will be useful, | ||||
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | ||||
|  * Lesser General Public License for more details. | ||||
|  * | ||||
|  * You should have received a copy of the GNU Lesser General Public | ||||
|  * License along with this library; if not, write to the | ||||
|  * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||||
|  * Boston, MA 02111-1307, USA. | ||||
|  */ | ||||
|  | ||||
| #include <iconv.h> | ||||
| #include <errno.h> | ||||
| #include <string.h> | ||||
|  | ||||
| #include "glib.h" | ||||
|  | ||||
| /** | ||||
|  * g_convert_error_quark: | ||||
|  * | ||||
|  * Returns the error domain used for #GConvertError codes. The quark is | ||||
|  * obtained from the static string "g_convert_error" on the first call | ||||
|  * and cached in a function-local static for later calls. | ||||
|  * | ||||
|  * Return value: the #GQuark identifying the conversion error domain. | ||||
|  **/ | ||||
| GQuark | ||||
| g_convert_error_quark (void) | ||||
| { | ||||
|   static GQuark quark = 0; | ||||
|  | ||||
|   if (quark == 0) | ||||
|     quark = g_quark_from_static_string ("g_convert_error"); | ||||
|  | ||||
|   return quark; | ||||
| } | ||||
|  | ||||
| /* Open an iconv descriptor that converts from_codeset into to_codeset. | ||||
|  * | ||||
|  * On failure the (iconv_t) -1 value from iconv_open() is returned as-is | ||||
|  * and @error is filled in: G_CONVERT_ERROR_NO_CONVERSION when errno is | ||||
|  * EINVAL, G_CONVERT_ERROR_OTHER (with the strerror() text) otherwise. | ||||
|  */ | ||||
| static iconv_t | ||||
| open_converter (const gchar *to_codeset, | ||||
|                 const gchar *from_codeset, | ||||
|                 GError     **error) | ||||
| { | ||||
|   iconv_t converter; | ||||
|  | ||||
|   converter = iconv_open (to_codeset, from_codeset); | ||||
|   if (converter != (iconv_t) -1) | ||||
|     return converter; | ||||
|  | ||||
|   /* iconv_open() failed; translate errno into a GError. */ | ||||
|   if (errno != EINVAL) | ||||
|     { | ||||
|       g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_OTHER, | ||||
|                    "Could not open converter from `%s' to `%s': %s", | ||||
|                    from_codeset, to_codeset, strerror (errno)); | ||||
|     } | ||||
|   else | ||||
|     { | ||||
|       g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_NO_CONVERSION, | ||||
|                    "Conversion from character set `%s' to `%s' is not supported", | ||||
|                    from_codeset, to_codeset); | ||||
|     } | ||||
|  | ||||
|   return converter; | ||||
| } | ||||
|  | ||||
| /** | ||||
|  * g_convert: | ||||
|  * @str:           the string to convert | ||||
|  * @len:           the length of @str in bytes; if negative, strlen() | ||||
|  *                 of @str is used instead | ||||
|  * @to_codeset:    name of the character set to convert @str into | ||||
|  * @from_codeset:  name of the character set @str is encoded in | ||||
|  * @bytes_read:    location to store the number of bytes of the input | ||||
|  *                 string that were consumed, or %NULL. Even if the | ||||
|  *                 conversion was successful, this may be less than | ||||
|  *                 @len if there was a partial character at the end | ||||
|  *                 of the input. If the error | ||||
|  *                 G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value | ||||
|  *                 stored will be the byte offset after the last valid | ||||
|  *                 input sequence. Set to 0 if no converter could be | ||||
|  *                 opened. | ||||
|  * @bytes_written: location to store the number of bytes written to | ||||
|  *                 the output, not counting the terminating NUL, or | ||||
|  *                 %NULL. Set to 0 if no converter could be opened. | ||||
|  * @error:         location to store the error that occurred, or %NULL | ||||
|  *                 to ignore errors. Any of the errors in | ||||
|  *                 #GConvertError may occur. | ||||
|  * | ||||
|  * Convert a string from one character set to another using iconv. | ||||
|  * | ||||
|  * Return value: If the conversion was successful, a newly allocated | ||||
|  *               NUL-terminated string, which must be freed with | ||||
|  *               g_free. Otherwise %NULL and @error will be set. | ||||
|  **/ | ||||
| gchar* | ||||
| g_convert (const gchar *str, | ||||
|            gint         len, | ||||
|            const gchar *to_codeset, | ||||
|            const gchar *from_codeset, | ||||
|            gint        *bytes_read, | ||||
|            gint        *bytes_written, | ||||
|            GError     **error) | ||||
| { | ||||
|   gchar *result; | ||||
|   gchar *out_cursor; | ||||
|   const gchar *in_cursor; | ||||
|   size_t in_left; | ||||
|   size_t out_left; | ||||
|   size_t capacity; | ||||
|   iconv_t converter; | ||||
|   gboolean failed = FALSE; | ||||
|  | ||||
|   g_return_val_if_fail (str != NULL, NULL); | ||||
|   g_return_val_if_fail (to_codeset != NULL, NULL); | ||||
|   g_return_val_if_fail (from_codeset != NULL, NULL); | ||||
|  | ||||
|   converter = open_converter (to_codeset, from_codeset, error); | ||||
|   if (converter == (iconv_t) -1) | ||||
|     { | ||||
|       /* Nothing was consumed or produced. */ | ||||
|       if (bytes_read) | ||||
|         *bytes_read = 0; | ||||
|       if (bytes_written) | ||||
|         *bytes_written = 0; | ||||
|  | ||||
|       return NULL; | ||||
|     } | ||||
|  | ||||
|   if (len < 0) | ||||
|     len = strlen (str); | ||||
|  | ||||
|   in_cursor = str; | ||||
|   in_left = len; | ||||
|   capacity = len + 1;        /* one byte is always kept for the NUL */ | ||||
|   out_left = capacity - 1; | ||||
|   result = g_malloc (capacity); | ||||
|   out_cursor = result; | ||||
|  | ||||
|   /* Keep calling iconv() for as long as it fails only because the | ||||
|    * output buffer is too small; any other outcome ends the loop. | ||||
|    */ | ||||
|   while (iconv (converter, &in_cursor, &in_left, | ||||
|                 &out_cursor, &out_left) == (size_t) -1) | ||||
|     { | ||||
|       if (errno == E2BIG) | ||||
|         { | ||||
|           /* Double the buffer and resume where we stopped. */ | ||||
|           size_t used = out_cursor - result; | ||||
|  | ||||
|           capacity *= 2; | ||||
|           result = g_realloc (result, capacity); | ||||
|           out_cursor = result + used; | ||||
|           out_left = capacity - used - 1; | ||||
|           continue; | ||||
|         } | ||||
|  | ||||
|       if (errno == EILSEQ) | ||||
|         { | ||||
|           g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE, | ||||
|                        "Invalid byte sequence in conversion input"); | ||||
|           failed = TRUE; | ||||
|         } | ||||
|       else if (errno != EINVAL) | ||||
|         { | ||||
|           g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_OTHER, | ||||
|                        "Error during conversion: %s", | ||||
|                        strerror (errno)); | ||||
|           failed = TRUE; | ||||
|         } | ||||
|       /* EINVAL means incomplete trailing input: not reported as an error. */ | ||||
|  | ||||
|       break; | ||||
|     } | ||||
|  | ||||
|   *out_cursor = '\0'; | ||||
|  | ||||
|   iconv_close (converter); | ||||
|  | ||||
|   if (bytes_read) | ||||
|     *bytes_read = in_cursor - str; | ||||
|  | ||||
|   if (bytes_written) | ||||
|     *bytes_written = out_cursor - result;   /* excludes the '\0' */ | ||||
|  | ||||
|   if (!failed) | ||||
|     return result; | ||||
|  | ||||
|   g_free (result); | ||||
|   return NULL; | ||||
| } | ||||
|  | ||||
| /** | ||||
|  * g_convert_with_fallback: | ||||
|  * @str:          the string to convert | ||||
|  * @len:          the length of the string | ||||
|  * @to_codeset:   name of character set into which to convert @str | ||||
|  * @from_codeset: character set of @str. | ||||
|  * @fallback:     UTF-8 string to use in place of characters not | ||||
|  *                present in the target encoding. (This string must | ||||
|  *                itself be representable in the target encoding.) | ||||
|  *                If %NULL, characters not in the target encoding | ||||
|  *                will be represented as Unicode escapes \x{XXXX} | ||||
|  *                or \x{XXXXXX}. | ||||
|  * @bytes_read:   location to store the number of bytes in the | ||||
|  *                input string that were successfully converted, or %NULL. | ||||
|  *                Even if the conversion was successful, this may be | ||||
|  *                less than @len if there were partial characters | ||||
|  *                at the end of the input. If the error | ||||
|  *                G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value | ||||
|  *                stored will be the byte offset after the last valid | ||||
|  *                input sequence. | ||||
|  * @error:        location to store the error occuring, or %NULL to ignore | ||||
|  *                errors. Any of the errors in #GConvertError may occur. | ||||
|  * | ||||
|  * Convert a string from one character set to another, possibly | ||||
|  * including fallback sequences for characters not representable | ||||
|  * in the output. Note that it is not guaranteed that the specification | ||||
|  * for the fallback sequences in @fallback will be honored. Some | ||||
|  * systems may do a approximate conversion from @from_codeset | ||||
|  * to @to_codeset in their iconv() functions, in which case GLib | ||||
|  * will simply return that approximate conversion. | ||||
|  * | ||||
|  * Return value: If the conversion was successful, a newly allocated | ||||
|  *               NUL-terminated string, which must be freed with | ||||
|  *               g_free. Otherwise %NULL and @error will be set. | ||||
|  **/ | ||||
| gchar* | ||||
| g_convert_with_fallback (const gchar *str, | ||||
| 			 gint         len, | ||||
| 			 const gchar *to_codeset, | ||||
| 			 const gchar *from_codeset, | ||||
| 			 gchar       *fallback, | ||||
| 			 gint        *bytes_read, | ||||
| 			 gint        *bytes_written, | ||||
| 			 GError     **error) | ||||
| { | ||||
|   gchar *utf8; | ||||
|   gchar *dest; | ||||
|   gchar *outp; | ||||
|   const gchar *insert_str = NULL; | ||||
|   const gchar *p; | ||||
|   size_t inbytes_remaining; | ||||
|   const gchar *save_p = NULL; | ||||
|   size_t save_inbytes = 0; | ||||
|   size_t outbytes_remaining; | ||||
|   size_t err; | ||||
|   iconv_t cd; | ||||
|   size_t outbuf_size; | ||||
|   gboolean have_error = FALSE; | ||||
|   gboolean done = FALSE; | ||||
|  | ||||
|   GError *local_error = NULL; | ||||
|    | ||||
|   g_return_val_if_fail (str != NULL, NULL); | ||||
|   g_return_val_if_fail (to_codeset != NULL, NULL); | ||||
|   g_return_val_if_fail (from_codeset != NULL, NULL); | ||||
|       | ||||
|   if (len < 0) | ||||
|     len = strlen (str); | ||||
|    | ||||
|   /* Try an exact conversion; we only proceed if this fails | ||||
|    * due to an illegal sequence in the input string. | ||||
|    */ | ||||
|   dest = g_convert (str, len, to_codeset, from_codeset,  | ||||
| 		    bytes_read, bytes_written, &local_error); | ||||
|   if (!local_error) | ||||
|     return dest; | ||||
|  | ||||
|   if (!g_error_matches (local_error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE)) | ||||
|     { | ||||
|       g_propagate_error (error, local_error); | ||||
|       return NULL; | ||||
|     } | ||||
|   else | ||||
|     g_error_free (local_error); | ||||
|  | ||||
|   /* No go; to proceed, we need a converter from "UTF-8" to | ||||
|    * to_codeset, and the string as UTF-8. | ||||
|    */ | ||||
|   cd = open_converter (to_codeset, "UTF-8", error); | ||||
|   if (cd == (iconv_t) -1) | ||||
|     { | ||||
|       if (bytes_read) | ||||
|         *bytes_read = 0; | ||||
|        | ||||
|       if (bytes_written) | ||||
|         *bytes_written = 0; | ||||
|        | ||||
|       return NULL; | ||||
|     } | ||||
|  | ||||
|   utf8 = g_convert (str, len, "UTF-8", from_codeset,  | ||||
| 		    bytes_read, &inbytes_remaining, error); | ||||
|   if (!utf8) | ||||
|     return NULL; | ||||
|  | ||||
|   /* Now the heart of the code. We loop through the UTF-8 string, and | ||||
|    * whenever we hit an offending character, we form fallback, convert | ||||
|    * the fallback to the target codeset, and then go back to | ||||
|    * converting the original string after finishing with the fallback. | ||||
|    * | ||||
|    * The variables save_p and save_inbytes store the input state | ||||
|    * for the original string while we are converting the fallback | ||||
|    */ | ||||
|   p = utf8; | ||||
|   outbuf_size = len + 1; /* + 1 for nul in case len == 1 */ | ||||
|   outbytes_remaining = outbuf_size - 1; /* -1 for nul */ | ||||
|   outp = dest = g_malloc (outbuf_size); | ||||
|  | ||||
|   while (!done && !have_error) | ||||
|     { | ||||
|       err = iconv (cd, &p, &inbytes_remaining, &outp, &outbytes_remaining); | ||||
|  | ||||
|       if (err == (size_t) -1) | ||||
| 	{ | ||||
| 	  switch (errno) | ||||
| 	    { | ||||
| 	    case EINVAL: | ||||
| 	      g_assert_not_reached(); | ||||
| 	      break; | ||||
| 	    case E2BIG: | ||||
| 	      { | ||||
| 		size_t used = outp - dest; | ||||
| 		outbuf_size *= 2; | ||||
| 		dest = g_realloc (dest, outbuf_size); | ||||
| 		 | ||||
| 		outp = dest + used; | ||||
| 		outbytes_remaining = outbuf_size - used - 1; /* -1 for nul */ | ||||
| 		 | ||||
| 		break; | ||||
| 	      } | ||||
| 	    case EILSEQ: | ||||
| 	      if (save_p) | ||||
| 		{ | ||||
| 		  /* Error converting fallback string - fatal | ||||
| 		   */ | ||||
| 		  g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE, | ||||
| 			       "Cannot convert fallback '%s' to codeset '%s'", | ||||
| 			       insert_str, to_codeset); | ||||
| 		  have_error = TRUE; | ||||
| 		  break; | ||||
| 		} | ||||
| 	      else | ||||
| 		{ | ||||
| 		  if (!fallback) | ||||
| 		    {  | ||||
| 		      gunichar ch = g_utf8_get_char (p); | ||||
| 		      insert_str = g_strdup_printf ("\\x{%0*X}", | ||||
| 						    (ch < 0x10000) ? 4 : 6, | ||||
| 						    ch); | ||||
| 		    } | ||||
| 		  else | ||||
| 		    insert_str = fallback; | ||||
| 		   | ||||
| 		  save_p = g_utf8_next_char (p); | ||||
| 		  save_inbytes = inbytes_remaining - (save_p - p); | ||||
| 		  p = insert_str; | ||||
| 		  inbytes_remaining = strlen (p); | ||||
| 		} | ||||
| 	      break; | ||||
| 	    default: | ||||
| 	      g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_OTHER, | ||||
| 			   "Error during conversion: %s", | ||||
| 			   strerror (errno)); | ||||
| 	      have_error = TRUE; | ||||
| 	      break; | ||||
| 	    } | ||||
| 	} | ||||
|       else | ||||
| 	{ | ||||
| 	  if (save_p) | ||||
| 	    { | ||||
| 	      if (!fallback) | ||||
| 		g_free ((gchar *)insert_str); | ||||
| 	      p = save_p; | ||||
| 	      inbytes_remaining = save_inbytes; | ||||
| 	      save_p = NULL; | ||||
| 	    } | ||||
| 	  else | ||||
| 	    done = TRUE; | ||||
| 	} | ||||
|     } | ||||
|  | ||||
|   /* Cleanup | ||||
|    */ | ||||
|   *outp = '\0'; | ||||
|    | ||||
|   iconv_close (cd); | ||||
|  | ||||
|   if (bytes_written) | ||||
|     *bytes_written = outp - str;	/* Doesn't include '\0' */ | ||||
|  | ||||
|   g_free (utf8); | ||||
|  | ||||
|   if (have_error) | ||||
|     { | ||||
|       if (save_p && !fallback) | ||||
| 	g_free ((gchar *)insert_str); | ||||
|       g_free (dest); | ||||
|       return NULL; | ||||
|     } | ||||
|   else | ||||
|     return dest; | ||||
| } | ||||
							
								
								
									
										26
									
								
								glib/glib.h
									
									
									
									
									
								
							
							
						
						
									
										26
									
								
								glib/glib.h
									
									
									
									
									
								
							| @@ -3403,6 +3403,32 @@ guint           g_thread_pool_get_num_unused_threads (void); | ||||
| /* Stop all currently unused threads, but leave the limit untouched */ | ||||
| void            g_thread_pool_stop_unused_threads    (void); | ||||
|  | ||||
| /* Error codes used with the G_CONVERT_ERROR domain by the character | ||||
|  * set conversion functions. | ||||
|  */ | ||||
| typedef enum  | ||||
| { | ||||
|   /* NOTE(review): presumably "no converter available for the requested | ||||
|    * codeset pair"; it is not set in the code visible here -- confirm | ||||
|    * against open_converter(). | ||||
|    */ | ||||
|   G_CONVERT_ERROR_NO_CONVERSION, | ||||
|   /* iconv() failed with EILSEQ, either on the input or on the fallback text */ | ||||
|   G_CONVERT_ERROR_ILLEGAL_SEQUENCE, | ||||
|   /* iconv() failed with some other errno; the message carries strerror() */ | ||||
|   G_CONVERT_ERROR_OTHER | ||||
| } GConvertError; | ||||
|  | ||||
| #define G_CONVERT_ERROR g_convert_error_quark() | ||||
| GQuark g_convert_error_quark(); | ||||
|  | ||||
| /* Convert str from from_codeset to to_codeset. | ||||
|  * On success returns the nul-terminated result; on failure the result | ||||
|  * buffer is freed and NULL is returned. If non-NULL, *bytes_read | ||||
|  * receives the number of input bytes consumed and *bytes_written the | ||||
|  * number of output bytes, not counting the terminating nul. | ||||
|  * The caller releases the result with g_free(), as | ||||
|  * g_convert_with_fallback() does for its intermediate UTF-8 copy. | ||||
|  */ | ||||
| gchar* g_convert               (const gchar  *str, | ||||
| 				gint          len, | ||||
| 				const gchar  *to_codeset, | ||||
| 				const gchar  *from_codeset, | ||||
| 				gint         *bytes_read, | ||||
| 				gint         *bytes_written, | ||||
| 				GError      **error); | ||||
| /* Like g_convert(), but when the exact conversion fails with | ||||
|  * G_CONVERT_ERROR_ILLEGAL_SEQUENCE, retries via UTF-8 and substitutes | ||||
|  * fallback (or a \x{XXXX} / \x{XXXXXX} escape when fallback is NULL) | ||||
|  * for each character iconv() rejects. A negative len means str is | ||||
|  * nul-terminated. Returns a g_malloc()ed string, or NULL on error. | ||||
|  */ | ||||
| gchar* g_convert_with_fallback (const gchar  *str, | ||||
| 				gint          len, | ||||
| 				const gchar  *to_codeset, | ||||
| 				const gchar  *from_codeset, | ||||
| 				gchar        *fallback, | ||||
| 				gint         *bytes_read, | ||||
| 				gint         *bytes_written, | ||||
| 				GError      **error); | ||||
|  | ||||
| #ifdef __cplusplus | ||||
| } | ||||
| #endif /* __cplusplus */ | ||||
|   | ||||
		Reference in New Issue
	
	Block a user