mirror of
				https://gitlab.gnome.org/GNOME/glib.git
				synced 2025-10-31 16:32:18 +01:00 
			
		
		
		
	Move GConvert docs inline
This commit is contained in:
		| @@ -2,159 +2,10 @@ | ||||
| Character Set Conversion | ||||
|  | ||||
| <!-- ##### SECTION Short_Description ##### --> | ||||
| convert strings between different character sets using iconv() | ||||
|  | ||||
|  | ||||
| <!-- ##### SECTION Long_Description ##### --> | ||||
| <para> | ||||
|  | ||||
| </para> | ||||
|  | ||||
|     <refsect2 id="file-name-encodings"> | ||||
|       <title>File Name Encodings</title> | ||||
|  | ||||
|       <para> | ||||
| 	Historically, Unix has not had a defined encoding for file | ||||
| 	names:  a file name is valid as long as it does not have path | ||||
| 	separators in it ("/").  However, displaying file names may | ||||
| 	require conversion:  from the character set in which they were | ||||
| 	created, to the character set in which the application | ||||
| 	operates.  Consider the Spanish file name | ||||
| 	"<filename>Presentación.sxi</filename>".  If the | ||||
| 	application which created it uses ISO-8859-1 for its encoding, | ||||
| 	then the actual file name on disk would look like this: | ||||
|       </para> | ||||
|  | ||||
|       <programlisting id="filename-iso8859-1"> | ||||
| Character:  P  r  e  s  e  n  t  a  c  i  ó  n  .  s  x  i | ||||
| Hex code:   50 72 65 73 65 6e 74 61 63 69 f3 6e 2e 73 78 69 | ||||
|       </programlisting> | ||||
|  | ||||
|       <para> | ||||
| 	However, if the application use UTF-8, the actual file name on | ||||
| 	disk would look like this: | ||||
|       </para> | ||||
|  | ||||
|       <programlisting id="filename-utf-8"> | ||||
| Character:  P  r  e  s  e  n  t  a  c  i  ó     n  .  s  x  i | ||||
| Hex code:   50 72 65 73 65 6e 74 61 63 69 c3 b3 6e 2e 73 78 69 | ||||
|       </programlisting> | ||||
|  | ||||
|       <para> | ||||
| 	Glib uses UTF-8 for its strings, and GUI toolkits like GTK+ | ||||
| 	that use Glib do the same thing.  If you get a file name from | ||||
| 	the file system, for example, from | ||||
| 	<function>readdir(3)</function> or from <link | ||||
| 	linkend="g_dir_read_name"><function>g_dir_read_name()</function></link>, | ||||
| 	and you wish to display the file name to the user, you | ||||
| 	<emphasis>will</emphasis> need to convert it into UTF-8.  The | ||||
| 	opposite case is when the user types the name of a file he | ||||
| 	wishes to save:  the toolkit will give you that string in | ||||
| 	UTF-8 encoding, and you will need to convert it to the | ||||
| 	character set used for file names before you can create the | ||||
| 	file with <function>open(2)</function> or | ||||
| 	<function>fopen(3)</function>. | ||||
|       </para> | ||||
|  | ||||
|       <para> | ||||
| 	By default, Glib assumes that file names on disk are in UTF-8 | ||||
| 	encoding.  This is a valid assumption for file systems which | ||||
| 	were created relatively recently:  most applications use UTF-8 | ||||
| 	encoding for their strings, and that is also what they use for | ||||
| 	the file names they create.  However, older file systems may | ||||
| 	still contain file names created in "older" encodings, such as | ||||
| 	ISO-8859-1.  In this case, for compatibility reasons, you may | ||||
| 	want to instruct Glib to use that particular encoding for file | ||||
| 	names rather than UTF-8.  You can do this by specifying the | ||||
| 	encoding for file names in the <link | ||||
| 	linkend="G_FILENAME_ENCODING"><envar>G_FILENAME_ENCODING</envar></link> | ||||
| 	environment variable.  For example, if your installation uses | ||||
| 	ISO-8859-1 for file names, you can put this in your | ||||
| 	<filename>~/.profile</filename>: | ||||
|       </para> | ||||
|  | ||||
|       <programlisting> | ||||
| export G_FILENAME_ENCODING=ISO-8859-1 | ||||
|       </programlisting> | ||||
|  | ||||
|       <para> | ||||
| 	Glib provides the functions <link | ||||
| 	linkend="g_filename_to_utf8"><function>g_filename_to_utf8()</function></link> | ||||
| 	and <link | ||||
| 	linkend="g_filename_from_utf8"><function>g_filename_from_utf8()</function></link> | ||||
| 	to perform the necessary conversions.  These functions convert | ||||
| 	file names from the encoding specified in | ||||
| 	<envar>G_FILENAME_ENCODING</envar> to UTF-8 and vice-versa. | ||||
| 	<xref linkend="file-name-encodings-diagram"/> illustrates how | ||||
| 	these functions are used to convert between UTF-8 and the | ||||
| 	encoding for file names in the file system. | ||||
|       </para> | ||||
|  | ||||
|       <figure id="file-name-encodings-diagram"> | ||||
| 	<title>Conversion between File Name Encodings</title> | ||||
| 	<graphic fileref="file-name-encodings.png" format="PNG"/> | ||||
|       </figure> | ||||
|  | ||||
|       <refsect3 id="file-name-encodings-checklist"> | ||||
| 	<title>Checklist for Application Writers</title> | ||||
|  | ||||
| 	<para> | ||||
| 	  This section is a practical summary of the detailed | ||||
| 	  description above.  You can use this as a checklist of | ||||
| 	  things to do to make sure your applications process file | ||||
| 	  name encodings correctly. | ||||
| 	</para> | ||||
|  | ||||
| 	<orderedlist> | ||||
| 	  <listitem> | ||||
| 	    <para> | ||||
| 	      If you get a file name from the file system from a | ||||
| 	      function such as <function>readdir(3)</function> or | ||||
| 	      <function>gtk_file_chooser_get_filename()</function>, | ||||
| 	      you do not need to do any conversion to pass that | ||||
| 	      file name to functions like <function>open(2)</function>, | ||||
| 	      <function>rename(2)</function>, or | ||||
| 	      <function>fopen(3)</function> — those are "raw" | ||||
| 	      file names which the file system understands. | ||||
| 	    </para> | ||||
| 	  </listitem> | ||||
|  | ||||
| 	  <listitem> | ||||
| 	    <para> | ||||
| 	      If you need to display a file name, convert it to UTF-8 | ||||
| 	      first by using <link | ||||
| 	      linkend="g_filename_to_utf8"><function>g_filename_to_utf8()</function></link>. | ||||
| 	      If conversion fails, display a string like | ||||
| 	      "<literal>Unknown file name</literal>".  <emphasis>Do | ||||
| 	      not</emphasis> convert this string back into the | ||||
| 	      encoding used for file names if you wish to pass it to | ||||
| 	      the file system; use the original file name instead. | ||||
| 	      For example, the document window of a word processor | ||||
| 	      could display "Unknown file name" in its title bar but | ||||
| 	      still let the user save the file, as it would keep the | ||||
| 	      raw file name internally.  This can happen if the user | ||||
| 	      has not set the <envar>G_FILENAME_ENCODING</envar> | ||||
| 	      environment variable even though he has files whose | ||||
| 	      names are not encoded in UTF-8. | ||||
| 	    </para> | ||||
| 	  </listitem> | ||||
|  | ||||
| 	  <listitem> | ||||
| 	    <para> | ||||
| 	      If your user interface lets the user type a file name | ||||
| 	      for saving or renaming, convert it to the encoding used | ||||
| 	      for file names in the file system by using <link | ||||
| 	      linkend="g_filename_from_utf8"><function>g_filename_from_utf8()</function></link>. | ||||
| 	      Pass the converted file name to functions like | ||||
| 	      <function>fopen(3)</function>.  If conversion fails, ask | ||||
| 	      the user to enter a different file name.  This can | ||||
| 	      happen if the user types Japanese characters when | ||||
| 	      <envar>G_FILENAME_ENCODING</envar> is set to | ||||
| 	      <literal>ISO-8859-1</literal>, for example. | ||||
| 	    </para> | ||||
| 	  </listitem> | ||||
| 	</orderedlist> | ||||
|       </refsect3> | ||||
|     </refsect2> | ||||
|  | ||||
| <!-- ##### SECTION See_Also ##### --> | ||||
| <para> | ||||
| @@ -200,9 +51,7 @@ export G_FILENAME_ENCODING=ISO-8859-1 | ||||
|  | ||||
| <!-- ##### STRUCT GIConv ##### --> | ||||
| <para> | ||||
| The <structname>GIConv</structname> struct wraps an | ||||
| <function>iconv()</function> conversion descriptor. It contains private data | ||||
| and should only be accessed using the following functions. | ||||
|  | ||||
| </para> | ||||
|  | ||||
|  | ||||
| @@ -222,9 +71,7 @@ and should only be accessed using the following functions. | ||||
|  | ||||
| <!-- ##### MACRO G_CONVERT_ERROR ##### --> | ||||
| <para> | ||||
| Error domain for character set conversions. Errors in this domain will | ||||
| be from the #GConvertError enumeration. See #GError for information on  | ||||
| error domains. | ||||
|  | ||||
| </para> | ||||
|  | ||||
|  | ||||
| @@ -342,16 +189,15 @@ error domains. | ||||
|  | ||||
| <!-- ##### ENUM GConvertError ##### --> | ||||
| <para> | ||||
| Error codes returned by character set conversion routines. | ||||
|  | ||||
| </para> | ||||
|  | ||||
| @G_CONVERT_ERROR_NO_CONVERSION: Conversion between the requested character sets | ||||
| is not supported. | ||||
| @G_CONVERT_ERROR_ILLEGAL_SEQUENCE: Invalid byte sequence in conversion input. | ||||
| @G_CONVERT_ERROR_FAILED: Conversion failed for some reason. | ||||
| @G_CONVERT_ERROR_PARTIAL_INPUT: Partial character sequence at end of input. | ||||
| @G_CONVERT_ERROR_BAD_URI: URI is invalid. | ||||
| @G_CONVERT_ERROR_NOT_ABSOLUTE_PATH: Pathname is not an absolute path. | ||||
| @G_CONVERT_ERROR_NO_CONVERSION: | ||||
| @G_CONVERT_ERROR_ILLEGAL_SEQUENCE: | ||||
| @G_CONVERT_ERROR_FAILED: | ||||
| @G_CONVERT_ERROR_PARTIAL_INPUT: | ||||
| @G_CONVERT_ERROR_BAD_URI: | ||||
| @G_CONVERT_ERROR_NOT_ABSOLUTE_PATH: | ||||
|  | ||||
| <!-- ##### FUNCTION g_get_charset ##### --> | ||||
| <para> | ||||
|   | ||||
							
								
								
									
										121
									
								
								glib/gconvert.c
									
									
									
									
									
								
							
							
						
						
									
										121
									
								
								glib/gconvert.c
									
									
									
									
									
								
							| @@ -56,6 +56,127 @@ | ||||
| #endif | ||||
|  | ||||
|  | ||||
| /** | ||||
|  * SECTION:conversions | ||||
|  * @title: Character Set Conversion | ||||
|  * @short_description: Convert strings between different character sets | ||||
|  * | ||||
|  * The g_convert() family of function wraps the functionality of iconv(). In | ||||
|  * addition to pure character set conversions, GLib has functions to deal | ||||
|  * with the extra complications of encodings for file names. | ||||
|  * | ||||
|  * <refsect2 id="file-name-encodings"> | ||||
|  * <title>File Name Encodings</title> | ||||
|  * <para> | ||||
|  * Historically, Unix has not had a defined encoding for file | ||||
|  * names:  a file name is valid as long as it does not have path | ||||
|  * separators in it ("/").  However, displaying file names may | ||||
|  * require conversion:  from the character set in which they were | ||||
|  * created, to the character set in which the application | ||||
|  * operates.  Consider the Spanish file name | ||||
|  * "<filename>Presentación.sxi</filename>".  If the | ||||
|  * application which created it uses ISO-8859-1 for its encoding, | ||||
|  * </para> | ||||
|  * <programlisting id="filename-iso8859-1"> | ||||
|  * Character:  P  r  e  s  e  n  t  a  c  i  ó  n  .  s  x  i | ||||
|  * Hex code:   50 72 65 73 65 6e 74 61 63 69 f3 6e 2e 73 78 69 | ||||
|  * </programlisting> | ||||
|  * <para> | ||||
|  * However, if the application use UTF-8, the actual file name on | ||||
|  * disk would look like this: | ||||
|  * </para> | ||||
|  * <programlisting id="filename-utf-8"> | ||||
|  * Character:  P  r  e  s  e  n  t  a  c  i  ó     n  .  s  x  i | ||||
|  * Hex code:   50 72 65 73 65 6e 74 61 63 69 c3 b3 6e 2e 73 78 69 | ||||
|  * </programlisting> | ||||
|  * <para> | ||||
|  * Glib uses UTF-8 for its strings, and GUI toolkits like GTK+ | ||||
|  * that use Glib do the same thing.  If you get a file name from | ||||
|  * the file system, for example, from readdir(3) or from g_dir_read_name(), | ||||
|  * and you wish to display the file name to the user, you | ||||
|  * emphasis>will</emphasis> need to convert it into UTF-8.  The | ||||
|  * opposite case is when the user types the name of a file he | ||||
|  * wishes to save:  the toolkit will give you that string in | ||||
|  * UTF-8 encoding, and you will need to convert it to the | ||||
|  * character set used for file names before you can create the | ||||
|  * file with open(2) or fopen(3). | ||||
|  * </para> | ||||
|  * <para> | ||||
|  * By default, Glib assumes that file names on disk are in UTF-8 | ||||
|  * encoding.  This is a valid assumption for file systems which | ||||
|  * were created relatively recently:  most applications use UTF-8 | ||||
|  * encoding for their strings, and that is also what they use for | ||||
|  * the file names they create.  However, older file systems may | ||||
|  * still contain file names created in "older" encodings, such as | ||||
|  * ISO-8859-1. In this case, for compatibility reasons, you may | ||||
|  * want to instruct Glib to use that particular encoding for file | ||||
|  * names rather than UTF-8.  You can do this by specifying the | ||||
|  * encoding for file names in the <link | ||||
|  * linkend="G_FILENAME_ENCODING"><envar>G_FILENAME_ENCODING</envar></link> | ||||
|  * environment variable.  For example, if your installation uses | ||||
|  * ISO-8859-1 for file names, you can put this in your | ||||
|  * <filename>~/.profile</filename>: | ||||
|  * </para> | ||||
|  * <programlisting> | ||||
|  * export G_FILENAME_ENCODING=ISO-8859-1 | ||||
|  * </programlisting> | ||||
|  * <para> | ||||
|  * Glib provides the functions g_filename_to_utf8() and | ||||
|  * g_filename_from_utf8() to perform the necessary conversions. These | ||||
|  * functions convert file names from the encoding specified in | ||||
|  * <envar>G_FILENAME_ENCODING</envar> to UTF-8 and vice-versa. | ||||
|  * <xref linkend="file-name-encodings-diagram"/> illustrates how | ||||
|  * these functions are used to convert between UTF-8 and the | ||||
|  * encoding for file names in the file system. | ||||
|  * </para> | ||||
|  * <figure id="file-name-encodings-diagram"> | ||||
|  * <title>Conversion between File Name Encodings</title> | ||||
|  * <graphic fileref="file-name-encodings.png" format="PNG"/> | ||||
|  * </figure> | ||||
|  * <refsect3 id="file-name-encodings-checklist"> | ||||
|  * <title>Checklist for Application Writers</title> | ||||
|  * <para> | ||||
|  * This section is a practical summary of the detailed | ||||
|  * description above.  You can use this as a checklist of | ||||
|  * things to do to make sure your applications process file | ||||
|  * name encodings correctly. | ||||
|  * </para> | ||||
|  * <orderedlist> | ||||
|  * <listitem><para> | ||||
|  * If you get a file name from the file system from a function | ||||
|  * such as readdir(3) or gtk_file_chooser_get_filename(), | ||||
|  * you do not need to do any conversion to pass that | ||||
|  * file name to functions like open(2), rename(2), or | ||||
|  * fopen(3) — those are "raw" file names which the file | ||||
|  * system understands. | ||||
|  * </para></listitem> | ||||
|  * <listitem><para> | ||||
|  * If you need to display a file name, convert it to UTF-8 first by | ||||
|  * using g_filename_to_utf8(). If conversion fails, display a string like | ||||
|  * "<literal>Unknown file name</literal>". <emphasis>Do not</emphasis> | ||||
|  * convert this string back into the encoding used for file names if you | ||||
|  * wish to pass it to the file system; use the original file name instead. | ||||
|  * For example, the document window of a word processor could display | ||||
|  * "Unknown file name" in its title bar but still let the user save the | ||||
|  * file, as it would keep the raw file name internally. This can happen | ||||
|  * if the user has not set the <envar>G_FILENAME_ENCODING</envar> | ||||
|  * environment variable even though he has files whose names are not | ||||
|  * encoded in UTF-8. | ||||
|  * </para></listitem> | ||||
|  * <listitem><para> | ||||
|  * If your user interface lets the user type a file name for saving or | ||||
|  * renaming, convert it to the encoding used for file names in the file | ||||
|  * system by using g_filename_from_utf8(). Pass the converted file name | ||||
|  * to functions like fopen(3). If conversion fails, ask the user to enter | ||||
|  * a different file name. This can happen if the user types Japanese | ||||
|  * characters when <envar>G_FILENAME_ENCODING</envar> is set to | ||||
|  * <literal>ISO-8859-1</literal>, for example. | ||||
|  * </para></listitem> | ||||
|  * orderedlist> | ||||
|  * </refsect3> | ||||
|  * </refsect2> | ||||
|  */ | ||||
|  | ||||
| /* We try to terminate strings in unknown charsets with this many zero bytes | ||||
|  * to ensure that multibyte strings really are nul-terminated when we return | ||||
|  * them from g_convert() and friends. | ||||
|   | ||||
| @@ -35,6 +35,18 @@ | ||||
|  | ||||
| G_BEGIN_DECLS | ||||
|  | ||||
| /** | ||||
|  * GConvertError: | ||||
|  * @G_CONVERT_ERROR_NO_CONVERSION: Conversion between the requested character | ||||
|  *     sets is not supported. | ||||
|  * @G_CONVERT_ERROR_ILLEGAL_SEQUENCE: Invalid byte sequence in conversion input. | ||||
|  * @G_CONVERT_ERROR_FAILED: Conversion failed for some reason. | ||||
|  * @G_CONVERT_ERROR_PARTIAL_INPUT: Partial character sequence at end of input. | ||||
|  * @G_CONVERT_ERROR_BAD_URI: URI is invalid. | ||||
|  * @G_CONVERT_ERROR_NOT_ABSOLUTE_PATH: Pathname is not an absolute path. | ||||
|  * | ||||
|  * Error codes returned by character set conversion routines. | ||||
|  */ | ||||
| typedef enum | ||||
| { | ||||
|   G_CONVERT_ERROR_NO_CONVERSION, | ||||
| @@ -45,10 +57,22 @@ typedef enum | ||||
|   G_CONVERT_ERROR_NOT_ABSOLUTE_PATH | ||||
| } GConvertError; | ||||
|  | ||||
| /** | ||||
|  * G_CONVERT_ERROR: | ||||
|  * | ||||
|  * Error domain for character set conversions. Errors in this domain will | ||||
|  * be from the #GConvertError enumeration. See #GError for information on | ||||
|  * error domains. | ||||
|  */ | ||||
| #define G_CONVERT_ERROR g_convert_error_quark() | ||||
| GQuark g_convert_error_quark (void); | ||||
|  | ||||
| /* Thin wrappers around iconv | ||||
| /** | ||||
|  * GIconv: | ||||
|  * | ||||
|  * The <structname>GIConv</structname> struct wraps an | ||||
|  * iconv() conversion descriptor. It contains private data | ||||
|  * and should only be accessed using the following functions. | ||||
|  */ | ||||
| typedef struct _GIConv *GIConv; | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user