diff --git a/docs/reference/glib/tmpl/conversions.sgml b/docs/reference/glib/tmpl/conversions.sgml index af4b36888..e68cde338 100644 --- a/docs/reference/glib/tmpl/conversions.sgml +++ b/docs/reference/glib/tmpl/conversions.sgml @@ -2,159 +2,10 @@ Character Set Conversion -convert strings between different character sets using iconv() + - - - - - File Name Encodings - - - Historically, Unix has not had a defined encoding for file - names: a file name is valid as long as it does not have path - separators in it ("/"). However, displaying file names may - require conversion: from the character set in which they were - created, to the character set in which the application - operates. Consider the Spanish file name - "Presentación.sxi". If the - application which created it uses ISO-8859-1 for its encoding, - then the actual file name on disk would look like this: - - - -Character: P r e s e n t a c i ó n . s x i -Hex code: 50 72 65 73 65 6e 74 61 63 69 f3 6e 2e 73 78 69 - - - - However, if the application use UTF-8, the actual file name on - disk would look like this: - - - -Character: P r e s e n t a c i ó n . s x i -Hex code: 50 72 65 73 65 6e 74 61 63 69 c3 b3 6e 2e 73 78 69 - - - - Glib uses UTF-8 for its strings, and GUI toolkits like GTK+ - that use Glib do the same thing. If you get a file name from - the file system, for example, from - readdir(3) or from g_dir_read_name(), - and you wish to display the file name to the user, you - will need to convert it into UTF-8. The - opposite case is when the user types the name of a file he - wishes to save: the toolkit will give you that string in - UTF-8 encoding, and you will need to convert it to the - character set used for file names before you can create the - file with open(2) or - fopen(3). - - - - By default, Glib assumes that file names on disk are in UTF-8 - encoding. 
This is a valid assumption for file systems which - were created relatively recently: most applications use UTF-8 - encoding for their strings, and that is also what they use for - the file names they create. However, older file systems may - still contain file names created in "older" encodings, such as - ISO-8859-1. In this case, for compatibility reasons, you may - want to instruct Glib to use that particular encoding for file - names rather than UTF-8. You can do this by specifying the - encoding for file names in the G_FILENAME_ENCODING - environment variable. For example, if your installation uses - ISO-8859-1 for file names, you can put this in your - ~/.profile: - - - -export G_FILENAME_ENCODING=ISO-8859-1 - - - - Glib provides the functions g_filename_to_utf8() - and g_filename_from_utf8() - to perform the necessary conversions. These functions convert - file names from the encoding specified in - G_FILENAME_ENCODING to UTF-8 and vice-versa. - illustrates how - these functions are used to convert between UTF-8 and the - encoding for file names in the file system. - - -
- Conversion between File Name Encodings - -
- - - Checklist for Application Writers - - - This section is a practical summary of the detailed - description above. You can use this as a checklist of - things to do to make sure your applications process file - name encodings correctly. - - - - - - If you get a file name from the file system from a - function such as readdir(3) or - gtk_file_chooser_get_filename(), - you do not need to do any conversion to pass that - file name to functions like open(2), - rename(2), or - fopen(3) — those are "raw" - file names which the file system understands. - - - - - - If you need to display a file name, convert it to UTF-8 - first by using g_filename_to_utf8(). - If conversion fails, display a string like - "Unknown file name". Do - not convert this string back into the - encoding used for file names if you wish to pass it to - the file system; use the original file name instead. - For example, the document window of a word processor - could display "Unknown file name" in its title bar but - still let the user save the file, as it would keep the - raw file name internally. This can happen if the user - has not set the G_FILENAME_ENCODING - environment variable even though he has files whose - names are not encoded in UTF-8. - - - - - - If your user interface lets the user type a file name - for saving or renaming, convert it to the encoding used - for file names in the file system by using g_filename_from_utf8(). - Pass the converted file name to functions like - fopen(3). If conversion fails, ask - the user to enter a different file name. This can - happen if the user types Japanese characters when - G_FILENAME_ENCODING is set to - ISO-8859-1, for example. - - - - -
@@ -200,9 +51,7 @@ export G_FILENAME_ENCODING=ISO-8859-1 -The GIConv struct wraps an -iconv() conversion descriptor. It contains private data -and should only be accessed using the following functions. + @@ -222,9 +71,7 @@ and should only be accessed using the following functions. -Error domain for character set conversions. Errors in this domain will -be from the #GConvertError enumeration. See #GError for information on -error domains. + @@ -342,16 +189,15 @@ error domains. -Error codes returned by character set conversion routines. + -@G_CONVERT_ERROR_NO_CONVERSION: Conversion between the requested character sets -is not supported. -@G_CONVERT_ERROR_ILLEGAL_SEQUENCE: Invalid byte sequence in conversion input. -@G_CONVERT_ERROR_FAILED: Conversion failed for some reason. -@G_CONVERT_ERROR_PARTIAL_INPUT: Partial character sequence at end of input. -@G_CONVERT_ERROR_BAD_URI: URI is invalid. -@G_CONVERT_ERROR_NOT_ABSOLUTE_PATH: Pathname is not an absolute path. +@G_CONVERT_ERROR_NO_CONVERSION: +@G_CONVERT_ERROR_ILLEGAL_SEQUENCE: +@G_CONVERT_ERROR_FAILED: +@G_CONVERT_ERROR_PARTIAL_INPUT: +@G_CONVERT_ERROR_BAD_URI: +@G_CONVERT_ERROR_NOT_ABSOLUTE_PATH: diff --git a/glib/gconvert.c b/glib/gconvert.c index 19fd80210..3207c5577 100644 --- a/glib/gconvert.c +++ b/glib/gconvert.c @@ -56,6 +56,127 @@ #endif +/** + * SECTION:conversions + * @title: Character Set Conversion + * @short_description: Convert strings between different character sets + * + * The g_convert() family of functions wraps the functionality of iconv(). In + * addition to pure character set conversions, GLib has functions to deal + * with the extra complications of encodings for file names. + * + * + * File Name Encodings + * + * Historically, Unix has not had a defined encoding for file + * names: a file name is valid as long as it does not have path + * separators in it ("/"). 
However, displaying file names may + * require conversion: from the character set in which they were + * created, to the character set in which the application + * operates. Consider the Spanish file name + * "Presentación.sxi". If the + * application which created it uses ISO-8859-1 for its encoding, + * then the actual file name on disk would look like this: + * + * + * Character: P r e s e n t a c i ó n . s x i + * Hex code: 50 72 65 73 65 6e 74 61 63 69 f3 6e 2e 73 78 69 + * + * + * However, if the application uses UTF-8, the actual file name on + * disk would look like this: + * + * + * Character: P r e s e n t a c i ó n . s x i + * Hex code: 50 72 65 73 65 6e 74 61 63 69 c3 b3 6e 2e 73 78 69 + * + * + * Glib uses UTF-8 for its strings, and GUI toolkits like GTK+ + * that use Glib do the same thing. If you get a file name from + * the file system, for example, from readdir(3) or from g_dir_read_name(), + * and you wish to display the file name to the user, you + * will need to convert it into UTF-8. The + * opposite case is when the user types the name of a file he + * wishes to save: the toolkit will give you that string in + * UTF-8 encoding, and you will need to convert it to the + * character set used for file names before you can create the + * file with open(2) or fopen(3). + * + * + * By default, Glib assumes that file names on disk are in UTF-8 + * encoding. This is a valid assumption for file systems which + * were created relatively recently: most applications use UTF-8 + * encoding for their strings, and that is also what they use for + * the file names they create. However, older file systems may + * still contain file names created in "older" encodings, such as + * ISO-8859-1. In this case, for compatibility reasons, you may + * want to instruct Glib to use that particular encoding for file + * names rather than UTF-8. You can do this by specifying the + * encoding for file names in the G_FILENAME_ENCODING + * environment variable. 
For example, if your installation uses + * ISO-8859-1 for file names, you can put this in your + * ~/.profile: + * + * + * export G_FILENAME_ENCODING=ISO-8859-1 + * + * + * Glib provides the functions g_filename_to_utf8() and + * g_filename_from_utf8() to perform the necessary conversions. These + * functions convert file names from the encoding specified in + * G_FILENAME_ENCODING to UTF-8 and vice-versa. + * illustrates how + * these functions are used to convert between UTF-8 and the + * encoding for file names in the file system. + * + *
+ * Conversion between File Name Encodings + * + *
+ * + * Checklist for Application Writers + * + * This section is a practical summary of the detailed + * description above. You can use this as a checklist of + * things to do to make sure your applications process file + * name encodings correctly. + * + * + * + * If you get a file name from the file system from a function + * such as readdir(3) or gtk_file_chooser_get_filename(), + * you do not need to do any conversion to pass that + * file name to functions like open(2), rename(2), or + * fopen(3) — those are "raw" file names which the file + * system understands. + * + * + * If you need to display a file name, convert it to UTF-8 first by + * using g_filename_to_utf8(). If conversion fails, display a string like + * "Unknown file name". Do not + * convert this string back into the encoding used for file names if you + * wish to pass it to the file system; use the original file name instead. + * For example, the document window of a word processor could display + * "Unknown file name" in its title bar but still let the user save the + * file, as it would keep the raw file name internally. This can happen + * if the user has not set the G_FILENAME_ENCODING + * environment variable even though he has files whose names are not + * encoded in UTF-8. + * + * + * If your user interface lets the user type a file name for saving or + * renaming, convert it to the encoding used for file names in the file + * system by using g_filename_from_utf8(). Pass the converted file name + * to functions like fopen(3). If conversion fails, ask the user to enter + * a different file name. This can happen if the user types Japanese + * characters when G_FILENAME_ENCODING is set to + * ISO-8859-1, for example. + * + * + * + *
+ */ + /* We try to terminate strings in unknown charsets with this many zero bytes * to ensure that multibyte strings really are nul-terminated when we return * them from g_convert() and friends. diff --git a/glib/gconvert.h b/glib/gconvert.h index c4f274fb9..e4c20d77d 100644 --- a/glib/gconvert.h +++ b/glib/gconvert.h @@ -35,7 +35,19 @@ G_BEGIN_DECLS -typedef enum +/** + * GConvertError: + * @G_CONVERT_ERROR_NO_CONVERSION: Conversion between the requested character + * sets is not supported. + * @G_CONVERT_ERROR_ILLEGAL_SEQUENCE: Invalid byte sequence in conversion input. + * @G_CONVERT_ERROR_FAILED: Conversion failed for some reason. + * @G_CONVERT_ERROR_PARTIAL_INPUT: Partial character sequence at end of input. + * @G_CONVERT_ERROR_BAD_URI: URI is invalid. + * @G_CONVERT_ERROR_NOT_ABSOLUTE_PATH: Pathname is not an absolute path. + * + * Error codes returned by character set conversion routines. + */ +typedef enum { G_CONVERT_ERROR_NO_CONVERSION, G_CONVERT_ERROR_ILLEGAL_SEQUENCE, @@ -45,10 +57,22 @@ typedef enum G_CONVERT_ERROR_NOT_ABSOLUTE_PATH } GConvertError; +/** + * G_CONVERT_ERROR: + * + * Error domain for character set conversions. Errors in this domain will + * be from the #GConvertError enumeration. See #GError for information on + * error domains. + */ #define G_CONVERT_ERROR g_convert_error_quark() GQuark g_convert_error_quark (void); -/* Thin wrappers around iconv +/** + * GIConv: + * + * The GIConv struct wraps an + * iconv() conversion descriptor. It contains private data + * and should only be accessed using the following functions. */ typedef struct _GIConv *GIConv;