mirror of
https://gitlab.gnome.org/GNOME/glib.git
synced 2025-01-13 07:56:17 +01:00
Move GConvert docs inline
This commit is contained in:
parent
f837e15972
commit
def0dc01f7
@ -2,159 +2,10 @@
|
||||
Character Set Conversion
|
||||
|
||||
<!-- ##### SECTION Short_Description ##### -->
|
||||
convert strings between different character sets using iconv()
|
||||
|
||||
|
||||
<!-- ##### SECTION Long_Description ##### -->
|
||||
<para>
|
||||
|
||||
</para>
|
||||
|
||||
<refsect2 id="file-name-encodings">
|
||||
<title>File Name Encodings</title>
|
||||
|
||||
<para>
|
||||
Historically, Unix has not had a defined encoding for file
|
||||
names: a file name is valid as long as it does not have path
|
||||
separators in it ("/"). However, displaying file names may
|
||||
require conversion: from the character set in which they were
|
||||
created, to the character set in which the application
|
||||
operates. Consider the Spanish file name
|
||||
"<filename>Presentación.sxi</filename>". If the
|
||||
application which created it uses ISO-8859-1 for its encoding,
|
||||
then the actual file name on disk would look like this:
|
||||
</para>
|
||||
|
||||
<programlisting id="filename-iso8859-1">
|
||||
Character: P r e s e n t a c i ó n . s x i
|
||||
Hex code: 50 72 65 73 65 6e 74 61 63 69 f3 6e 2e 73 78 69
|
||||
</programlisting>
|
||||
|
||||
<para>
|
||||
However, if the application use UTF-8, the actual file name on
|
||||
disk would look like this:
|
||||
</para>
|
||||
|
||||
<programlisting id="filename-utf-8">
|
||||
Character: P r e s e n t a c i ó n . s x i
|
||||
Hex code: 50 72 65 73 65 6e 74 61 63 69 c3 b3 6e 2e 73 78 69
|
||||
</programlisting>
|
||||
|
||||
<para>
|
||||
Glib uses UTF-8 for its strings, and GUI toolkits like GTK+
|
||||
that use Glib do the same thing. If you get a file name from
|
||||
the file system, for example, from
|
||||
<function>readdir(3)</function> or from <link
|
||||
linkend="g_dir_read_name"><function>g_dir_read_name()</function></link>,
|
||||
and you wish to display the file name to the user, you
|
||||
<emphasis>will</emphasis> need to convert it into UTF-8. The
|
||||
opposite case is when the user types the name of a file he
|
||||
wishes to save: the toolkit will give you that string in
|
||||
UTF-8 encoding, and you will need to convert it to the
|
||||
character set used for file names before you can create the
|
||||
file with <function>open(2)</function> or
|
||||
<function>fopen(3)</function>.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
By default, Glib assumes that file names on disk are in UTF-8
|
||||
encoding. This is a valid assumption for file systems which
|
||||
were created relatively recently: most applications use UTF-8
|
||||
encoding for their strings, and that is also what they use for
|
||||
the file names they create. However, older file systems may
|
||||
still contain file names created in "older" encodings, such as
|
||||
ISO-8859-1. In this case, for compatibility reasons, you may
|
||||
want to instruct Glib to use that particular encoding for file
|
||||
names rather than UTF-8. You can do this by specifying the
|
||||
encoding for file names in the <link
|
||||
linkend="G_FILENAME_ENCODING"><envar>G_FILENAME_ENCODING</envar></link>
|
||||
environment variable. For example, if your installation uses
|
||||
ISO-8859-1 for file names, you can put this in your
|
||||
<filename>~/.profile</filename>:
|
||||
</para>
|
||||
|
||||
<programlisting>
|
||||
export G_FILENAME_ENCODING=ISO-8859-1
|
||||
</programlisting>
|
||||
|
||||
<para>
|
||||
Glib provides the functions <link
|
||||
linkend="g_filename_to_utf8"><function>g_filename_to_utf8()</function></link>
|
||||
and <link
|
||||
linkend="g_filename_from_utf8"><function>g_filename_from_utf8()</function></link>
|
||||
to perform the necessary conversions. These functions convert
|
||||
file names from the encoding specified in
|
||||
<envar>G_FILENAME_ENCODING</envar> to UTF-8 and vice-versa.
|
||||
<xref linkend="file-name-encodings-diagram"/> illustrates how
|
||||
these functions are used to convert between UTF-8 and the
|
||||
encoding for file names in the file system.
|
||||
</para>
|
||||
|
||||
<figure id="file-name-encodings-diagram">
|
||||
<title>Conversion between File Name Encodings</title>
|
||||
<graphic fileref="file-name-encodings.png" format="PNG"/>
|
||||
</figure>
|
||||
|
||||
<refsect3 id="file-name-encodings-checklist">
|
||||
<title>Checklist for Application Writers</title>
|
||||
|
||||
<para>
|
||||
This section is a practical summary of the detailed
|
||||
description above. You can use this as a checklist of
|
||||
things to do to make sure your applications process file
|
||||
name encodings correctly.
|
||||
</para>
|
||||
|
||||
<orderedlist>
|
||||
<listitem>
|
||||
<para>
|
||||
If you get a file name from the file system from a
|
||||
function such as <function>readdir(3)</function> or
|
||||
<function>gtk_file_chooser_get_filename()</function>,
|
||||
you do not need to do any conversion to pass that
|
||||
file name to functions like <function>open(2)</function>,
|
||||
<function>rename(2)</function>, or
|
||||
<function>fopen(3)</function> — those are "raw"
|
||||
file names which the file system understands.
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
If you need to display a file name, convert it to UTF-8
|
||||
first by using <link
|
||||
linkend="g_filename_to_utf8"><function>g_filename_to_utf8()</function></link>.
|
||||
If conversion fails, display a string like
|
||||
"<literal>Unknown file name</literal>". <emphasis>Do
|
||||
not</emphasis> convert this string back into the
|
||||
encoding used for file names if you wish to pass it to
|
||||
the file system; use the original file name instead.
|
||||
For example, the document window of a word processor
|
||||
could display "Unknown file name" in its title bar but
|
||||
still let the user save the file, as it would keep the
|
||||
raw file name internally. This can happen if the user
|
||||
has not set the <envar>G_FILENAME_ENCODING</envar>
|
||||
environment variable even though he has files whose
|
||||
names are not encoded in UTF-8.
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
If your user interface lets the user type a file name
|
||||
for saving or renaming, convert it to the encoding used
|
||||
for file names in the file system by using <link
|
||||
linkend="g_filename_from_utf8"><function>g_filename_from_utf8()</function></link>.
|
||||
Pass the converted file name to functions like
|
||||
<function>fopen(3)</function>. If conversion fails, ask
|
||||
the user to enter a different file name. This can
|
||||
happen if the user types Japanese characters when
|
||||
<envar>G_FILENAME_ENCODING</envar> is set to
|
||||
<literal>ISO-8859-1</literal>, for example.
|
||||
</para>
|
||||
</listitem>
|
||||
</orderedlist>
|
||||
</refsect3>
|
||||
</refsect2>
|
||||
|
||||
<!-- ##### SECTION See_Also ##### -->
|
||||
<para>
|
||||
@ -200,9 +51,7 @@ export G_FILENAME_ENCODING=ISO-8859-1
|
||||
|
||||
<!-- ##### STRUCT GIConv ##### -->
|
||||
<para>
|
||||
The <structname>GIConv</structname> struct wraps an
|
||||
<function>iconv()</function> conversion descriptor. It contains private data
|
||||
and should only be accessed using the following functions.
|
||||
|
||||
</para>
|
||||
|
||||
|
||||
@ -222,9 +71,7 @@ and should only be accessed using the following functions.
|
||||
|
||||
<!-- ##### MACRO G_CONVERT_ERROR ##### -->
|
||||
<para>
|
||||
Error domain for character set conversions. Errors in this domain will
|
||||
be from the #GConvertError enumeration. See #GError for information on
|
||||
error domains.
|
||||
|
||||
</para>
|
||||
|
||||
|
||||
@ -342,16 +189,15 @@ error domains.
|
||||
|
||||
<!-- ##### ENUM GConvertError ##### -->
|
||||
<para>
|
||||
Error codes returned by character set conversion routines.
|
||||
|
||||
</para>
|
||||
|
||||
@G_CONVERT_ERROR_NO_CONVERSION: Conversion between the requested character sets
|
||||
is not supported.
|
||||
@G_CONVERT_ERROR_ILLEGAL_SEQUENCE: Invalid byte sequence in conversion input.
|
||||
@G_CONVERT_ERROR_FAILED: Conversion failed for some reason.
|
||||
@G_CONVERT_ERROR_PARTIAL_INPUT: Partial character sequence at end of input.
|
||||
@G_CONVERT_ERROR_BAD_URI: URI is invalid.
|
||||
@G_CONVERT_ERROR_NOT_ABSOLUTE_PATH: Pathname is not an absolute path.
|
||||
@G_CONVERT_ERROR_NO_CONVERSION:
|
||||
@G_CONVERT_ERROR_ILLEGAL_SEQUENCE:
|
||||
@G_CONVERT_ERROR_FAILED:
|
||||
@G_CONVERT_ERROR_PARTIAL_INPUT:
|
||||
@G_CONVERT_ERROR_BAD_URI:
|
||||
@G_CONVERT_ERROR_NOT_ABSOLUTE_PATH:
|
||||
|
||||
<!-- ##### FUNCTION g_get_charset ##### -->
|
||||
<para>
|
||||
|
121
glib/gconvert.c
121
glib/gconvert.c
@ -56,6 +56,127 @@
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
* SECTION:conversions
|
||||
* @title: Character Set Conversion
|
||||
* @short_description: Convert strings between different character sets
|
||||
*
|
||||
* The g_convert() family of functions wraps the functionality of iconv(). In
|
||||
* addition to pure character set conversions, GLib has functions to deal
|
||||
* with the extra complications of encodings for file names.
|
||||
*
|
||||
* <refsect2 id="file-name-encodings">
|
||||
* <title>File Name Encodings</title>
|
||||
* <para>
|
||||
* Historically, Unix has not had a defined encoding for file
|
||||
* names: a file name is valid as long as it does not have path
|
||||
* separators in it ("/"). However, displaying file names may
|
||||
* require conversion: from the character set in which they were
|
||||
* created, to the character set in which the application
|
||||
* operates. Consider the Spanish file name
|
||||
* "<filename>Presentación.sxi</filename>". If the
|
||||
* application which created it uses ISO-8859-1 for its encoding,
|
||||
* then the actual file name on disk would look like this:
|
||||
* </para>
|
||||
* <programlisting id="filename-iso8859-1">
|
||||
* Character: P r e s e n t a c i ó n . s x i
|
||||
* Hex code: 50 72 65 73 65 6e 74 61 63 69 f3 6e 2e 73 78 69
|
||||
* </programlisting>
|
||||
* <para>
|
||||
* However, if the application uses UTF-8, the actual file name on
|
||||
* disk would look like this:
|
||||
* </para>
|
||||
* <programlisting id="filename-utf-8">
|
||||
* Character: P r e s e n t a c i ó n . s x i
|
||||
* Hex code: 50 72 65 73 65 6e 74 61 63 69 c3 b3 6e 2e 73 78 69
|
||||
* </programlisting>
|
||||
* <para>
|
||||
* GLib uses UTF-8 for its strings, and GUI toolkits like GTK+
|
||||
* that use GLib do the same thing. If you get a file name from
|
||||
* the file system, for example, from readdir(3) or from g_dir_read_name(),
|
||||
* and you wish to display the file name to the user, you
|
||||
* <emphasis>will</emphasis> need to convert it into UTF-8. The
|
||||
* opposite case is when the user types the name of a file he
|
||||
* wishes to save: the toolkit will give you that string in
|
||||
* UTF-8 encoding, and you will need to convert it to the
|
||||
* character set used for file names before you can create the
|
||||
* file with open(2) or fopen(3).
|
||||
* </para>
|
||||
* <para>
|
||||
* By default, GLib assumes that file names on disk are in UTF-8
|
||||
* encoding. This is a valid assumption for file systems which
|
||||
* were created relatively recently: most applications use UTF-8
|
||||
* encoding for their strings, and that is also what they use for
|
||||
* the file names they create. However, older file systems may
|
||||
* still contain file names created in "older" encodings, such as
|
||||
* ISO-8859-1. In this case, for compatibility reasons, you may
|
||||
* want to instruct GLib to use that particular encoding for file
|
||||
* names rather than UTF-8. You can do this by specifying the
|
||||
* encoding for file names in the <link
|
||||
* linkend="G_FILENAME_ENCODING"><envar>G_FILENAME_ENCODING</envar></link>
|
||||
* environment variable. For example, if your installation uses
|
||||
* ISO-8859-1 for file names, you can put this in your
|
||||
* <filename>~/.profile</filename>:
|
||||
* </para>
|
||||
* <programlisting>
|
||||
* export G_FILENAME_ENCODING=ISO-8859-1
|
||||
* </programlisting>
|
||||
* <para>
|
||||
* GLib provides the functions g_filename_to_utf8() and
|
||||
* g_filename_from_utf8() to perform the necessary conversions. These
|
||||
* functions convert file names from the encoding specified in
|
||||
* <envar>G_FILENAME_ENCODING</envar> to UTF-8 and vice-versa.
|
||||
* <xref linkend="file-name-encodings-diagram"/> illustrates how
|
||||
* these functions are used to convert between UTF-8 and the
|
||||
* encoding for file names in the file system.
|
||||
* </para>
|
||||
* <figure id="file-name-encodings-diagram">
|
||||
* <title>Conversion between File Name Encodings</title>
|
||||
* <graphic fileref="file-name-encodings.png" format="PNG"/>
|
||||
* </figure>
|
||||
* <refsect3 id="file-name-encodings-checklist">
|
||||
* <title>Checklist for Application Writers</title>
|
||||
* <para>
|
||||
* This section is a practical summary of the detailed
|
||||
* description above. You can use this as a checklist of
|
||||
* things to do to make sure your applications process file
|
||||
* name encodings correctly.
|
||||
* </para>
|
||||
* <orderedlist>
|
||||
* <listitem><para>
|
||||
* If you get a file name from the file system from a function
|
||||
* such as readdir(3) or gtk_file_chooser_get_filename(),
|
||||
* you do not need to do any conversion to pass that
|
||||
* file name to functions like open(2), rename(2), or
|
||||
* fopen(3) — those are "raw" file names which the file
|
||||
* system understands.
|
||||
* </para></listitem>
|
||||
* <listitem><para>
|
||||
* If you need to display a file name, convert it to UTF-8 first by
|
||||
* using g_filename_to_utf8(). If conversion fails, display a string like
|
||||
* "<literal>Unknown file name</literal>". <emphasis>Do not</emphasis>
|
||||
* convert this string back into the encoding used for file names if you
|
||||
* wish to pass it to the file system; use the original file name instead.
|
||||
* For example, the document window of a word processor could display
|
||||
* "Unknown file name" in its title bar but still let the user save the
|
||||
* file, as it would keep the raw file name internally. This can happen
|
||||
* if the user has not set the <envar>G_FILENAME_ENCODING</envar>
|
||||
* environment variable even though he has files whose names are not
|
||||
* encoded in UTF-8.
|
||||
* </para></listitem>
|
||||
* <listitem><para>
|
||||
* If your user interface lets the user type a file name for saving or
|
||||
* renaming, convert it to the encoding used for file names in the file
|
||||
* system by using g_filename_from_utf8(). Pass the converted file name
|
||||
* to functions like fopen(3). If conversion fails, ask the user to enter
|
||||
* a different file name. This can happen if the user types Japanese
|
||||
* characters when <envar>G_FILENAME_ENCODING</envar> is set to
|
||||
* <literal>ISO-8859-1</literal>, for example.
|
||||
* </para></listitem>
|
||||
* </orderedlist>
|
||||
* </refsect3>
|
||||
* </refsect2>
|
||||
*/
|
||||
|
||||
/* We try to terminate strings in unknown charsets with this many zero bytes
|
||||
* to ensure that multibyte strings really are nul-terminated when we return
|
||||
* them from g_convert() and friends.
|
||||
|
@ -35,7 +35,19 @@
|
||||
|
||||
G_BEGIN_DECLS
|
||||
|
||||
typedef enum
|
||||
/**
|
||||
* GConvertError:
|
||||
* @G_CONVERT_ERROR_NO_CONVERSION: Conversion between the requested character
|
||||
* sets is not supported.
|
||||
* @G_CONVERT_ERROR_ILLEGAL_SEQUENCE: Invalid byte sequence in conversion input.
|
||||
* @G_CONVERT_ERROR_FAILED: Conversion failed for some reason.
|
||||
* @G_CONVERT_ERROR_PARTIAL_INPUT: Partial character sequence at end of input.
|
||||
* @G_CONVERT_ERROR_BAD_URI: URI is invalid.
|
||||
* @G_CONVERT_ERROR_NOT_ABSOLUTE_PATH: Pathname is not an absolute path.
|
||||
*
|
||||
* Error codes returned by character set conversion routines.
|
||||
*/
|
||||
typedef enum
|
||||
{
|
||||
G_CONVERT_ERROR_NO_CONVERSION,
|
||||
G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
|
||||
@ -45,10 +57,22 @@ typedef enum
|
||||
G_CONVERT_ERROR_NOT_ABSOLUTE_PATH
|
||||
} GConvertError;
|
||||
|
||||
/**
|
||||
* G_CONVERT_ERROR:
|
||||
*
|
||||
* Error domain for character set conversions. Errors in this domain will
|
||||
* be from the #GConvertError enumeration. See #GError for information on
|
||||
* error domains.
|
||||
*/
|
||||
#define G_CONVERT_ERROR g_convert_error_quark()
|
||||
GQuark g_convert_error_quark (void);
|
||||
|
||||
/* Thin wrappers around iconv
|
||||
/**
|
||||
* GIConv:
|
||||
*
|
||||
* The <structname>GIConv</structname> struct wraps an
|
||||
* iconv() conversion descriptor. It contains private data
|
||||
* and should only be accessed using the following functions.
|
||||
*/
|
||||
typedef struct _GIConv *GIConv;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user