mirror of
				https://gitlab.gnome.org/GNOME/glib.git
				synced 2025-11-04 01:58:54 +01:00 
			
		
		
		
	New section on file name encodings.
2004-06-15 Federico Mena Quintero <federico@ximian.com> * glib/tmpl/conversions.sgml: New section on file name encodings. * glib/file-name-encodings.sxd: New diagram of how file name encodings work. * glib/file-name-encodings.png: Same as above, for inclusion in the generated docs. * glib/Makefile.am (HTML_IMAGES): Add file-name-encodings.png. (EXTRA_DIST): Add the new images. * glib/running.sgml: Add ids to the corresponding paragraphs that describe G_FILENAME_ENCODING and G_BROKEN_FILENAMES, to be able to reference them from elsewhere.
This commit is contained in:
		
				
					committed by
					
						
						Federico Mena Quintero
					
				
			
			
				
	
			
			
			
						parent
						
							f65f7608ab
						
					
				
				
					commit
					d131bae53d
				
			@@ -1,3 +1,20 @@
 | 
			
		||||
2004-06-15  Federico Mena Quintero  <federico@ximian.com>
 | 
			
		||||
 | 
			
		||||
	* glib/tmpl/conversions.sgml: New section on file name encodings.
 | 
			
		||||
 | 
			
		||||
	* glib/file-name-encodings.sxd: New diagram of how file name
 | 
			
		||||
	encodings work.
 | 
			
		||||
 | 
			
		||||
	* glib/file-name-encodings.png: Same as above, for inclusion in
 | 
			
		||||
	the generated docs.
 | 
			
		||||
 | 
			
		||||
	* glib/Makefile.am (HTML_IMAGES): Add file-name-encodings.png.
 | 
			
		||||
	(EXTRA_DIST): Add the new images.
 | 
			
		||||
 | 
			
		||||
	* glib/running.sgml: Add ids to the corresponding paragraphs that
 | 
			
		||||
	describe G_FILENAME_ENCODING and G_BROKEN_FILENAMES, to be able to
 | 
			
		||||
	reference them from elsewhere.
 | 
			
		||||
 | 
			
		||||
Thu Jun 10 21:29:55 2004  Matthias Clasen  <maclas@gmx.de>
 | 
			
		||||
 | 
			
		||||
	* glib/tmpl/modules.sgml: Add an example for GModule
 | 
			
		||||
 
 | 
			
		||||
@@ -39,6 +39,7 @@ MKDB_OPTIONS=--sgml-mode --output-format=xml --ignore-files=trio
 | 
			
		||||
 | 
			
		||||
# Images to copy into HTML directory
 | 
			
		||||
HTML_IMAGES =  				\
 | 
			
		||||
	file-name-encodings.png		\
 | 
			
		||||
	mainloop-states.gif
 | 
			
		||||
 | 
			
		||||
# Extra SGML files that are included by $(DOC_MAIN_SGML_FILE)
 | 
			
		||||
@@ -60,6 +61,8 @@ include $(top_srcdir)/gtk-doc.make
 | 
			
		||||
 | 
			
		||||
# Other files to distribute
 | 
			
		||||
EXTRA_DIST +=				\
 | 
			
		||||
	file-name-encodings.png		\
 | 
			
		||||
	file-name-encodings.sxd		\
 | 
			
		||||
	mainloop-states.fig		\
 | 
			
		||||
	mainloop-states.png		\
 | 
			
		||||
	mainloop-states.eps		\
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										
											BIN
										
									
								
								docs/reference/glib/file-name-encodings.png
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								docs/reference/glib/file-name-encodings.png
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							| 
		 After Width: | Height: | Size: 30 KiB  | 
							
								
								
									
										
											BIN
										
									
								
								docs/reference/glib/file-name-encodings.sxd
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								docs/reference/glib/file-name-encodings.sxd
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							@@ -23,7 +23,7 @@ GLib inspects a few of environment variables in addition to standard
 | 
			
		||||
variables like <envar>LANG</envar>, <envar>PATH</envar> or <envar>HOME</envar>. 
 | 
			
		||||
</para>
 | 
			
		||||
 | 
			
		||||
<formalpara>
 | 
			
		||||
<formalpara id="G_FILENAME_ENCODING">
 | 
			
		||||
  <title><envar>G_FILENAME_ENCODING</envar></title>
 | 
			
		||||
 | 
			
		||||
  <para>
 | 
			
		||||
@@ -34,7 +34,7 @@ variables like <envar>LANG</envar>, <envar>PATH</envar> or <envar>HOME</envar>.
 | 
			
		||||
  </para>
 | 
			
		||||
</formalpara>
 | 
			
		||||
 | 
			
		||||
<formalpara>
 | 
			
		||||
<formalpara id="G_BROKEN_FILENAMES">
 | 
			
		||||
  <title><envar>G_BROKEN_FILENAMES</envar></title>
 | 
			
		||||
 | 
			
		||||
  <para>
 | 
			
		||||
 
 | 
			
		||||
@@ -9,6 +9,153 @@ convert strings between different character sets using <function>iconv()</functi
 | 
			
		||||
 | 
			
		||||
</para>
 | 
			
		||||
 | 
			
		||||
    <refsect2 id="file-name-encodings">
 | 
			
		||||
      <title>File Name Encodings</title>
 | 
			
		||||
 | 
			
		||||
      <para>
 | 
			
		||||
	Historically, Unix has not had a defined encoding for file
 | 
			
		||||
	names:  a file name is valid as long as it does not have path
 | 
			
		||||
	separators in it ("/").  However, displaying file names may
 | 
			
		||||
	require conversion:  from the character set in which they were
 | 
			
		||||
	created, to the character set in which the application
 | 
			
		||||
	operates.  Consider the Spanish file name
 | 
			
		||||
	"<filename>Presentación.sxi</filename>".  If the
 | 
			
		||||
	application which created it uses ISO-8859-1 for its encoding,
 | 
			
		||||
	then the actual file name on disk would look like this:
 | 
			
		||||
      </para>
 | 
			
		||||
 | 
			
		||||
      <programlisting id="filename-iso8859-1">
 | 
			
		||||
Character:  P  r  e  s  e  n  t  a  c  i  ó  n  .  s  x  i
 | 
			
		||||
Hex code:   50 72 65 73 65 6e 74 61 63 69 f3 6e 2e 73 78 69
 | 
			
		||||
      </programlisting>
 | 
			
		||||
 | 
			
		||||
      <para>
 | 
			
		||||
	However, if the application uses UTF-8, the actual file name on
 | 
			
		||||
	disk would look like this:
 | 
			
		||||
      </para>
 | 
			
		||||
 | 
			
		||||
      <programlisting id="filename-utf-8">
 | 
			
		||||
Character:  P  r  e  s  e  n  t  a  c  i  ó     n  .  s  x  i
 | 
			
		||||
Hex code:   50 72 65 73 65 6e 74 61 63 69 c3 b3 6e 2e 73 78 69
 | 
			
		||||
      </programlisting>
 | 
			
		||||
 | 
			
		||||
      <para>
 | 
			
		||||
	GLib uses UTF-8 for its strings, and GUI toolkits like GTK+
 | 
			
		||||
	that use GLib do the same thing.  If you get a file name from
 | 
			
		||||
	the file system, for example, from
 | 
			
		||||
	<function>readdir(3)</function> or from <link
 | 
			
		||||
	linkend="g_dir_read_name"><function>g_dir_read_name()</function></link>,
 | 
			
		||||
	and you wish to display the file name to the user, you
 | 
			
		||||
	<emphasis>will</emphasis> need to convert it into UTF-8.  The
 | 
			
		||||
	opposite case is when the user types the name of a file he
 | 
			
		||||
	wishes to save:  the toolkit will give you that string in
 | 
			
		||||
	UTF-8 encoding, and you will need to convert it to the
 | 
			
		||||
	character set used for file names before you can create the
 | 
			
		||||
	file with <function>open(2)</function> or
 | 
			
		||||
	<function>fopen(3)</function>.
 | 
			
		||||
      </para>
 | 
			
		||||
 | 
			
		||||
      <para>
 | 
			
		||||
	By default, GLib assumes that file names on disk are in UTF-8
 | 
			
		||||
	encoding.  This is a valid assumption for file systems which
 | 
			
		||||
	were created relatively recently:  most applications use UTF-8
 | 
			
		||||
	encoding for their strings, and that is also what they use for
 | 
			
		||||
	the file names they create.  However, older file systems may
 | 
			
		||||
	still contain file names created in "older" encodings, such as
 | 
			
		||||
	ISO-8859-1.  In this case, for compatibility reasons, you may
 | 
			
		||||
	want to instruct GLib to use that particular encoding for file
 | 
			
		||||
	names rather than UTF-8.  You can do this by specifying the
 | 
			
		||||
	encoding for file names in the <link
 | 
			
		||||
	linkend="G_FILENAME_ENCODING"><envar>G_FILENAME_ENCODING</envar></link>
 | 
			
		||||
	environment variable.  For example, if your installation uses
 | 
			
		||||
	ISO-8859-1 for file names, you can put this in your
 | 
			
		||||
	<filename>~/.profile</filename>:
 | 
			
		||||
      </para>
 | 
			
		||||
 | 
			
		||||
      <programlisting>
 | 
			
		||||
export G_FILENAME_ENCODING=ISO-8859-1
 | 
			
		||||
      </programlisting>
 | 
			
		||||
 | 
			
		||||
      <para>
 | 
			
		||||
	GLib provides the functions <link
 | 
			
		||||
	linkend="g_filename_to_utf8"><function>g_filename_to_utf8()</function></link>
 | 
			
		||||
	and <link
 | 
			
		||||
	linkend="g_filename_from_utf8"><function>g_filename_from_utf8()</function></link>
 | 
			
		||||
	to perform the necessary conversions.  These functions convert
 | 
			
		||||
	file names from the encoding specified in
 | 
			
		||||
	<envar>G_FILENAME_ENCODING</envar> to UTF-8 and vice versa.
 | 
			
		||||
	<xref linkend="file-name-encodings-diagram"/> illustrates how
 | 
			
		||||
	these functions are used to convert between UTF-8 and the
 | 
			
		||||
	encoding for file names in the file system.
 | 
			
		||||
      </para>
 | 
			
		||||
 | 
			
		||||
      <figure id="file-name-encodings-diagram">
 | 
			
		||||
	<title>Conversion between File Name Encodings</title>
 | 
			
		||||
	<graphic fileref="file-name-encodings.png" format="PNG"/>
 | 
			
		||||
      </figure>
 | 
			
		||||
 | 
			
		||||
      <refsect3 id="file-name-encodings-checklist">
 | 
			
		||||
	<title>Checklist for Application Writers</title>
 | 
			
		||||
 | 
			
		||||
	<para>
 | 
			
		||||
	  This section is a practical summary of the detailed
 | 
			
		||||
	  description above.  You can use this as a checklist of
 | 
			
		||||
	  things to do to make sure your applications process file
 | 
			
		||||
	  name encodings correctly.
 | 
			
		||||
	</para>
 | 
			
		||||
 | 
			
		||||
	<orderedlist>
 | 
			
		||||
	  <listitem>
 | 
			
		||||
	    <para>
 | 
			
		||||
	      If you get a file name from the file system from a
 | 
			
		||||
	      function such as <function>readdir(3)</function> or
 | 
			
		||||
	      <function>gtk_file_chooser_get_filename()</function>,
 | 
			
		||||
	      you do not need to do any conversion to pass that
 | 
			
		||||
	      file name to functions like <function>open(2)</function>,
 | 
			
		||||
	      <function>rename(2)</function>, or
 | 
			
		||||
	      <function>fopen(3)</function> — those are "raw"
 | 
			
		||||
	      file names which the file system understands.
 | 
			
		||||
	    </para>
 | 
			
		||||
	  </listitem>
 | 
			
		||||
 | 
			
		||||
	  <listitem>
 | 
			
		||||
	    <para>
 | 
			
		||||
	      If you need to display a file name, convert it to UTF-8
 | 
			
		||||
	      first by using <link
 | 
			
		||||
	      linkend="g_filename_to_utf8"><function>g_filename_to_utf8()</function></link>.
 | 
			
		||||
	      If conversion fails, display a string like
 | 
			
		||||
	      "<literal>Unknown file name</literal>".  <emphasis>Do
 | 
			
		||||
	      not</emphasis> convert this string back into the
 | 
			
		||||
	      encoding used for file names if you wish to pass it to
 | 
			
		||||
	      the file system; use the original file name instead.
 | 
			
		||||
	      For example, the document window of a word processor
 | 
			
		||||
	      could display "Unknown file name" in its title bar but
 | 
			
		||||
	      still let the user save the file, as it would keep the
 | 
			
		||||
	      raw file name internally.  This can happen if the user
 | 
			
		||||
	      has not set the <envar>G_FILENAME_ENCODING</envar>
 | 
			
		||||
	      environment variable even though he has files whose
 | 
			
		||||
	      names are not encoded in UTF-8.
 | 
			
		||||
	    </para>
 | 
			
		||||
	  </listitem>
 | 
			
		||||
 | 
			
		||||
	  <listitem>
 | 
			
		||||
	    <para>
 | 
			
		||||
	      If your user interface lets the user type a file name
 | 
			
		||||
	      for saving or renaming, convert it to the encoding used
 | 
			
		||||
	      for file names in the file system by using <link
 | 
			
		||||
	      linkend="g_filename_from_utf8"><function>g_filename_from_utf8()</function></link>.
 | 
			
		||||
	      Pass the converted file name to functions like
 | 
			
		||||
	      <function>fopen(3)</function>.  If conversion fails, ask
 | 
			
		||||
	      the user to enter a different file name.  This can
 | 
			
		||||
	      happen if the user types Japanese characters when
 | 
			
		||||
	      <envar>G_FILENAME_ENCODING</envar> is set to
 | 
			
		||||
	      <literal>ISO-8859-1</literal>, for example.
 | 
			
		||||
	    </para>
 | 
			
		||||
	  </listitem>
 | 
			
		||||
	</orderedlist>
 | 
			
		||||
      </refsect3>
 | 
			
		||||
    </refsect2>
 | 
			
		||||
 | 
			
		||||
<!-- ##### SECTION See_Also ##### -->
 | 
			
		||||
<para>
 | 
			
		||||
 | 
			
		||||
@@ -204,3 +351,11 @@ is not supported.
 | 
			
		||||
@Returns: 
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
<!--
 | 
			
		||||
Local variables:
 | 
			
		||||
mode: sgml
 | 
			
		||||
sgml-parent-document: ("../glib-docs.sgml" "book" "refentry")
 | 
			
		||||
End:
 | 
			
		||||
-->
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user