glib.h New functions for conversion between UTF-8 and the encoding

2000-02-01  Tor Lillqvist  <tml@iki.fi>

* glib.h
* gstrfuncs.c (g_filename_to_utf8, g_filename_from_utf8): New
functions for conversion between UTF-8 and the encoding expected
by C runtime functions like open() and stat(), and returned by
readdir().

Implement them on Win32 where we use the system "ANSI" codepage,
which might be single-byte or double-byte. On Unix, just skip the
issue for now and provide dummy implementations that return a copy
of the argument.

* README.win32
* build-dll
* glib.def: Minor updates.
This commit is contained in:
Tor Lillqvist 2000-02-02 23:39:32 +00:00 committed by Tor Lillqvist
parent 86b2741c1e
commit c22cf34e92
16 changed files with 637 additions and 10 deletions

View File

@ -1,3 +1,20 @@
2000-02-01 Tor Lillqvist <tml@iki.fi>
* glib.h
* gstrfuncs.c (g_filename_to_utf8, g_filename_from_utf8): New
functions for conversion between UTF-8 and the encoding expected
by C runtime functions like open() and stat(), and returned by
readdir().
Implement them on Win32 where we use the system "ANSI" codepage,
which might be single-byte or double-byte. On Unix, just skip the
issue for now and provide dummy implementations that return a copy
of the argument.
* README.win32
* build-dll
* glib.def: Minor updates.
Wed Jan 26 05:24:38 2000 Tim Janik <timj@gtk.org> Wed Jan 26 05:24:38 2000 Tim Janik <timj@gtk.org>
* glib.h: * glib.h:

View File

@ -1,3 +1,20 @@
2000-02-01 Tor Lillqvist <tml@iki.fi>
* glib.h
* gstrfuncs.c (g_filename_to_utf8, g_filename_from_utf8): New
functions for conversion between UTF-8 and the encoding expected
by C runtime functions like open() and stat(), and returned by
readdir().
Implement them on Win32 where we use the system "ANSI" codepage,
which might be single-byte or double-byte. On Unix, just skip the
issue for now and provide dummy implementations that return a copy
of the argument.
* README.win32
* build-dll
* glib.def: Minor updates.
Wed Jan 26 05:24:38 2000 Tim Janik <timj@gtk.org> Wed Jan 26 05:24:38 2000 Tim Janik <timj@gtk.org>
* glib.h: * glib.h:

View File

@ -1,3 +1,20 @@
2000-02-01 Tor Lillqvist <tml@iki.fi>
* glib.h
* gstrfuncs.c (g_filename_to_utf8, g_filename_from_utf8): New
functions for conversion between UTF-8 and the encoding expected
by C runtime functions like open() and stat(), and returned by
readdir().
Implement them on Win32 where we use the system "ANSI" codepage,
which might be single-byte or double-byte. On Unix, just skip the
issue for now and provide dummy implementations that return a copy
of the argument.
* README.win32
* build-dll
* glib.def: Minor updates.
Wed Jan 26 05:24:38 2000 Tim Janik <timj@gtk.org> Wed Jan 26 05:24:38 2000 Tim Janik <timj@gtk.org>
* glib.h: * glib.h:

View File

@ -1,3 +1,20 @@
2000-02-01 Tor Lillqvist <tml@iki.fi>
* glib.h
* gstrfuncs.c (g_filename_to_utf8, g_filename_from_utf8): New
functions for conversion between UTF-8 and the encoding expected
by C runtime functions like open() and stat(), and returned by
readdir().
Implement them on Win32 where we use the system "ANSI" codepage,
which might be single-byte or double-byte. On Unix, just skip the
issue for now and provide dummy implementations that return a copy
of the argument.
* README.win32
* build-dll
* glib.def: Minor updates.
Wed Jan 26 05:24:38 2000 Tim Janik <timj@gtk.org> Wed Jan 26 05:24:38 2000 Tim Janik <timj@gtk.org>
* glib.h: * glib.h:

View File

@ -1,3 +1,20 @@
2000-02-01 Tor Lillqvist <tml@iki.fi>
* glib.h
* gstrfuncs.c (g_filename_to_utf8, g_filename_from_utf8): New
functions for conversion between UTF-8 and the encoding expected
by C runtime functions like open() and stat(), and returned by
readdir().
Implement them on Win32 where we use the system "ANSI" codepage,
which might be single-byte or double-byte. On Unix, just skip the
issue for now and provide dummy implementations that return a copy
of the argument.
* README.win32
* build-dll
* glib.def: Minor updates.
Wed Jan 26 05:24:38 2000 Tim Janik <timj@gtk.org> Wed Jan 26 05:24:38 2000 Tim Janik <timj@gtk.org>
* glib.h: * glib.h:

View File

@ -1,3 +1,20 @@
2000-02-01 Tor Lillqvist <tml@iki.fi>
* glib.h
* gstrfuncs.c (g_filename_to_utf8, g_filename_from_utf8): New
functions for conversion between UTF-8 and the encoding expected
by C runtime functions like open() and stat(), and returned by
readdir().
Implement them on Win32 where we use the system "ANSI" codepage,
which might be single-byte or double-byte. On Unix, just skip the
issue for now and provide dummy implementations that return a copy
of the argument.
* README.win32
* build-dll
* glib.def: Minor updates.
Wed Jan 26 05:24:38 2000 Tim Janik <timj@gtk.org> Wed Jan 26 05:24:38 2000 Tim Janik <timj@gtk.org>
* glib.h: * glib.h:

View File

@ -1,3 +1,20 @@
2000-02-01 Tor Lillqvist <tml@iki.fi>
* glib.h
* gstrfuncs.c (g_filename_to_utf8, g_filename_from_utf8): New
functions for conversion between UTF-8 and the encoding expected
by C runtime functions like open() and stat(), and returned by
readdir().
Implement them on Win32 where we use the system "ANSI" codepage,
which might be single-byte or double-byte. On Unix, just skip the
issue for now and provide dummy implementations that return a copy
of the argument.
* README.win32
* build-dll
* glib.def: Minor updates.
Wed Jan 26 05:24:38 2000 Tim Janik <timj@gtk.org> Wed Jan 26 05:24:38 2000 Tim Janik <timj@gtk.org>
* glib.h: * glib.h:

View File

@ -1,3 +1,20 @@
2000-02-01 Tor Lillqvist <tml@iki.fi>
* glib.h
* gstrfuncs.c (g_filename_to_utf8, g_filename_from_utf8): New
functions for conversion between UTF-8 and the encoding expected
by C runtime functions like open() and stat(), and returned by
readdir().
Implement them on Win32 where we use the system "ANSI" codepage,
which might be single-byte or double-byte. On Unix, just skip the
issue for now and provide dummy implementations that return a copy
of the argument.
* README.win32
* build-dll
* glib.def: Minor updates.
Wed Jan 26 05:24:38 2000 Tim Janik <timj@gtk.org> Wed Jan 26 05:24:38 2000 Tim Janik <timj@gtk.org>
* glib.h: * glib.h:

View File

@ -54,6 +54,14 @@ changed to G_OS_WIN32.
G_OS_WIN32 implies using the Microsoft C runtime MSVCRT.DLL. G_OS_WIN32 implies using the Microsoft C runtime MSVCRT.DLL.
Building software that use GLib or GTk+
=======================================
Unfortunately, even building software that just *uses* GLib or GTk+
also requires having the right compiler set up the right way, so if
you intend to use gcc, follow the relevant instructions below in that
case, too.
Pthreads library Pthreads library
================ ================
@ -91,12 +99,9 @@ Building with gcc
I use the latest and greatest gcc, gcc-2.95.2. 2.95 will also work. I use the latest and greatest gcc, gcc-2.95.2. 2.95 will also work.
Earlier version might, but you are on your own. Earlier version might, but you are on your own.
Read and understand these instruction carefully. If you don't Read these instruction carefully and understand them. If you don't
understand or can't follow the instructions, you probably shouldn't understand or can't follow the instructions, you probably shouldn't
want to build GLib (or GTk+ or GIMP) yourself anyway. Unfortunately, want to build GLib (or GTk+ or GIMP) yourself anyway.
even building software that just *use* GLib or GTk+ also require to
have the right compiler set up the right way, so follow these
instructions in that case, too.
0) Get and install Cygwin B20.1. 0) Get and install Cygwin B20.1.
@ -270,6 +275,26 @@ diff -ru2 ./w32api/include/wingdi.h ../../src/mingw-runtime-19991107/w32api/incl
================ cut here ================ ================ cut here ================
fpos_t should be long long with MSVCRT.DLL:
================ cut here ================
--- stdio.h~ Thu Aug 19 02:47:42 1999
+++ stdio.h Mon Jan 17 21:58:20 2000
@@ -296,6 +296,11 @@
* it is fairly evident that the fpos_t type is a long (in CRTDLL.DLL).
* Perhaps an unsigned long? TODO?
+ * In MSVCRT.DLL it's a long long, however.
*/
+#ifdef __MSVCRT__
+typedef long long fpos_t;
+#else
typedef long fpos_t;
+#endif
int fgetpos (FILE* fileGetPosition, fpos_t* pfpos);
================ cut here ================
(I haven't checked yet if other small errors I have noticed in (I haven't checked yet if other small errors I have noticed in
previous w32api header versions have been corrected.) previous w32api header versions have been corrected.)

View File

@ -1,10 +1,10 @@
#!/bin/bash #!/bin/bash
# Temporary hack until building dlls or executables with exported # Temporary hack until building dlls is easier with gcc -mno-cygwin
# entry points is easier with gcc -mno-cygwin ("mingw32"). # ("mingw32").
# This is usable with cygwin b20.1 and egcs-2.91.66 19990314 # This is usable with cygwin b20.1 and egcs-2.91.66 19990314
# (egcs-1.1.2 release) or gcc-2.95 as distributed by Mumit Khan. For # (egcs-1.1.2 release) or gcc-2.95(.2) as distributed by Mumit Khan. For
# other combinations, no idea. # other combinations, no idea.
GCC="gcc" GCC="gcc"

View File

@ -88,6 +88,8 @@ EXPORTS
g_direct_equal g_direct_equal
g_direct_hash g_direct_hash
g_dirname g_dirname
g_filename_from_utf8
g_filename_to_utf8
g_free g_free
g_get_current_dir g_get_current_dir
g_get_current_time g_get_current_time
@ -348,7 +350,6 @@ EXPORTS
g_source_remove g_source_remove
g_source_remove_by_source_data g_source_remove_by_source_data
g_source_remove_by_user_data g_source_remove_by_user_data
g_spaced_primes_closest
g_static_mutex_get_mutex_impl g_static_mutex_get_mutex_impl
g_static_private_get g_static_private_get
g_static_private_set g_static_private_set
@ -446,3 +447,4 @@ EXPORTS
glib_major_version glib_major_version
glib_micro_version glib_micro_version
glib_minor_version glib_minor_version
g_spaced_primes_closest

7
glib.h
View File

@ -1604,6 +1604,13 @@ gchar* g_strcompress (const gchar *source);
*/ */
gchar* g_strescape (const gchar *source, gchar* g_strescape (const gchar *source,
const gchar *exceptions); const gchar *exceptions);
/*
 * Convert between the operating system (or C runtime)
 * representation of file names and UTF-8.
 *
 * Both functions return a newly allocated, NUL-terminated string
 * that the caller releases with g_free(). On Win32 the system
 * "ANSI" codepage is used, and g_filename_from_utf8() returns NULL
 * when its argument is not well-formed UTF-8. On other systems both
 * functions simply return a copy of the argument.
 */
gchar* g_filename_to_utf8 (const gchar *opsysstring);
gchar* g_filename_from_utf8 (const gchar *utf8string);
/* Deprecated API: /* Deprecated API:
* gchar* g_strescape (const gchar *source); * gchar* g_strescape (const gchar *source);
* Luckily this function wasn't much used. * Luckily this function wasn't much used.

View File

@ -88,6 +88,8 @@ EXPORTS
g_direct_equal g_direct_equal
g_direct_hash g_direct_hash
g_dirname g_dirname
g_filename_from_utf8
g_filename_to_utf8
g_free g_free
g_get_current_dir g_get_current_dir
g_get_current_time g_get_current_time
@ -348,7 +350,6 @@ EXPORTS
g_source_remove g_source_remove
g_source_remove_by_source_data g_source_remove_by_source_data
g_source_remove_by_user_data g_source_remove_by_user_data
g_spaced_primes_closest
g_static_mutex_get_mutex_impl g_static_mutex_get_mutex_impl
g_static_private_get g_static_private_get
g_static_private_set g_static_private_set
@ -446,3 +447,4 @@ EXPORTS
glib_major_version glib_major_version
glib_micro_version glib_micro_version
glib_minor_version glib_minor_version
g_spaced_primes_closest

View File

@ -1604,6 +1604,13 @@ gchar* g_strcompress (const gchar *source);
*/ */
gchar* g_strescape (const gchar *source, gchar* g_strescape (const gchar *source,
const gchar *exceptions); const gchar *exceptions);
/*
 * Convert between the operating system (or C runtime)
 * representation of file names and UTF-8.
 *
 * Both functions return a newly allocated, NUL-terminated string
 * that the caller releases with g_free(). On Win32 the system
 * "ANSI" codepage is used, and g_filename_from_utf8() returns NULL
 * when its argument is not well-formed UTF-8. On other systems both
 * functions simply return a copy of the argument.
 */
gchar* g_filename_to_utf8 (const gchar *opsysstring);
gchar* g_filename_from_utf8 (const gchar *utf8string);
/* Deprecated API: /* Deprecated API:
* gchar* g_strescape (const gchar *source); * gchar* g_strescape (const gchar *source);
* Luckily this function wasn't much used. * Luckily this function wasn't much used.

View File

@ -42,6 +42,11 @@
#include <signal.h> #include <signal.h>
#endif #endif
#include "glib.h" #include "glib.h"
#ifdef G_OS_WIN32
#include <windows.h>
#endif
/* do not include <unistd.h> in this place since it /* do not include <unistd.h> in this place since it
* inteferes with g_strsignal() on some OSes * inteferes with g_strsignal() on some OSes
*/ */
@ -1068,6 +1073,225 @@ g_strescape (const gchar *source,
return dest; return dest;
} }
/*
 * g_filename_to_utf8
 *
 * Converts a string which is in the encoding used for file names by
 * the C runtime (usually the same as that used by the operating
 * system) in the current locale into a UTF-8 string.
 *
 * opsysstring: NUL-terminated file name in the C runtime encoding.
 *
 * Returns a newly allocated NUL-terminated string which the caller
 * must release with g_free(), or NULL if opsysstring is NULL.
 *
 * On Win32 the input is interpreted in the system "ANSI" codepage
 * and converted via UTF-16. If the system conversion yields no
 * characters, the result is an empty string. On other systems the
 * argument is simply duplicated.
 */
gchar *
g_filename_to_utf8 (const gchar *opsysstring)
{
#ifdef G_OS_WIN32
  gint i, clen, wclen, first, len;
  wchar_t *wcs;
  guint32 wc;
  gchar *result, *bp;

  /* Behave like the g_strdup() branch below for a NULL argument. */
  if (opsysstring == NULL)
    return NULL;

  len = strlen (opsysstring);
  if (len == 0)
    return g_strdup ("");

  /* A multi-byte string never holds more characters than bytes, so
   * len wide chars is always enough room.
   */
  wcs = g_new (wchar_t, len);
  wclen = MultiByteToWideChar (CP_ACP, 0, opsysstring, len, wcs, len);

  /* Each UTF-16 unit becomes at most three UTF-8 bytes, and a
   * surrogate pair (two units) becomes four, so 3 * wclen bytes plus
   * the terminator is a safe upper bound.
   */
  result = g_malloc (3 * wclen + 1);
  bp = result;

  for (i = 0; i < wclen; i++)
    {
      wc = wcs[i];

      /* wchar_t is a 16-bit UTF-16 unit on Win32: merge a high
       * surrogate and the low surrogate following it into the code
       * point they represent. A lone surrogate is passed through
       * unchanged as a three-byte sequence.
       */
      if (wc >= 0xd800 && wc < 0xdc00 &&
          i + 1 < wclen &&
          wcs[i + 1] >= 0xdc00 && wcs[i + 1] < 0xe000)
        {
          wc = 0x10000 + ((wc - 0xd800) << 10) + (wcs[i + 1] - 0xdc00);
          i++;
        }

      if (wc < 0x80)
        {
          first = 0;
          clen = 1;
        }
      else if (wc < 0x800)
        {
          first = 0xc0;
          clen = 2;
        }
      else if (wc < 0x10000)
        {
          first = 0xe0;
          clen = 3;
        }
      else
        {
          /* Largest value reachable from UTF-16 is 0x10ffff */
          first = 0xf0;
          clen = 4;
        }

      /* Emit the continuation bytes from last to first, then the
       * lead byte carrying the remaining high bits.
       */
      switch (clen)
        {
        case 4: bp[3] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */
        case 3: bp[2] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */
        case 2: bp[1] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */
        case 1: bp[0] = wc | first;
        }

      bp += clen;
    }
  *bp = 0;

  g_free (wcs);

  return result;
#else
  return g_strdup (opsysstring);
#endif
}
/*
 * g_filename_from_utf8
 *
 * The reverse of g_filename_to_utf8: converts a UTF-8 string into
 * the encoding used for file names by the C runtime.
 *
 * utf8string: NUL-terminated UTF-8 string.
 *
 * Returns a newly allocated NUL-terminated string which the caller
 * must release with g_free(). On Win32, NULL is returned if
 * utf8string is NULL, contains an invalid lead byte, a truncated
 * sequence or a bad continuation byte, or encodes a value above
 * 0x10ffff. On other systems the argument is simply duplicated.
 */
gchar *
g_filename_from_utf8 (const gchar *utf8string)
{
#ifdef G_OS_WIN32
  gint i, mask, clen, mblen, len;
  wchar_t *wcs, *wcp;
  gchar *result;
  const guchar *cp, *end;
  guchar c;
  guint32 wc;
  gint n;

  /* Behave like the g_strdup() branch below for a NULL argument. */
  if (utf8string == NULL)
    return NULL;

  len = strlen (utf8string);

  /* First convert to wide chars (UTF-16). A sequence of k bytes
   * produces at most min (k, 2) UTF-16 units, so len + 1 units is
   * always enough room.
   */
  cp = (const guchar *) utf8string;
  end = cp + len;
  n = 0;
  wcs = g_new (wchar_t, len + 1);
  wcp = wcs;

  while (cp != end)
    {
      c = *cp;

      if (c < 0x80)
        {
          clen = 1;
          mask = 0x7f;
        }
      else if ((c & 0xe0) == 0xc0)
        {
          clen = 2;
          mask = 0x1f;
        }
      else if ((c & 0xf0) == 0xe0)
        {
          clen = 3;
          mask = 0x0f;
        }
      else if ((c & 0xf8) == 0xf0)
        {
          clen = 4;
          mask = 0x07;
        }
      else if ((c & 0xfc) == 0xf8)
        {
          clen = 5;
          mask = 0x03;
        }
      else if ((c & 0xfe) == 0xfc)
        {
          /* Only 0xfc and 0xfd start a six-byte sequence; 0xfe and
           * 0xff are never valid and fall through to the error case.
           */
          clen = 6;
          mask = 0x01;
        }
      else
        goto invalid;

      /* The sequence must not run past the end of the string. */
      if (end - cp < clen)
        goto invalid;

      /* Accumulate in 32 bits; wchar_t is only 16 bits wide here. */
      wc = c & mask;
      for (i = 1; i < clen; i++)
        {
          if ((cp[i] & 0xc0) != 0x80)
            goto invalid;

          wc = (wc << 6) | (cp[i] & 0x3f);
        }
      cp += clen;

      if (wc < 0x10000)
        {
          /* Encoded surrogates are passed through as-is, so strings
           * in which a surrogate pair was encoded as two three-byte
           * sequences still convert.
           */
          *wcp++ = wc;
          n++;
        }
      else if (wc < 0x110000)
        {
          /* Outside the BMP: split into a UTF-16 surrogate pair */
          wc -= 0x10000;
          *wcp++ = 0xd800 + (wc >> 10);
          *wcp++ = 0xdc00 + (wc & 0x3ff);
          n += 2;
        }
      else
        goto invalid;		/* Not representable in UTF-16 */
    }

  /* n is the number of wide chars constructed */

  /* Convert to a string in the current ANSI codepage. Three bytes
   * per wide char leaves ample room for double-byte codepages.
   */
  result = g_new (gchar, 3 * n + 1);
  if (n > 0)
    mblen = WideCharToMultiByte (CP_ACP, 0, wcs, n, result, 3 * n, NULL, NULL);
  else
    mblen = 0;
  result[mblen] = 0;

  g_free (wcs);

  return result;

 invalid:
  g_free (wcs);

  return NULL;
#else
  return g_strdup (utf8string);
#endif
}
/* blame Elliot for these next five routines */ /* blame Elliot for these next five routines */
gchar* gchar*
g_strchug (gchar *string) g_strchug (gchar *string)

View File

@ -42,6 +42,11 @@
#include <signal.h> #include <signal.h>
#endif #endif
#include "glib.h" #include "glib.h"
#ifdef G_OS_WIN32
#include <windows.h>
#endif
/* do not include <unistd.h> in this place since it /* do not include <unistd.h> in this place since it
* inteferes with g_strsignal() on some OSes * inteferes with g_strsignal() on some OSes
*/ */
@ -1068,6 +1073,225 @@ g_strescape (const gchar *source,
return dest; return dest;
} }
/*
 * g_filename_to_utf8
 *
 * Converts a string which is in the encoding used for file names by
 * the C runtime (usually the same as that used by the operating
 * system) in the current locale into a UTF-8 string.
 *
 * opsysstring: NUL-terminated file name in the C runtime encoding.
 *
 * Returns a newly allocated NUL-terminated string which the caller
 * must release with g_free(), or NULL if opsysstring is NULL.
 *
 * On Win32 the input is interpreted in the system "ANSI" codepage
 * and converted via UTF-16. If the system conversion yields no
 * characters, the result is an empty string. On other systems the
 * argument is simply duplicated.
 */
gchar *
g_filename_to_utf8 (const gchar *opsysstring)
{
#ifdef G_OS_WIN32
  gint i, clen, wclen, first, len;
  wchar_t *wcs;
  guint32 wc;
  gchar *result, *bp;

  /* Behave like the g_strdup() branch below for a NULL argument. */
  if (opsysstring == NULL)
    return NULL;

  len = strlen (opsysstring);
  if (len == 0)
    return g_strdup ("");

  /* A multi-byte string never holds more characters than bytes, so
   * len wide chars is always enough room.
   */
  wcs = g_new (wchar_t, len);
  wclen = MultiByteToWideChar (CP_ACP, 0, opsysstring, len, wcs, len);

  /* Each UTF-16 unit becomes at most three UTF-8 bytes, and a
   * surrogate pair (two units) becomes four, so 3 * wclen bytes plus
   * the terminator is a safe upper bound.
   */
  result = g_malloc (3 * wclen + 1);
  bp = result;

  for (i = 0; i < wclen; i++)
    {
      wc = wcs[i];

      /* wchar_t is a 16-bit UTF-16 unit on Win32: merge a high
       * surrogate and the low surrogate following it into the code
       * point they represent. A lone surrogate is passed through
       * unchanged as a three-byte sequence.
       */
      if (wc >= 0xd800 && wc < 0xdc00 &&
          i + 1 < wclen &&
          wcs[i + 1] >= 0xdc00 && wcs[i + 1] < 0xe000)
        {
          wc = 0x10000 + ((wc - 0xd800) << 10) + (wcs[i + 1] - 0xdc00);
          i++;
        }

      if (wc < 0x80)
        {
          first = 0;
          clen = 1;
        }
      else if (wc < 0x800)
        {
          first = 0xc0;
          clen = 2;
        }
      else if (wc < 0x10000)
        {
          first = 0xe0;
          clen = 3;
        }
      else
        {
          /* Largest value reachable from UTF-16 is 0x10ffff */
          first = 0xf0;
          clen = 4;
        }

      /* Emit the continuation bytes from last to first, then the
       * lead byte carrying the remaining high bits.
       */
      switch (clen)
        {
        case 4: bp[3] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */
        case 3: bp[2] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */
        case 2: bp[1] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */
        case 1: bp[0] = wc | first;
        }

      bp += clen;
    }
  *bp = 0;

  g_free (wcs);

  return result;
#else
  return g_strdup (opsysstring);
#endif
}
/*
 * g_filename_from_utf8
 *
 * The reverse of g_filename_to_utf8: converts a UTF-8 string into
 * the encoding used for file names by the C runtime.
 *
 * utf8string: NUL-terminated UTF-8 string.
 *
 * Returns a newly allocated NUL-terminated string which the caller
 * must release with g_free(). On Win32, NULL is returned if
 * utf8string is NULL, contains an invalid lead byte, a truncated
 * sequence or a bad continuation byte, or encodes a value above
 * 0x10ffff. On other systems the argument is simply duplicated.
 */
gchar *
g_filename_from_utf8 (const gchar *utf8string)
{
#ifdef G_OS_WIN32
  gint i, mask, clen, mblen, len;
  wchar_t *wcs, *wcp;
  gchar *result;
  const guchar *cp, *end;
  guchar c;
  guint32 wc;
  gint n;

  /* Behave like the g_strdup() branch below for a NULL argument. */
  if (utf8string == NULL)
    return NULL;

  len = strlen (utf8string);

  /* First convert to wide chars (UTF-16). A sequence of k bytes
   * produces at most min (k, 2) UTF-16 units, so len + 1 units is
   * always enough room.
   */
  cp = (const guchar *) utf8string;
  end = cp + len;
  n = 0;
  wcs = g_new (wchar_t, len + 1);
  wcp = wcs;

  while (cp != end)
    {
      c = *cp;

      if (c < 0x80)
        {
          clen = 1;
          mask = 0x7f;
        }
      else if ((c & 0xe0) == 0xc0)
        {
          clen = 2;
          mask = 0x1f;
        }
      else if ((c & 0xf0) == 0xe0)
        {
          clen = 3;
          mask = 0x0f;
        }
      else if ((c & 0xf8) == 0xf0)
        {
          clen = 4;
          mask = 0x07;
        }
      else if ((c & 0xfc) == 0xf8)
        {
          clen = 5;
          mask = 0x03;
        }
      else if ((c & 0xfe) == 0xfc)
        {
          /* Only 0xfc and 0xfd start a six-byte sequence; 0xfe and
           * 0xff are never valid and fall through to the error case.
           */
          clen = 6;
          mask = 0x01;
        }
      else
        goto invalid;

      /* The sequence must not run past the end of the string. */
      if (end - cp < clen)
        goto invalid;

      /* Accumulate in 32 bits; wchar_t is only 16 bits wide here. */
      wc = c & mask;
      for (i = 1; i < clen; i++)
        {
          if ((cp[i] & 0xc0) != 0x80)
            goto invalid;

          wc = (wc << 6) | (cp[i] & 0x3f);
        }
      cp += clen;

      if (wc < 0x10000)
        {
          /* Encoded surrogates are passed through as-is, so strings
           * in which a surrogate pair was encoded as two three-byte
           * sequences still convert.
           */
          *wcp++ = wc;
          n++;
        }
      else if (wc < 0x110000)
        {
          /* Outside the BMP: split into a UTF-16 surrogate pair */
          wc -= 0x10000;
          *wcp++ = 0xd800 + (wc >> 10);
          *wcp++ = 0xdc00 + (wc & 0x3ff);
          n += 2;
        }
      else
        goto invalid;		/* Not representable in UTF-16 */
    }

  /* n is the number of wide chars constructed */

  /* Convert to a string in the current ANSI codepage. Three bytes
   * per wide char leaves ample room for double-byte codepages.
   */
  result = g_new (gchar, 3 * n + 1);
  if (n > 0)
    mblen = WideCharToMultiByte (CP_ACP, 0, wcs, n, result, 3 * n, NULL, NULL);
  else
    mblen = 0;
  result[mblen] = 0;

  g_free (wcs);

  return result;

 invalid:
  g_free (wcs);

  return NULL;
#else
  return g_strdup (utf8string);
#endif
}
/* blame Elliot for these next five routines */ /* blame Elliot for these next five routines */
gchar* gchar*
g_strchug (gchar *string) g_strchug (gchar *string)