glib.h New functions for conversion between UTF-8 and the encoding

2000-02-01  Tor Lillqvist  <tml@iki.fi>

* glib.h
* gstrfuncs.c (g_filename_to_utf8, g_filename_from_utf8): New
functions for conversion between UTF-8 and the encoding expected
by C runtime functions like open() and stat(), and returned by
readdir().

Implement them on Win32 where we use the system "ANSI" codepage,
which might be single-byte or double-byte. On Unix, just skip the
issue for now and provide dummy implementations that return a copy
of the argument.

* README.win32
* build-dll
* glib.def: Minor updates.
This commit is contained in:
Tor Lillqvist 2000-02-02 23:39:32 +00:00 committed by Tor Lillqvist
parent 86b2741c1e
commit c22cf34e92
16 changed files with 637 additions and 10 deletions

View File

@ -1,3 +1,20 @@
2000-02-01 Tor Lillqvist <tml@iki.fi>
* glib.h
* gstrfuncs.c (g_filename_to_utf8, g_filename_from_utf8): New
functions for conversion between UTF-8 and the encoding expected
by C runtime functions like open() and stat(), and returned by
readdir().
Implement them on Win32 where we use the system "ANSI" codepage,
which might be single-byte or double-byte. On Unix, just skip the
issue for now and provide dummy implementations that return a copy
of the argument.
* README.win32
* build-dll
* glib.def: Minor updates.
Wed Jan 26 05:24:38 2000 Tim Janik <timj@gtk.org>
* glib.h:

View File

@ -1,3 +1,20 @@
2000-02-01 Tor Lillqvist <tml@iki.fi>
* glib.h
* gstrfuncs.c (g_filename_to_utf8, g_filename_from_utf8): New
functions for conversion between UTF-8 and the encoding expected
by C runtime functions like open() and stat(), and returned by
readdir().
Implement them on Win32 where we use the system "ANSI" codepage,
which might be single-byte or double-byte. On Unix, just skip the
issue for now and provide dummy implementations that return a copy
of the argument.
* README.win32
* build-dll
* glib.def: Minor updates.
Wed Jan 26 05:24:38 2000 Tim Janik <timj@gtk.org>
* glib.h:

View File

@ -1,3 +1,20 @@
2000-02-01 Tor Lillqvist <tml@iki.fi>
* glib.h
* gstrfuncs.c (g_filename_to_utf8, g_filename_from_utf8): New
functions for conversion between UTF-8 and the encoding expected
by C runtime functions like open() and stat(), and returned by
readdir().
Implement them on Win32 where we use the system "ANSI" codepage,
which might be single-byte or double-byte. On Unix, just skip the
issue for now and provide dummy implementations that return a copy
of the argument.
* README.win32
* build-dll
* glib.def: Minor updates.
Wed Jan 26 05:24:38 2000 Tim Janik <timj@gtk.org>
* glib.h:

View File

@ -1,3 +1,20 @@
2000-02-01 Tor Lillqvist <tml@iki.fi>
* glib.h
* gstrfuncs.c (g_filename_to_utf8, g_filename_from_utf8): New
functions for conversion between UTF-8 and the encoding expected
by C runtime functions like open() and stat(), and returned by
readdir().
Implement them on Win32 where we use the system "ANSI" codepage,
which might be single-byte or double-byte. On Unix, just skip the
issue for now and provide dummy implementations that return a copy
of the argument.
* README.win32
* build-dll
* glib.def: Minor updates.
Wed Jan 26 05:24:38 2000 Tim Janik <timj@gtk.org>
* glib.h:

View File

@ -1,3 +1,20 @@
2000-02-01 Tor Lillqvist <tml@iki.fi>
* glib.h
* gstrfuncs.c (g_filename_to_utf8, g_filename_from_utf8): New
functions for conversion between UTF-8 and the encoding expected
by C runtime functions like open() and stat(), and returned by
readdir().
Implement them on Win32 where we use the system "ANSI" codepage,
which might be single-byte or double-byte. On Unix, just skip the
issue for now and provide dummy implementations that return a copy
of the argument.
* README.win32
* build-dll
* glib.def: Minor updates.
Wed Jan 26 05:24:38 2000 Tim Janik <timj@gtk.org>
* glib.h:

View File

@ -1,3 +1,20 @@
2000-02-01 Tor Lillqvist <tml@iki.fi>
* glib.h
* gstrfuncs.c (g_filename_to_utf8, g_filename_from_utf8): New
functions for conversion between UTF-8 and the encoding expected
by C runtime functions like open() and stat(), and returned by
readdir().
Implement them on Win32 where we use the system "ANSI" codepage,
which might be single-byte or double-byte. On Unix, just skip the
issue for now and provide dummy implementations that return a copy
of the argument.
* README.win32
* build-dll
* glib.def: Minor updates.
Wed Jan 26 05:24:38 2000 Tim Janik <timj@gtk.org>
* glib.h:

View File

@ -1,3 +1,20 @@
2000-02-01 Tor Lillqvist <tml@iki.fi>
* glib.h
* gstrfuncs.c (g_filename_to_utf8, g_filename_from_utf8): New
functions for conversion between UTF-8 and the encoding expected
by C runtime functions like open() and stat(), and returned by
readdir().
Implement them on Win32 where we use the system "ANSI" codepage,
which might be single-byte or double-byte. On Unix, just skip the
issue for now and provide dummy implementations that return a copy
of the argument.
* README.win32
* build-dll
* glib.def: Minor updates.
Wed Jan 26 05:24:38 2000 Tim Janik <timj@gtk.org>
* glib.h:

View File

@ -1,3 +1,20 @@
2000-02-01 Tor Lillqvist <tml@iki.fi>
* glib.h
* gstrfuncs.c (g_filename_to_utf8, g_filename_from_utf8): New
functions for conversion between UTF-8 and the encoding expected
by C runtime functions like open() and stat(), and returned by
readdir().
Implement them on Win32 where we use the system "ANSI" codepage,
which might be single-byte or double-byte. On Unix, just skip the
issue for now and provide dummy implementations that return a copy
of the argument.
* README.win32
* build-dll
* glib.def: Minor updates.
Wed Jan 26 05:24:38 2000 Tim Janik <timj@gtk.org>
* glib.h:

View File

@ -54,6 +54,14 @@ changed to G_OS_WIN32.
G_OS_WIN32 implies using the Microsoft C runtime MSVCRT.DLL.
Building software that use GLib or GTk+
=======================================
Unfortunately, even building software that just *uses* GLib or GTk+
also requires having the right compiler set up the right way, so if
you intend to use gcc, follow the relevant instructions below in that
case, too.
Pthreads library
================
@ -91,12 +99,9 @@ Building with gcc
I use the latest and greatest gcc, gcc-2.95.2. 2.95 will also work.
Earlier versions might, but you are on your own.
Read and understand these instruction carefully. If you don't
Read these instructions carefully and understand them. If you don't
understand or can't follow the instructions, you probably shouldn't
want to build GLib (or GTk+ or GIMP) yourself anyway. Unfortunately,
even building software that just *use* GLib or GTk+ also require to
have the right compiler set up the right way, so follow these
instructions in that case, too.
want to build GLib (or GTk+ or GIMP) yourself anyway.
0) Get and install Cygwin B20.1.
@ -270,6 +275,26 @@ diff -ru2 ./w32api/include/wingdi.h ../../src/mingw-runtime-19991107/w32api/incl
================ cut here ================
fpos_t should be long long with MSVCRT.DLL:
================ cut here ================
--- stdio.h~ Thu Aug 19 02:47:42 1999
+++ stdio.h Mon Jan 17 21:58:20 2000
@@ -296,6 +296,11 @@
* it is fairly evident that the fpos_t type is a long (in CRTDLL.DLL).
* Perhaps an unsigned long? TODO?
+ * In MSVCRT.DLL it's a long long, however.
*/
+#ifdef __MSVCRT__
+typedef long long fpos_t;
+#else
typedef long fpos_t;
+#endif
int fgetpos (FILE* fileGetPosition, fpos_t* pfpos);
================ cut here ================
(I haven't checked yet if other small errors I have noticed in
previous w32api header versions have been corrected.)

View File

@ -1,10 +1,10 @@
#!/bin/bash
# Temporary hack until building dlls or executables with exported
# entry points is easier with gcc -mno-cygwin ("mingw32").
# Temporary hack until building dlls is easier with gcc -mno-cygwin
# ("mingw32").
# This is usable with cygwin b20.1 and egcs-2.91.66 19990314
# (egcs-1.1.2 release) or gcc-2.95 as distributed by Mumit Khan. For
# (egcs-1.1.2 release) or gcc-2.95(.2) as distributed by Mumit Khan. For
# other combinations, no idea.
GCC="gcc"

View File

@ -88,6 +88,8 @@ EXPORTS
g_direct_equal
g_direct_hash
g_dirname
g_filename_from_utf8
g_filename_to_utf8
g_free
g_get_current_dir
g_get_current_time
@ -348,7 +350,6 @@ EXPORTS
g_source_remove
g_source_remove_by_source_data
g_source_remove_by_user_data
g_spaced_primes_closest
g_static_mutex_get_mutex_impl
g_static_private_get
g_static_private_set
@ -446,3 +447,4 @@ EXPORTS
glib_major_version
glib_micro_version
glib_minor_version
g_spaced_primes_closest

7
glib.h
View File

@ -1604,6 +1604,13 @@ gchar* g_strcompress (const gchar *source);
*/
gchar* g_strescape (const gchar *source,
const gchar *exceptions);
/*
* Convert between the operating system (or C runtime)
* representation of file names and UTF-8.
*/
gchar* g_filename_to_utf8 (const gchar *opsysstring);
gchar* g_filename_from_utf8 (const gchar *utf8string);
/* Deprecated API:
* gchar* g_strescape (const gchar *source);
* Luckily this function wasn't much used.

View File

@ -88,6 +88,8 @@ EXPORTS
g_direct_equal
g_direct_hash
g_dirname
g_filename_from_utf8
g_filename_to_utf8
g_free
g_get_current_dir
g_get_current_time
@ -348,7 +350,6 @@ EXPORTS
g_source_remove
g_source_remove_by_source_data
g_source_remove_by_user_data
g_spaced_primes_closest
g_static_mutex_get_mutex_impl
g_static_private_get
g_static_private_set
@ -446,3 +447,4 @@ EXPORTS
glib_major_version
glib_micro_version
glib_minor_version
g_spaced_primes_closest

View File

@ -1604,6 +1604,13 @@ gchar* g_strcompress (const gchar *source);
*/
gchar* g_strescape (const gchar *source,
const gchar *exceptions);
/*
* Convert between the operating system (or C runtime)
* representation of file names and UTF-8.
*/
gchar* g_filename_to_utf8 (const gchar *opsysstring);
gchar* g_filename_from_utf8 (const gchar *utf8string);
/* Deprecated API:
* gchar* g_strescape (const gchar *source);
* Luckily this function wasn't much used.

View File

@ -42,6 +42,11 @@
#include <signal.h>
#endif
#include "glib.h"
#ifdef G_OS_WIN32
#include <windows.h>
#endif
/* do not include <unistd.h> in this place since it
* interferes with g_strsignal() on some OSes
*/
@ -1068,6 +1073,225 @@ g_strescape (const gchar *source,
return dest;
}
/*
 * g_filename_to_utf8
 *
 * Converts a string which is in the encoding used for file names by
 * the C runtime (usually the same as that used by the operating
 * system) in the current locale into a UTF-8 string.
 *
 * opsysstring: NUL-terminated file name in the C runtime encoding.
 *
 * Returns a newly allocated NUL-terminated string which the caller
 * should release with g_free().
 *
 * On Win32 the input is first converted from the system "ANSI"
 * codepage to UTF-16 with MultiByteToWideChar() and then encoded as
 * UTF-8 here. A NULL argument is rejected with g_return_val_if_fail()
 * and NULL is returned. If MultiByteToWideChar() reports zero wide
 * characters the result is an empty string.
 *
 * On other systems the argument is simply copied with g_strdup().
 */
gchar *
g_filename_to_utf8 (const gchar *opsysstring)
{
#ifdef G_OS_WIN32
  gint i, len, wclen, nbytes, first;
  wchar_t *wcs;
  guint32 ch;
  gchar *result, *bp;

  g_return_val_if_fail (opsysstring != NULL, NULL);

  len = strlen (opsysstring);

  /* Room for one wide character per input byte. */
  wcs = g_new (wchar_t, len);
  wclen = MultiByteToWideChar (CP_ACP, 0, opsysstring, len, wcs, len);

  /* A UTF-16 unit never needs more than three UTF-8 bytes: a BMP
   * character takes at most three, and a surrogate pair (two units)
   * takes four.
   */
  result = g_malloc (3 * wclen + 1);
  bp = result;

  for (i = 0; i < wclen; i++)
    {
      ch = wcs[i];

      /* wchar_t is 16 bits wide on Win32, so characters outside the
       * BMP arrive as a high/low surrogate pair. Combine the pair into
       * one code point so that it is written as a single four-byte
       * sequence instead of two three-byte ones. An unpaired surrogate
       * is encoded as is.
       */
      if (ch >= 0xd800 && ch < 0xdc00 &&
          i + 1 < wclen &&
          wcs[i + 1] >= 0xdc00 && wcs[i + 1] < 0xe000)
        {
          ch = 0x10000 + ((ch - 0xd800) << 10) + (wcs[i + 1] - 0xdc00);
          i++;
        }

      if (ch < 0x80)
        {
          first = 0;
          nbytes = 1;
        }
      else if (ch < 0x800)
        {
          first = 0xc0;
          nbytes = 2;
        }
      else if (ch < 0x10000)
        {
          first = 0xe0;
          nbytes = 3;
        }
      else
        {
          first = 0xf0;
          nbytes = 4;
        }

      /* Fill in the continuation bytes from the last one backwards,
       * then the lead byte with its length marker.
       */
      switch (nbytes)
        {
        case 4: bp[3] = (ch & 0x3f) | 0x80; ch >>= 6; /* Fall through */
        case 3: bp[2] = (ch & 0x3f) | 0x80; ch >>= 6; /* Fall through */
        case 2: bp[1] = (ch & 0x3f) | 0x80; ch >>= 6; /* Fall through */
        case 1: bp[0] = ch | first;
        }

      bp += nbytes;
    }
  *bp = 0;

  g_free (wcs);

  return result;

#else

  return g_strdup (opsysstring);

#endif
}
/*
 * g_filename_from_utf8
 *
 * The reverse of g_filename_to_utf8: converts a UTF-8 string into the
 * encoding used for file names by the C runtime.
 *
 * utf8string: NUL-terminated UTF-8 string.
 *
 * Returns a newly allocated NUL-terminated string which the caller
 * should release with g_free().
 *
 * On Win32, NULL is returned if the input is not well-formed (bad
 * lead byte, truncated sequence, bad continuation byte) or encodes a
 * value above U+10FFFF, which UTF-16 cannot represent. A NULL
 * argument is rejected with g_return_val_if_fail(). Overlong
 * encodings are not rejected. The decoded text is converted to the
 * system "ANSI" codepage with WideCharToMultiByte().
 *
 * On other systems the argument is simply copied with g_strdup().
 */
gchar *
g_filename_from_utf8 (const gchar *utf8string)
{
#ifdef G_OS_WIN32
  gint i, mask, clen, mblen, len, n;
  wchar_t *wcs, *wcp;
  gchar *result;
  const guchar *cp, *end;
  guchar c;
  guint32 ch;

  g_return_val_if_fail (utf8string != NULL, NULL);

  len = strlen (utf8string);

  /* First convert to wide chars. Every UTF-8 sequence yields at most
   * as many UTF-16 units as it has bytes, so len units are enough.
   */
  cp = (const guchar *) utf8string;
  end = cp + len;

  wcs = g_new (wchar_t, len + 1);
  wcp = wcs;

  while (cp != end)
    {
      c = *cp;

      if (c < 0x80)
        {
          clen = 1;
          mask = 0x7f;
        }
      else if ((c & 0xe0) == 0xc0)
        {
          clen = 2;
          mask = 0x1f;
        }
      else if ((c & 0xf0) == 0xe0)
        {
          clen = 3;
          mask = 0x0f;
        }
      else if ((c & 0xf8) == 0xf0)
        {
          clen = 4;
          mask = 0x07;
        }
      else if ((c & 0xfc) == 0xf8)
        {
          clen = 5;
          mask = 0x03;
        }
      else if ((c & 0xfe) == 0xfc)
        {
          /* The mask must be 0xfe: 0xfe and 0xff are never valid
           * lead bytes.
           */
          clen = 6;
          mask = 0x01;
        }
      else
        goto invalid;

      /* The whole sequence has to fit in the remaining input. */
      if (clen > end - cp)
        goto invalid;

      ch = c & mask;
      for (i = 1; i < clen; i++)
        {
          if ((cp[i] & 0xc0) != 0x80)
            goto invalid;

          ch = (ch << 6) | (cp[i] & 0x3f);
        }
      cp += clen;

      if (ch < 0x10000)
        *wcp++ = ch;
      else if (ch < 0x110000)
        {
          /* wchar_t is 16 bits wide on Win32, so store characters
           * outside the BMP as a surrogate pair rather than
           * truncating them.
           */
          ch -= 0x10000;
          *wcp++ = 0xd800 + (ch >> 10);
          *wcp++ = 0xdc00 + (ch & 0x3ff);
        }
      else
        goto invalid;
    }

  /* n is the number of wide chars constructed */
  n = wcp - wcs;

  /* Convert to a string in the current ANSI codepage */
  result = g_new (gchar, 3 * n + 1);
  mblen = WideCharToMultiByte (CP_ACP, 0, wcs, n, result, 3 * n, NULL, NULL);
  result[mblen] = 0;

  g_free (wcs);

  return result;

 invalid:
  g_free (wcs);

  return NULL;

#else

  return g_strdup (utf8string);

#endif
}
/* blame Elliot for these next five routines */
gchar*
g_strchug (gchar *string)

View File

@ -42,6 +42,11 @@
#include <signal.h>
#endif
#include "glib.h"
#ifdef G_OS_WIN32
#include <windows.h>
#endif
/* do not include <unistd.h> in this place since it
* interferes with g_strsignal() on some OSes
*/
@ -1068,6 +1073,225 @@ g_strescape (const gchar *source,
return dest;
}
/*
 * g_filename_to_utf8
 *
 * Converts a string which is in the encoding used for file names by
 * the C runtime (usually the same as that used by the operating
 * system) in the current locale into a UTF-8 string.
 *
 * opsysstring: NUL-terminated file name in the C runtime encoding.
 *
 * Returns a newly allocated NUL-terminated string which the caller
 * should release with g_free().
 *
 * On Win32 the input is first converted from the system "ANSI"
 * codepage to UTF-16 with MultiByteToWideChar() and then encoded as
 * UTF-8 here. A NULL argument is rejected with g_return_val_if_fail()
 * and NULL is returned. If MultiByteToWideChar() reports zero wide
 * characters the result is an empty string.
 *
 * On other systems the argument is simply copied with g_strdup().
 */
gchar *
g_filename_to_utf8 (const gchar *opsysstring)
{
#ifdef G_OS_WIN32
  gint i, len, wclen, nbytes, first;
  wchar_t *wcs;
  guint32 ch;
  gchar *result, *bp;

  g_return_val_if_fail (opsysstring != NULL, NULL);

  len = strlen (opsysstring);

  /* Room for one wide character per input byte. */
  wcs = g_new (wchar_t, len);
  wclen = MultiByteToWideChar (CP_ACP, 0, opsysstring, len, wcs, len);

  /* A UTF-16 unit never needs more than three UTF-8 bytes: a BMP
   * character takes at most three, and a surrogate pair (two units)
   * takes four.
   */
  result = g_malloc (3 * wclen + 1);
  bp = result;

  for (i = 0; i < wclen; i++)
    {
      ch = wcs[i];

      /* wchar_t is 16 bits wide on Win32, so characters outside the
       * BMP arrive as a high/low surrogate pair. Combine the pair into
       * one code point so that it is written as a single four-byte
       * sequence instead of two three-byte ones. An unpaired surrogate
       * is encoded as is.
       */
      if (ch >= 0xd800 && ch < 0xdc00 &&
          i + 1 < wclen &&
          wcs[i + 1] >= 0xdc00 && wcs[i + 1] < 0xe000)
        {
          ch = 0x10000 + ((ch - 0xd800) << 10) + (wcs[i + 1] - 0xdc00);
          i++;
        }

      if (ch < 0x80)
        {
          first = 0;
          nbytes = 1;
        }
      else if (ch < 0x800)
        {
          first = 0xc0;
          nbytes = 2;
        }
      else if (ch < 0x10000)
        {
          first = 0xe0;
          nbytes = 3;
        }
      else
        {
          first = 0xf0;
          nbytes = 4;
        }

      /* Fill in the continuation bytes from the last one backwards,
       * then the lead byte with its length marker.
       */
      switch (nbytes)
        {
        case 4: bp[3] = (ch & 0x3f) | 0x80; ch >>= 6; /* Fall through */
        case 3: bp[2] = (ch & 0x3f) | 0x80; ch >>= 6; /* Fall through */
        case 2: bp[1] = (ch & 0x3f) | 0x80; ch >>= 6; /* Fall through */
        case 1: bp[0] = ch | first;
        }

      bp += nbytes;
    }
  *bp = 0;

  g_free (wcs);

  return result;

#else

  return g_strdup (opsysstring);

#endif
}
/*
 * g_filename_from_utf8
 *
 * The reverse of g_filename_to_utf8: converts a UTF-8 string into the
 * encoding used for file names by the C runtime.
 *
 * utf8string: NUL-terminated UTF-8 string.
 *
 * Returns a newly allocated NUL-terminated string which the caller
 * should release with g_free().
 *
 * On Win32, NULL is returned if the input is not well-formed (bad
 * lead byte, truncated sequence, bad continuation byte) or encodes a
 * value above U+10FFFF, which UTF-16 cannot represent. A NULL
 * argument is rejected with g_return_val_if_fail(). Overlong
 * encodings are not rejected. The decoded text is converted to the
 * system "ANSI" codepage with WideCharToMultiByte().
 *
 * On other systems the argument is simply copied with g_strdup().
 */
gchar *
g_filename_from_utf8 (const gchar *utf8string)
{
#ifdef G_OS_WIN32
  gint i, mask, clen, mblen, len, n;
  wchar_t *wcs, *wcp;
  gchar *result;
  const guchar *cp, *end;
  guchar c;
  guint32 ch;

  g_return_val_if_fail (utf8string != NULL, NULL);

  len = strlen (utf8string);

  /* First convert to wide chars. Every UTF-8 sequence yields at most
   * as many UTF-16 units as it has bytes, so len units are enough.
   */
  cp = (const guchar *) utf8string;
  end = cp + len;

  wcs = g_new (wchar_t, len + 1);
  wcp = wcs;

  while (cp != end)
    {
      c = *cp;

      if (c < 0x80)
        {
          clen = 1;
          mask = 0x7f;
        }
      else if ((c & 0xe0) == 0xc0)
        {
          clen = 2;
          mask = 0x1f;
        }
      else if ((c & 0xf0) == 0xe0)
        {
          clen = 3;
          mask = 0x0f;
        }
      else if ((c & 0xf8) == 0xf0)
        {
          clen = 4;
          mask = 0x07;
        }
      else if ((c & 0xfc) == 0xf8)
        {
          clen = 5;
          mask = 0x03;
        }
      else if ((c & 0xfe) == 0xfc)
        {
          /* The mask must be 0xfe: 0xfe and 0xff are never valid
           * lead bytes.
           */
          clen = 6;
          mask = 0x01;
        }
      else
        goto invalid;

      /* The whole sequence has to fit in the remaining input. */
      if (clen > end - cp)
        goto invalid;

      ch = c & mask;
      for (i = 1; i < clen; i++)
        {
          if ((cp[i] & 0xc0) != 0x80)
            goto invalid;

          ch = (ch << 6) | (cp[i] & 0x3f);
        }
      cp += clen;

      if (ch < 0x10000)
        *wcp++ = ch;
      else if (ch < 0x110000)
        {
          /* wchar_t is 16 bits wide on Win32, so store characters
           * outside the BMP as a surrogate pair rather than
           * truncating them.
           */
          ch -= 0x10000;
          *wcp++ = 0xd800 + (ch >> 10);
          *wcp++ = 0xdc00 + (ch & 0x3ff);
        }
      else
        goto invalid;
    }

  /* n is the number of wide chars constructed */
  n = wcp - wcs;

  /* Convert to a string in the current ANSI codepage */
  result = g_new (gchar, 3 * n + 1);
  mblen = WideCharToMultiByte (CP_ACP, 0, wcs, n, result, 3 * n, NULL, NULL);
  result[mblen] = 0;

  g_free (wcs);

  return result;

 invalid:
  g_free (wcs);

  return NULL;

#else

  return g_strdup (utf8string);

#endif
}
/* blame Elliot for these next five routines */
gchar*
g_strchug (gchar *string)