glib.h New functions for conversion between UTF-8 and the encoding

2000-02-01 Tor Lillqvist <tml@iki.fi> * glib.h * gstrfuncs.c (g_filename_to_utf8, g_filename_from_utf8): New functions for conversion between UTF-8 and the encoding expected by C runtime functions like open() and stat(), and returned by readdir(). Implement them on Win32 where we use the system "ANSI" codepage, which might be single-byte or double-byte. On Unix, just skip the issue for now and provide dummy implementations that return a copy of the argument. * README.win32 * build-dll * glib.def: Minor updates.
2025-07-20 17:07:52 +02:00 · 2000-02-02 23:39:32 +00:00
parent 86b2741c1e
commit c22cf34e92
16 changed files with 637 additions and 10 deletions
--- a/glib/gstrfuncs.c
+++ b/glib/gstrfuncs.c
@@ -42,6 +42,11 @@
 #include <signal.h>
 #endif
 #include "glib.h"
+
+#ifdef G_OS_WIN32
+#include <windows.h>
+#endif
+
 /* do not include <unistd.h> in this place since it
 * inteferes with g_strsignal() on some OSes
 */
@@ -1068,6 +1073,225 @@ g_strescape (const gchar *source,
  return dest;
 }

+/*
+ * g_filename_to_utf8
+ *
+ * Converts a string which is in the encoding used for file names by
+ * the C runtime (usually the same as that used by the operating
+ * system) in the current locale into a UTF-8 string.
+ *
+ * On Win32 the input is taken to be in the system "ANSI" codepage
+ * (CP_ACP), which may be single-byte or double-byte, and is
+ * converted via UTF-16. Elsewhere the argument is simply duplicated
+ * with g_strdup().
+ *
+ * opsysstring: NUL-terminated string in the C runtime's file name
+ *   encoding.
+ *
+ * Returns a newly allocated, NUL-terminated string (release it
+ * with g_free()). If MultiByteToWideChar() converts nothing, the
+ * result is the empty string.
+ */
+
+gchar *
+g_filename_to_utf8 (const gchar *opsysstring)
+{
+#ifdef G_OS_WIN32
+
+  gint i, clen, wclen, first;
+  const gint len = strlen (opsysstring);
+  wchar_t *wcs;
+  guint32 wc;
+  gchar *result, *bp;
+
+  wcs = g_new (wchar_t, len);
+  wclen = MultiByteToWideChar (CP_ACP, 0, opsysstring, len, wcs, len);
+
+  /* wchar_t is a 16-bit UTF-16 code unit on Win32. A single unit
+   * needs at most three UTF-8 bytes and a surrogate pair (two units)
+   * needs four, so 3 * wclen bytes plus the terminator always
+   * suffice.
+   */
+  result = g_malloc (3 * wclen + 1);
+
+  bp = result;
+  for (i = 0; i < wclen; i++)
+    {
+      wc = wcs[i];
+
+      /* Join a high/low surrogate pair into one code point, so that
+       * a character outside the BMP becomes one 4-byte sequence
+       * rather than two separately encoded surrogates (which is not
+       * valid UTF-8). A lone surrogate is passed through unchanged.
+       */
+      if (wc >= 0xd800 && wc < 0xdc00
+          && i + 1 < wclen
+          && wcs[i + 1] >= 0xdc00 && wcs[i + 1] < 0xe000)
+        {
+          wc = 0x10000 + ((wc - 0xd800) << 10) + (wcs[i + 1] - 0xdc00);
+          i++;
+        }
+
+      if (wc < 0x80)
+        {
+          first = 0;
+          clen = 1;
+        }
+      else if (wc < 0x800)
+        {
+          first = 0xc0;
+          clen = 2;
+        }
+      else if (wc < 0x10000)
+        {
+          first = 0xe0;
+          clen = 3;
+        }
+      else
+        {
+          /* Largest value a surrogate pair can yield is 0x10ffff. */
+          first = 0xf0;
+          clen = 4;
+        }
+
+      /* Emit continuation bytes from last to first, six payload
+       * bits each, then the lead byte with its length marker.
+       */
+      switch (clen)
+        {
+        case 4: bp[3] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */
+        case 3: bp[2] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */
+        case 2: bp[1] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */
+        case 1: bp[0] = wc | first;
+        }
+
+      bp += clen;
+    }
+  *bp = 0;
+
+  g_free (wcs);
+
+  return result;
+
+#else
+
+  return g_strdup (opsysstring);
+
+#endif
+}
+
+/*
+ * g_filename_from_utf8
+ *
+ * The reverse of g_filename_to_utf8: converts a UTF-8 string into
+ * the encoding used for file names by the C runtime.
+ *
+ * On Win32 the string is decoded to UTF-16 and then converted to the
+ * system "ANSI" codepage (CP_ACP). Elsewhere the argument is simply
+ * duplicated with g_strdup().
+ *
+ * utf8string: NUL-terminated UTF-8 string.
+ *
+ * Returns a newly allocated, NUL-terminated string (release it with
+ * g_free()), or, on Win32, NULL if utf8string cannot be decoded: a
+ * bad lead byte, a missing or truncated continuation byte, or a
+ * value above 0x10ffff, which UTF-16 cannot represent.
+ */
+
+gchar *
+g_filename_from_utf8 (const gchar *utf8string)
+{
+#ifdef G_OS_WIN32
+
+  gint i, mask, clen, mblen;
+  const gint len = strlen (utf8string);
+  wchar_t *wcs, *wcp;
+  gchar *result;
+  guchar *cp, *end, c;
+  guint32 ucs;
+  gint n;
+
+  /* First convert to wide chars. Every code point consumes at least
+   * as many input bytes as the UTF-16 units it produces, so len + 1
+   * units are always enough.
+   */
+  cp = (guchar *) utf8string;
+  end = cp + len;
+  wcs = g_new (wchar_t, len + 1);
+  wcp = wcs;
+  while (cp != end)
+    {
+      c = *cp;
+
+      if (c < 0x80)
+        {
+          clen = 1;
+          mask = 0x7f;
+        }
+      else if ((c & 0xe0) == 0xc0)
+        {
+          clen = 2;
+          mask = 0x1f;
+        }
+      else if ((c & 0xf0) == 0xe0)
+        {
+          clen = 3;
+          mask = 0x0f;
+        }
+      else if ((c & 0xf8) == 0xf0)
+        {
+          clen = 4;
+          mask = 0x07;
+        }
+      else if ((c & 0xfc) == 0xf8)
+        {
+          clen = 5;
+          mask = 0x03;
+        }
+      else if ((c & 0xfe) == 0xfc)
+        {
+          /* Mask 0xfe, not 0xfc: 0xfe and 0xff are never lead bytes. */
+          clen = 6;
+          mask = 0x01;
+        }
+      else
+        {
+          g_free (wcs);
+          return NULL;
+        }
+
+      if (cp + clen > end)
+        {
+          g_free (wcs);
+          return NULL;
+        }
+
+      /* Accumulate in 32 bits; wchar_t holds only 16 on Win32. */
+      ucs = (cp[0] & mask);
+      for (i = 1; i < clen; i++)
+        {
+          if ((cp[i] & 0xc0) != 0x80)
+            {
+              g_free (wcs);
+              return NULL;
+            }
+          ucs = (ucs << 6) | (cp[i] & 0x3f);
+        }
+      cp += clen;
+
+      if (ucs > 0x10ffff)
+        {
+          /* Not representable in UTF-16. */
+          g_free (wcs);
+          return NULL;
+        }
+      else if (ucs >= 0x10000)
+        {
+          /* Outside the BMP: store as a high/low surrogate pair. */
+          ucs -= 0x10000;
+          *wcp++ = (wchar_t) (0xd800 + (ucs >> 10));
+          *wcp++ = (wchar_t) (0xdc00 + (ucs & 0x3ff));
+        }
+      else
+        {
+          *wcp++ = (wchar_t) ucs;
+        }
+    }
+
+  /* n is the number of wide chars constructed */
+  n = wcp - wcs;
+
+  /* Convert to a string in the current ANSI codepage */
+
+  result = g_new (gchar, 3 * n + 1);
+  mblen = WideCharToMultiByte (CP_ACP, 0, wcs, n, result, 3*n, NULL, NULL);
+  result[mblen] = 0;
+  g_free (wcs);
+
+  return result;
+
+#else
+
+  return g_strdup (utf8string);
+
+#endif
+}
+
+
 /* blame Elliot for these next five routines */
 gchar*
 g_strchug (gchar *string)