Move charset and locale name functions to their own files

They did not really belong into either gutils or gutf8.
This commit is contained in:
Matthias Clasen
2011-10-16 18:40:58 -04:00
parent 86cc4b246d
commit 0589f715e5
6 changed files with 634 additions and 558 deletions

View File

@@ -34,8 +34,6 @@
#undef STRICT
#endif
#include "libcharset/libcharset.h"
#include "gconvert.h"
#include "ghash.h"
#include "gstrfuncs.h"
@@ -445,197 +443,6 @@ g_utf8_strncpy (gchar *dest,
return dest;
}
G_LOCK_DEFINE_STATIC (aliases);
static GHashTable *
get_alias_hash (void)
{
static GHashTable *alias_hash = NULL;
const char *aliases;
G_LOCK (aliases);
if (!alias_hash)
{
alias_hash = g_hash_table_new (g_str_hash, g_str_equal);
aliases = _g_locale_get_charset_aliases ();
while (*aliases != '\0')
{
const char *canonical;
const char *alias;
const char **alias_array;
int count = 0;
alias = aliases;
aliases += strlen (aliases) + 1;
canonical = aliases;
aliases += strlen (aliases) + 1;
alias_array = g_hash_table_lookup (alias_hash, canonical);
if (alias_array)
{
while (alias_array[count])
count++;
}
alias_array = g_renew (const char *, alias_array, count + 2);
alias_array[count] = alias;
alias_array[count + 1] = NULL;
g_hash_table_insert (alias_hash, (char *)canonical, alias_array);
}
}
G_UNLOCK (aliases);
return alias_hash;
}
/* As an abuse of the alias table, the following routines gets
* the charsets that are aliases for the canonical name.
*/
G_GNUC_INTERNAL const char **
_g_charset_get_aliases (const char *canonical_name)
{
GHashTable *alias_hash = get_alias_hash ();
return g_hash_table_lookup (alias_hash, canonical_name);
}
static gboolean
g_utf8_get_charset_internal (const char *raw_data,
const char **a)
{
const char *charset = getenv("CHARSET");
if (charset && *charset)
{
*a = charset;
if (charset && strstr (charset, "UTF-8"))
return TRUE;
else
return FALSE;
}
/* The libcharset code tries to be thread-safe without
* a lock, but has a memory leak and a missing memory
* barrier, so we lock for it
*/
G_LOCK (aliases);
charset = _g_locale_charset_unalias (raw_data);
G_UNLOCK (aliases);
if (charset && *charset)
{
*a = charset;
if (charset && strstr (charset, "UTF-8"))
return TRUE;
else
return FALSE;
}
/* Assume this for compatibility at present. */
*a = "US-ASCII";
return FALSE;
}
typedef struct _GCharsetCache GCharsetCache;
struct _GCharsetCache {
gboolean is_utf8;
gchar *raw;
gchar *charset;
};
static void
charset_cache_free (gpointer data)
{
GCharsetCache *cache = data;
g_free (cache->raw);
g_free (cache->charset);
g_free (cache);
}
/**
* g_get_charset:
* @charset: return location for character set name
*
* Obtains the character set for the <link linkend="setlocale">current
* locale</link>; you might use this character set as an argument to
* g_convert(), to convert from the current locale's encoding to some
* other encoding. (Frequently g_locale_to_utf8() and g_locale_from_utf8()
* are nice shortcuts, though.)
*
* On Windows the character set returned by this function is the
* so-called system default ANSI code-page. That is the character set
* used by the "narrow" versions of C library and Win32 functions that
* handle file names. It might be different from the character set
* used by the C library's current locale.
*
* The return value is %TRUE if the locale's encoding is UTF-8, in that
* case you can perhaps avoid calling g_convert().
*
* The string returned in @charset is not allocated, and should not be
* freed.
*
* Return value: %TRUE if the returned charset is UTF-8
*/
gboolean
g_get_charset (const char **charset)
{
static GPrivate cache_private = G_PRIVATE_INIT (charset_cache_free);
GCharsetCache *cache = g_private_get (&cache_private);
const gchar *raw;
if (!cache)
{
cache = g_new0 (GCharsetCache, 1);
g_private_set (&cache_private, cache);
}
G_LOCK (aliases);
raw = _g_locale_charset_raw ();
G_UNLOCK (aliases);
if (!(cache->raw && strcmp (cache->raw, raw) == 0))
{
const gchar *new_charset;
g_free (cache->raw);
g_free (cache->charset);
cache->raw = g_strdup (raw);
cache->is_utf8 = g_utf8_get_charset_internal (raw, &new_charset);
cache->charset = g_strdup (new_charset);
}
if (charset)
*charset = cache->charset;
return cache->is_utf8;
}
/**
* g_get_codeset:
*
* Gets the character set for the current locale.
*
* Return value: a newly allocated string containing the name
* of the character set. This string must be freed with g_free().
*/
gchar *
g_get_codeset (void)
{
const gchar *charset;
g_get_charset (&charset);
return g_strdup (charset);
}
/* unicode_strchr */
/**