diff --git a/glib/Makefile.am b/glib/Makefile.am index 9b806c64c..22dbe4def 100644 --- a/glib/Makefile.am +++ b/glib/Makefile.am @@ -132,6 +132,7 @@ libglib_2_0_la_SOURCES = \ gbsearcharray.h \ gbuffer.c \ gbufferprivate.h \ + gcharset.c \ gchecksum.c \ gconvert.c \ gdataset.c \ @@ -250,7 +251,7 @@ deprecatedinclude_HEADERS = \ deprecated/gthread.h glibsubincludedir=$(includedir)/glib-2.0/glib -glibsubinclude_HEADERS = \ +glibsubinclude_HEADERS = \ galloca.h \ garray.h \ gasyncqueue.h \ @@ -258,7 +259,8 @@ glibsubinclude_HEADERS = \ gbacktrace.h \ gbase64.h \ gbitlock.h \ - gbookmarkfile.h \ + gbookmarkfile.h \ + gcharset.h \ gchecksum.h \ gconvert.h \ gdataset.h \ diff --git a/glib/gcharset.c b/glib/gcharset.c new file mode 100644 index 000000000..d5e17bfb0 --- /dev/null +++ b/glib/gcharset.c @@ -0,0 +1,589 @@ +/* gcharset.c - Charset information + * + * Copyright (C) 2011 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. 
+ */
+
+#include "config.h"
+
+#include "gcharset.h"
+
+#include "garray.h"
+#include "genviron.h"
+#include "ghash.h"
+#include "gmessages.h"
+#include "gstrfuncs.h"
+#include "gthread.h"
+
+#include "libcharset/libcharset.h"
+
+#include <string.h>
+#include <stdio.h>
+
+G_LOCK_DEFINE_STATIC (aliases);
+
+static GHashTable *
+get_alias_hash (void)
+{
+  static GHashTable *alias_hash = NULL;  /* canonical name -> NULL-terminated array of aliases */
+  const char *aliases;                   /* cursor over NUL-separated alias/canonical pairs */
+
+  G_LOCK (aliases);
+
+  if (!alias_hash)  /* built once, on first use, while holding the lock */
+    {
+      alias_hash = g_hash_table_new (g_str_hash, g_str_equal);
+
+      aliases = _g_locale_get_charset_aliases ();
+      while (*aliases != '\0')  /* an empty string ends the list */
+        {
+          const char *canonical;
+          const char *alias;
+          const char **alias_array;
+          int count = 0;
+
+          alias = aliases;
+          aliases += strlen (aliases) + 1;
+          canonical = aliases;
+          aliases += strlen (aliases) + 1;
+
+          alias_array = g_hash_table_lookup (alias_hash, canonical);
+          if (alias_array)
+            {
+              while (alias_array[count])
+                count++;
+            }
+
+          alias_array = g_renew (const char *, alias_array, count + 2);  /* room for the new alias + NULL */
+          alias_array[count] = alias;
+          alias_array[count + 1] = NULL;
+
+          g_hash_table_insert (alias_hash, (char *)canonical, alias_array);
+        }
+    }
+
+  G_UNLOCK (aliases);
+
+  return alias_hash;
+}
+
+/* As an abuse of the alias table, the following routine gets
+ * the charsets that are aliases for the canonical name.
+ */
+G_GNUC_INTERNAL const char **
+_g_charset_get_aliases (const char *canonical_name)
+{
+  GHashTable *alias_hash = get_alias_hash ();
+
+  return g_hash_table_lookup (alias_hash, canonical_name);
+}
+
+static gboolean
+g_utf8_get_charset_internal (const char  *raw_data,  /* raw name handed to _g_locale_charset_unalias() */
+                             const char **a)         /* out: charset name; set on every return path, not duplicated */
+{
+  const char *charset = g_getenv ("CHARSET");  /* a non-empty CHARSET is used before the locale is consulted */
+
+  if (charset && *charset)
+    {
+      *a = charset;
+
+      if (charset && strstr (charset, "UTF-8"))
+        return TRUE;
+      else
+        return FALSE;
+    }
+
+  /* The libcharset code tries to be thread-safe without
+   * a lock, but has a memory leak and a missing memory
+   * barrier, so we lock for it
+   */
+  G_LOCK (aliases);
+  charset = _g_locale_charset_unalias (raw_data);
+  G_UNLOCK (aliases);
+
+  if (charset && *charset)
+    {
+      *a = charset;
+
+      if (charset && strstr (charset, "UTF-8"))
+        return TRUE;
+      else
+        return FALSE;
+    }
+
+  /* Nothing usable was found: report US-ASCII, for compatibility at present. */
+  *a = "US-ASCII";
+
+  return FALSE;
+}
+
+typedef struct _GCharsetCache GCharsetCache;
+
+struct _GCharsetCache {
+  gboolean is_utf8;  /* result computed for @raw */
+  gchar *raw;        /* copy of the raw locale charset the entry was computed for */
+  gchar *charset;    /* copy of the resolved charset name */
+};
+
+static void
+charset_cache_free (gpointer data)
+{
+  GCharsetCache *cache = data;  /* frees both owned strings, then the struct itself */
+  g_free (cache->raw);
+  g_free (cache->charset);
+  g_free (cache);
+}
+
+/**
+ * g_get_charset:
+ * @charset: return location for character set name
+ *
+ * Obtains the character set for the current
+ * locale; you might use this character set as an argument to
+ * g_convert(), to convert from the current locale's encoding to some
+ * other encoding. (Frequently g_locale_to_utf8() and g_locale_from_utf8()
+ * are nice shortcuts, though.)
+ *
+ * On Windows the character set returned by this function is the
+ * so-called system default ANSI code-page. That is the character set
+ * used by the "narrow" versions of C library and Win32 functions that
+ * handle file names. It might be different from the character set
+ * used by the C library's current locale.
+ *
+ * The return value is %TRUE if the locale's encoding is UTF-8, in that
+ * case you can perhaps avoid calling g_convert().
+ *
+ * The string returned in @charset is owned by a per-thread cache: do not
+ * free it, and copy it if it must outlive later calls on the same thread.
+ *
+ * Return value: %TRUE if the returned charset is UTF-8
+ */
+gboolean
+g_get_charset (const char **charset)
+{
+  static GPrivate cache_private = G_PRIVATE_INIT (charset_cache_free);
+  GCharsetCache *cache = g_private_get (&cache_private);
+  const gchar *raw;
+
+  if (!cache)  /* first call on this thread: start with an empty cache entry */
+    {
+      cache = g_new0 (GCharsetCache, 1);
+      g_private_set (&cache_private, cache);
+    }
+
+  G_LOCK (aliases);
+  raw = _g_locale_charset_raw ();
+  G_UNLOCK (aliases);
+
+  if (!(cache->raw && strcmp (cache->raw, raw) == 0))  /* nothing cached yet, or the raw charset changed */
+    {
+      const gchar *new_charset;
+
+      g_free (cache->raw);
+      g_free (cache->charset);  /* invalidates any pointer handed out earlier on this thread */
+      cache->raw = g_strdup (raw);
+      cache->is_utf8 = g_utf8_get_charset_internal (raw, &new_charset);
+      cache->charset = g_strdup (new_charset);
+    }
+
+  if (charset)  /* callers that only want the UTF-8 flag may pass NULL */
+    *charset = cache->charset;
+
+  return cache->is_utf8;
+}
+
+/**
+ * g_get_codeset:
+ *
+ * Gets the character set for the current locale.
+ *
+ * Return value: a newly allocated string containing the name
+ * of the character set. This string must be freed with g_free().
+ */
+gchar *
+g_get_codeset (void)
+{
+  const gchar *charset;
+
+  g_get_charset (&charset);
+
+  return g_strdup (charset);
+}
+
+#ifndef G_OS_WIN32
+
+static GHashTable *alias_table = NULL;  /* alias -> replacement name; created by read_aliases() */
+
+/* Read "alias value" lines from @file into alias_table; the first entry for an alias wins */
+static void
+read_aliases (const gchar *file)
+{
+  FILE *fp;
+  char buf[256];
+
+  if (!alias_table)
+    alias_table = g_hash_table_new (g_str_hash, g_str_equal);
+  fp = fopen (file,"r");
+  if (!fp)  /* a missing or unreadable file just leaves the table empty */
+    return;
+  while (fgets (buf, sizeof (buf), fp))
+    {
+      char *p, *q;
+
+      g_strstrip (buf);
+
+      /* Line is a comment or is empty */
+      if ((buf[0] == '#') || (buf[0] == '\0'))
+        continue;
+
+      /* Reads first column; it ends at the first tab, space or colon */
+      for (p = buf, q = NULL; *p; p++) {
+        if ((*p == '\t') || (*p == ' ') || (*p == ':')) {
+          *p = '\0';
+          q = p+1;
+          while ((*q == '\t') || (*q == ' ')) {
+            q++;
+          }
+          break;
+        }
+      }
+      /* The line only had one column */
+      if (!q || *q == '\0')
+        continue;
+
+      /* Read second column; it ends at the next tab or space */
+      for (p = q; *p; p++) {
+        if ((*p == '\t') || (*p == ' ')) {
+          *p = '\0';
+          break;
+        }
+      }
+
+      /* Add to alias table if necessary */
+      if (!g_hash_table_lookup (alias_table, buf)) {
+        g_hash_table_insert (alias_table, g_strdup (buf), g_strdup (q));
+      }
+    }
+  fclose (fp);
+}
+
+#endif
+
+static char *
+unalias_lang (char *lang)
+{
+#ifndef G_OS_WIN32
+  char *p;
+  int i;
+
+  if (!alias_table)  /* load the alias file on first use */
+    read_aliases ("/usr/share/locale/locale.alias");
+
+  i = 0;
+  while ((p = g_hash_table_lookup (alias_table, lang)) && (strcmp (p, lang) != 0))
+    {
+      lang = p;
+      if (i++ == 30)  /* stop following overly long (possibly cyclic) alias chains */
+        {
+          static gboolean said_before = FALSE;  /* warn only once */
+          if (!said_before)
+            g_warning ("Too many alias levels for a locale, "
+                       "may indicate a loop");
+          said_before = TRUE;
+          return lang;
+        }
+    }
+#endif
+  return lang;
+}
+
+/* Mask for components of locale spec.
The ordering here is from
+ * least significant to most significant
+ */
+enum
+{
+  COMPONENT_CODESET =   1 << 0,
+  COMPONENT_TERRITORY = 1 << 1,
+  COMPONENT_MODIFIER =  1 << 2
+};
+
+/* Break an X/Open style locale specification into newly allocated components; optional
+ * ones keep their leading '_', '.' or '@' and are only written when present (see returned mask). */
+static guint
+explode_locale (const gchar *locale,
+                gchar      **language,
+                gchar      **territory,
+                gchar      **codeset,
+                gchar      **modifier)
+{
+  const gchar *uscore_pos;
+  const gchar *at_pos;
+  const gchar *dot_pos;
+
+  guint mask = 0;
+
+  uscore_pos = strchr (locale, '_');  /* NOTE(review): also matches a '_' inside the codeset or modifier */
+  dot_pos = strchr (uscore_pos ? uscore_pos : locale, '.');
+  at_pos = strchr (dot_pos ? dot_pos : (uscore_pos ? uscore_pos : locale), '@');
+
+  if (at_pos)
+    {
+      mask |= COMPONENT_MODIFIER;
+      *modifier = g_strdup (at_pos);
+    }
+  else
+    at_pos = locale + strlen (locale);  /* no modifier: the codeset runs to the end of the string */
+
+  if (dot_pos)
+    {
+      mask |= COMPONENT_CODESET;
+      *codeset = g_strndup (dot_pos, at_pos - dot_pos);
+    }
+  else
+    dot_pos = at_pos;  /* no codeset: the territory ends where the modifier starts */
+
+  if (uscore_pos)
+    {
+      mask |= COMPONENT_TERRITORY;
+      *territory = g_strndup (uscore_pos, dot_pos - uscore_pos);
+    }
+  else
+    uscore_pos = dot_pos;  /* no territory: the language ends at the next component */
+
+  *language = g_strndup (locale, uscore_pos - locale);
+
+  return mask;
+}
+
+/*
+ * Compute all interesting variants for a given locale name -
+ * by stripping off different components of the value.
+ *
+ * For simplicity, we assume that the locale is in
+ * X/Open format: language[_territory][.codeset][@modifier]
+ *
+ * TODO: Extend this to handle the CEN format (see the GNUlibc docs)
+ * as well. We could just copy the code from glibc wholesale
+ * but it is big, ugly, and complicated, so I'm reluctant
+ * to do so when this should handle 99% of the time...
+ */
+static void
+append_locale_variants (GPtrArray *array,
+                        const gchar *locale)
+{
+  gchar *language = NULL;
+  gchar *territory = NULL;
+  gchar *codeset = NULL;
+  gchar *modifier = NULL;
+
+  guint mask;
+  guint i, j;
+
+  g_return_if_fail (locale != NULL);
+
+  mask = explode_locale (locale, &language, &territory, &codeset, &modifier);
+
+  /* Iterate through all possible combinations, from most attractive
+   * (every component present) to least attractive (language only).
+   */
+  for (j = 0; j <= mask; ++j)
+    {
+      i = mask - j;  /* counts down from mask to 0 */
+
+      if ((i & ~mask) == 0)  /* skip combinations that need a component the locale lacks */
+        {
+          gchar *val = g_strconcat (language,
+                                    (i & COMPONENT_TERRITORY) ? territory : "",
+                                    (i & COMPONENT_CODESET) ? codeset : "",
+                                    (i & COMPONENT_MODIFIER) ? modifier : "",
+                                    NULL);
+          g_ptr_array_add (array, val);  /* val is not freed here; it stays in the array */
+        }
+    }
+
+  g_free (language);
+  if (mask & COMPONENT_CODESET)
+    g_free (codeset);
+  if (mask & COMPONENT_TERRITORY)
+    g_free (territory);
+  if (mask & COMPONENT_MODIFIER)
+    g_free (modifier);
+}
+
+/**
+ * g_get_locale_variants:
+ * @locale: a locale identifier
+ *
+ * Returns a list of derived variants of @locale, which can be used to
+ * e.g. construct locale-dependent filenames or search paths. The returned
+ * list is sorted from most desirable to least desirable.
+ * This function handles territory, charset and extra locale modifiers.
+ *
+ * For example, if @locale is "fr_BE", then the returned list
+ * is "fr_BE", "fr".
+ *
+ * If you need the list of variants for the current locale,
+ * use g_get_language_names().
+ *
+ * Returns: (transfer full) (array zero-terminated=1) (element-type utf8): a newly
+ * allocated array of newly allocated strings with the locale variants. Free with
+ * g_strfreev().
+ *
+ * Since: 2.28
+ */
+gchar **
+g_get_locale_variants (const gchar *locale)
+{
+  GPtrArray *array;
+
+  g_return_val_if_fail (locale != NULL, NULL);
+
+  array = g_ptr_array_sized_new (8);
+  append_locale_variants (array, locale);
+  g_ptr_array_add (array, NULL);  /* NULL-terminate the vector */
+
+  return (gchar **) g_ptr_array_free (array, FALSE);
+}
+
+/* The following is (partly) taken from the gettext package.
+   Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc. */
+
+static const gchar *
+guess_category_value (const gchar *category_name)
+{
+  const gchar *retval;
+
+  /* The highest priority value is the `LANGUAGE' environment
+     variable. This is a GNU extension. */
+  retval = g_getenv ("LANGUAGE");
+  if ((retval != NULL) && (retval[0] != '\0'))
+    return retval;
+
+  /* `LANGUAGE' is not set. So we have to proceed with the POSIX
+     methods of looking to `LC_ALL', `LC_xxx', and `LANG'. On some
+     systems this can be done by the `setlocale' function itself. */
+
+  /* A non-empty LC_ALL overrides every other setting. */
+  retval = g_getenv ("LC_ALL");
+  if ((retval != NULL) && (retval[0] != '\0'))
+    return retval;
+
+  /* Next comes the variable named after the desired category. */
+  retval = g_getenv (category_name);
+  if ((retval != NULL) && (retval[0] != '\0'))
+    return retval;
+
+  /* Last possibility is the LANG environment variable. */
+  retval = g_getenv ("LANG");
+  if ((retval != NULL) && (retval[0] != '\0'))
+    return retval;
+
+#ifdef G_PLATFORM_WIN32
+  /* g_win32_getlocale() first checks for LC_ALL, LC_MESSAGES and
+   * LANG, which we already did above. Oh well. The main point of
+   * calling g_win32_getlocale() is to get the thread's locale as used
+   * by Windows and the Microsoft C runtime (in the "English_United
+   * States" format) translated into the Unixish format.
+   */
+  {
+    char *locale = g_win32_getlocale ();
+    retval = g_intern_string (locale);  /* returned in place of @locale, which is freed on the next line */
+    g_free (locale);
+    return retval;
+  }
+#endif
+
+  return NULL;
+}
+
+typedef struct _GLanguageNamesCache GLanguageNamesCache;
+
+struct _GLanguageNamesCache {
+  gchar *languages;        /* copy of the setting the list was built from */
+  gchar **language_names;  /* NULL-terminated list built from @languages */
+};
+
+static void
+language_names_cache_free (gpointer data)
+{
+  GLanguageNamesCache *cache = data;  /* frees the setting copy, the name list, then the struct */
+  g_free (cache->languages);
+  g_strfreev (cache->language_names);
+  g_free (cache);
+}
+
+/**
+ * g_get_language_names:
+ *
+ * Computes a list of applicable locale names, which can be used to
+ * e.g. construct locale-dependent filenames or search paths. The returned
+ * list is sorted from most desirable to least desirable and always contains
+ * the default locale "C".
+ *
+ * For example, if LANGUAGE=de:en_US, then the returned list is
+ * "de", "en_US", "en", "C".
+ *
+ * This function consults the environment variables LANGUAGE,
+ * LC_ALL, LC_MESSAGES and LANG
+ * to find the list of locales specified by the user.
+ *
+ * Return value: (array zero-terminated=1) (transfer none): a %NULL-terminated array of strings owned by GLib
+ * that must not be modified or freed. It is cached per thread; a later call on that thread that sees a different setting frees and rebuilds it.
+ *
+ * Since: 2.6
+ **/
+const gchar * const *
+g_get_language_names (void)
+{
+  static GPrivate cache_private = G_PRIVATE_INIT (language_names_cache_free);
+  GLanguageNamesCache *cache = g_private_get (&cache_private);
+  const gchar *value;
+
+  if (!cache)  /* first call on this thread: start with an empty cache entry */
+    {
+      cache = g_new0 (GLanguageNamesCache, 1);
+      g_private_set (&cache_private, cache);
+    }
+
+  value = guess_category_value ("LC_MESSAGES");
+  if (!value)
+    value = "C";
+
+  if (!(cache->languages && strcmp (cache->languages, value) == 0))  /* nothing cached yet, or the setting changed */
+    {
+      GPtrArray *array;
+      gchar **alist, **a;
+
+      g_free (cache->languages);
+      g_strfreev (cache->language_names);  /* invalidates the list returned by earlier calls on this thread */
+      cache->languages = g_strdup (value);
+
+      array = g_ptr_array_sized_new (8);
+
+      alist = g_strsplit (value, ":", 0);  /* the setting may list several locales separated by ':' */
+      for (a = alist; *a; a++)
+        append_locale_variants (array, unalias_lang (*a));
+      g_strfreev (alist);
+      g_ptr_array_add (array, g_strdup ("C"));  /* the list always ends with the default locale */
+      g_ptr_array_add (array, NULL);
+
+      cache->language_names = (gchar **) g_ptr_array_free (array, FALSE);
+    }
+
+  return (const gchar * const *) cache->language_names;
+}
diff --git a/glib/gcharset.h b/glib/gcharset.h
new file mode 100644
index 000000000..cf7805be6
--- /dev/null
+++ b/glib/gcharset.h
@@ -0,0 +1,40 @@
+/* gcharset.h - Charset functions
+ *
+ * Copyright (C) 2011 Red Hat, Inc.
+ *
+ * The GLib Library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * The GLib Library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with the Gnome Library; see the file COPYING.LIB.
If not,
+ * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#if !defined (__GLIB_H_INSIDE__) && !defined (GLIB_COMPILATION)
+#error "Only <glib.h> can be included directly."
+#endif
+
+#ifndef __G_CHARSET_H__
+#define __G_CHARSET_H__
+
+#include <glib/gtypes.h>
+
+G_BEGIN_DECLS
+
+gboolean              g_get_charset         (const char **charset);
+gchar *               g_get_codeset         (void);
+
+const gchar * const * g_get_language_names  (void);
+gchar **              g_get_locale_variants (const gchar *locale);
+
+G_END_DECLS
+
+#endif  /* __G_CHARSET_H__ */
diff --git a/glib/glib.h b/glib/glib.h
index a1bcb7d09..461703381 100644
--- a/glib/glib.h
+++ b/glib/glib.h
@@ -37,6 +37,7 @@
 #include <glib/gbase64.h>
 #include <glib/gbitlock.h>
 #include <glib/gbookmarkfile.h>
+#include <glib/gcharset.h>
 #include <glib/gchecksum.h>
 #include <glib/gconvert.h>
 #include <glib/gdataset.h>
diff --git a/glib/gutf8.c b/glib/gutf8.c
index dfca1452b..a7bc343f0 100644
--- a/glib/gutf8.c
+++ b/glib/gutf8.c
@@ -34,8 +34,6 @@
 #undef STRICT
 #endif
 
-#include "libcharset/libcharset.h"
-
 #include "gconvert.h"
 #include "ghash.h"
 #include "gstrfuncs.h"
@@ -445,197 +443,6 @@ g_utf8_strncpy (gchar *dest,
   return dest;
 }
 
-G_LOCK_DEFINE_STATIC (aliases);
-
-static GHashTable *
-get_alias_hash (void)
-{
-  static GHashTable *alias_hash = NULL;
-  const char *aliases;
-
-  G_LOCK (aliases);
-
-  if (!alias_hash)
-    {
-      alias_hash = g_hash_table_new (g_str_hash, g_str_equal);
-
-      aliases = _g_locale_get_charset_aliases ();
-      while (*aliases != '\0')
-        {
-          const char *canonical;
-          const char *alias;
-          const char **alias_array;
-          int count = 0;
-
-          alias = aliases;
-          aliases += strlen (aliases) + 1;
-          canonical = aliases;
-          aliases += strlen (aliases) + 1;
-
-          alias_array = g_hash_table_lookup (alias_hash, canonical);
-          if (alias_array)
-            {
-              while (alias_array[count])
-                count++;
-            }
-
-          alias_array = g_renew (const char *, alias_array, count + 2);
-          alias_array[count] = alias;
-          alias_array[count + 1] = NULL;
-
-          g_hash_table_insert (alias_hash, (char *)canonical, alias_array);
-        }
-    }
-
-  G_UNLOCK (aliases);
-
-  return alias_hash;
-}
-
-/* As
an abuse of the alias table, the following routines gets - * the charsets that are aliases for the canonical name. - */ -G_GNUC_INTERNAL const char ** -_g_charset_get_aliases (const char *canonical_name) -{ - GHashTable *alias_hash = get_alias_hash (); - - return g_hash_table_lookup (alias_hash, canonical_name); -} - -static gboolean -g_utf8_get_charset_internal (const char *raw_data, - const char **a) -{ - const char *charset = getenv("CHARSET"); - - if (charset && *charset) - { - *a = charset; - - if (charset && strstr (charset, "UTF-8")) - return TRUE; - else - return FALSE; - } - - /* The libcharset code tries to be thread-safe without - * a lock, but has a memory leak and a missing memory - * barrier, so we lock for it - */ - G_LOCK (aliases); - charset = _g_locale_charset_unalias (raw_data); - G_UNLOCK (aliases); - - if (charset && *charset) - { - *a = charset; - - if (charset && strstr (charset, "UTF-8")) - return TRUE; - else - return FALSE; - } - - /* Assume this for compatibility at present. */ - *a = "US-ASCII"; - - return FALSE; -} - -typedef struct _GCharsetCache GCharsetCache; - -struct _GCharsetCache { - gboolean is_utf8; - gchar *raw; - gchar *charset; -}; - -static void -charset_cache_free (gpointer data) -{ - GCharsetCache *cache = data; - g_free (cache->raw); - g_free (cache->charset); - g_free (cache); -} - -/** - * g_get_charset: - * @charset: return location for character set name - * - * Obtains the character set for the current - * locale; you might use this character set as an argument to - * g_convert(), to convert from the current locale's encoding to some - * other encoding. (Frequently g_locale_to_utf8() and g_locale_from_utf8() - * are nice shortcuts, though.) - * - * On Windows the character set returned by this function is the - * so-called system default ANSI code-page. That is the character set - * used by the "narrow" versions of C library and Win32 functions that - * handle file names. 
It might be different from the character set - * used by the C library's current locale. - * - * The return value is %TRUE if the locale's encoding is UTF-8, in that - * case you can perhaps avoid calling g_convert(). - * - * The string returned in @charset is not allocated, and should not be - * freed. - * - * Return value: %TRUE if the returned charset is UTF-8 - */ -gboolean -g_get_charset (const char **charset) -{ - static GPrivate cache_private = G_PRIVATE_INIT (charset_cache_free); - GCharsetCache *cache = g_private_get (&cache_private); - const gchar *raw; - - if (!cache) - { - cache = g_new0 (GCharsetCache, 1); - g_private_set (&cache_private, cache); - } - - G_LOCK (aliases); - raw = _g_locale_charset_raw (); - G_UNLOCK (aliases); - - if (!(cache->raw && strcmp (cache->raw, raw) == 0)) - { - const gchar *new_charset; - - g_free (cache->raw); - g_free (cache->charset); - cache->raw = g_strdup (raw); - cache->is_utf8 = g_utf8_get_charset_internal (raw, &new_charset); - cache->charset = g_strdup (new_charset); - } - - if (charset) - *charset = cache->charset; - - return cache->is_utf8; -} - -/** - * g_get_codeset: - * - * Gets the character set for the current locale. - * - * Return value: a newly allocated string containing the name - * of the character set. This string must be freed with g_free(). 
- */ -gchar * -g_get_codeset (void) -{ - const gchar *charset; - - g_get_charset (&charset); - - return g_strdup (charset); -} - /* unicode_strchr */ /** diff --git a/glib/gutils.c b/glib/gutils.c index 98bd40bdc..294f1589e 100644 --- a/glib/gutils.c +++ b/glib/gutils.c @@ -2485,369 +2485,6 @@ g_get_system_config_dirs (void) return (const gchar * const *) conf_dir_vector; } -#ifndef G_OS_WIN32 - -static GHashTable *alias_table = NULL; - -/* read an alias file for the locales */ -static void -read_aliases (gchar *file) -{ - FILE *fp; - char buf[256]; - - if (!alias_table) - alias_table = g_hash_table_new (g_str_hash, g_str_equal); - fp = fopen (file,"r"); - if (!fp) - return; - while (fgets (buf, 256, fp)) - { - char *p, *q; - - g_strstrip (buf); - - /* Line is a comment */ - if ((buf[0] == '#') || (buf[0] == '\0')) - continue; - - /* Reads first column */ - for (p = buf, q = NULL; *p; p++) { - if ((*p == '\t') || (*p == ' ') || (*p == ':')) { - *p = '\0'; - q = p+1; - while ((*q == '\t') || (*q == ' ')) { - q++; - } - break; - } - } - /* The line only had one column */ - if (!q || *q == '\0') - continue; - - /* Read second column */ - for (p = q; *p; p++) { - if ((*p == '\t') || (*p == ' ')) { - *p = '\0'; - break; - } - } - - /* Add to alias table if necessary */ - if (!g_hash_table_lookup (alias_table, buf)) { - g_hash_table_insert (alias_table, g_strdup (buf), g_strdup (q)); - } - } - fclose (fp); -} - -#endif - -static char * -unalias_lang (char *lang) -{ -#ifndef G_OS_WIN32 - char *p; - int i; - - if (!alias_table) - read_aliases ("/usr/share/locale/locale.alias"); - - i = 0; - while ((p = g_hash_table_lookup (alias_table, lang)) && (strcmp (p, lang) != 0)) - { - lang = p; - if (i++ == 30) - { - static gboolean said_before = FALSE; - if (!said_before) - g_warning ("Too many alias levels for a locale, " - "may indicate a loop"); - said_before = TRUE; - return lang; - } - } -#endif - return lang; -} - -/* Mask for components of locale spec. 
The ordering here is from - * least significant to most significant - */ -enum -{ - COMPONENT_CODESET = 1 << 0, - COMPONENT_TERRITORY = 1 << 1, - COMPONENT_MODIFIER = 1 << 2 -}; - -/* Break an X/Open style locale specification into components - */ -static guint -explode_locale (const gchar *locale, - gchar **language, - gchar **territory, - gchar **codeset, - gchar **modifier) -{ - const gchar *uscore_pos; - const gchar *at_pos; - const gchar *dot_pos; - - guint mask = 0; - - uscore_pos = strchr (locale, '_'); - dot_pos = strchr (uscore_pos ? uscore_pos : locale, '.'); - at_pos = strchr (dot_pos ? dot_pos : (uscore_pos ? uscore_pos : locale), '@'); - - if (at_pos) - { - mask |= COMPONENT_MODIFIER; - *modifier = g_strdup (at_pos); - } - else - at_pos = locale + strlen (locale); - - if (dot_pos) - { - mask |= COMPONENT_CODESET; - *codeset = g_strndup (dot_pos, at_pos - dot_pos); - } - else - dot_pos = at_pos; - - if (uscore_pos) - { - mask |= COMPONENT_TERRITORY; - *territory = g_strndup (uscore_pos, dot_pos - uscore_pos); - } - else - uscore_pos = dot_pos; - - *language = g_strndup (locale, uscore_pos - locale); - - return mask; -} - -/* - * Compute all interesting variants for a given locale name - - * by stripping off different components of the value. - * - * For simplicity, we assume that the locale is in - * X/Open format: language[_territory][.codeset][@modifier] - * - * TODO: Extend this to handle the CEN format (see the GNUlibc docs) - * as well. We could just copy the code from glibc wholesale - * but it is big, ugly, and complicated, so I'm reluctant - * to do so when this should handle 99% of the time... 
- */ -static void -append_locale_variants (GPtrArray *array, - const gchar *locale) -{ - gchar *language = NULL; - gchar *territory = NULL; - gchar *codeset = NULL; - gchar *modifier = NULL; - - guint mask; - guint i, j; - - g_return_if_fail (locale != NULL); - - mask = explode_locale (locale, &language, &territory, &codeset, &modifier); - - /* Iterate through all possible combinations, from least attractive - * to most attractive. - */ - for (j = 0; j <= mask; ++j) - { - i = mask - j; - - if ((i & ~mask) == 0) - { - gchar *val = g_strconcat (language, - (i & COMPONENT_TERRITORY) ? territory : "", - (i & COMPONENT_CODESET) ? codeset : "", - (i & COMPONENT_MODIFIER) ? modifier : "", - NULL); - g_ptr_array_add (array, val); - } - } - - g_free (language); - if (mask & COMPONENT_CODESET) - g_free (codeset); - if (mask & COMPONENT_TERRITORY) - g_free (territory); - if (mask & COMPONENT_MODIFIER) - g_free (modifier); -} - -/** - * g_get_locale_variants: - * @locale: a locale identifier - * - * Returns a list of derived variants of @locale, which can be used to - * e.g. construct locale-dependent filenames or search paths. The returned - * list is sorted from most desirable to least desirable. - * This function handles territory, charset and extra locale modifiers. - * - * For example, if @locale is "fr_BE", then the returned list - * is "fr_BE", "fr". - * - * If you need the list of variants for the current locale, - * use g_get_language_names(). - * - * Returns: (transfer full) (array zero-terminated=1) (element-type utf8): a newly - * allocated array of newly allocated strings with the locale variants. Free with - * g_strfreev(). 
- * - * Since: 2.28 - */ -gchar ** -g_get_locale_variants (const gchar *locale) -{ - GPtrArray *array; - - g_return_val_if_fail (locale != NULL, NULL); - - array = g_ptr_array_sized_new (8); - append_locale_variants (array, locale); - g_ptr_array_add (array, NULL); - - return (gchar **) g_ptr_array_free (array, FALSE); -} - -/* The following is (partly) taken from the gettext package. - Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc. */ - -static const gchar * -guess_category_value (const gchar *category_name) -{ - const gchar *retval; - - /* The highest priority value is the `LANGUAGE' environment - variable. This is a GNU extension. */ - retval = g_getenv ("LANGUAGE"); - if ((retval != NULL) && (retval[0] != '\0')) - return retval; - - /* `LANGUAGE' is not set. So we have to proceed with the POSIX - methods of looking to `LC_ALL', `LC_xxx', and `LANG'. On some - systems this can be done by the `setlocale' function itself. */ - - /* Setting of LC_ALL overwrites all other. */ - retval = g_getenv ("LC_ALL"); - if ((retval != NULL) && (retval[0] != '\0')) - return retval; - - /* Next comes the name of the desired category. */ - retval = g_getenv (category_name); - if ((retval != NULL) && (retval[0] != '\0')) - return retval; - - /* Last possibility is the LANG environment variable. */ - retval = g_getenv ("LANG"); - if ((retval != NULL) && (retval[0] != '\0')) - return retval; - -#ifdef G_PLATFORM_WIN32 - /* g_win32_getlocale() first checks for LC_ALL, LC_MESSAGES and - * LANG, which we already did above. Oh well. The main point of - * calling g_win32_getlocale() is to get the thread's locale as used - * by Windows and the Microsoft C runtime (in the "English_United - * States" format) translated into the Unixish format. 
- */ - { - char *locale = g_win32_getlocale (); - retval = g_intern_string (locale); - g_free (locale); - return retval; - } -#endif - - return NULL; -} - -typedef struct _GLanguageNamesCache GLanguageNamesCache; - -struct _GLanguageNamesCache { - gchar *languages; - gchar **language_names; -}; - -static void -language_names_cache_free (gpointer data) -{ - GLanguageNamesCache *cache = data; - g_free (cache->languages); - g_strfreev (cache->language_names); - g_free (cache); -} - -/** - * g_get_language_names: - * - * Computes a list of applicable locale names, which can be used to - * e.g. construct locale-dependent filenames or search paths. The returned - * list is sorted from most desirable to least desirable and always contains - * the default locale "C". - * - * For example, if LANGUAGE=de:en_US, then the returned list is - * "de", "en_US", "en", "C". - * - * This function consults the environment variables LANGUAGE, - * LC_ALL, LC_MESSAGES and LANG - * to find the list of locales specified by the user. - * - * Return value: (array zero-terminated=1) (transfer none): a %NULL-terminated array of strings owned by GLib - * that must not be modified or freed. 
- * - * Since: 2.6 - **/ -const gchar * const * -g_get_language_names (void) -{ - static GPrivate cache_private = G_PRIVATE_INIT (language_names_cache_free); - GLanguageNamesCache *cache = g_private_get (&cache_private); - const gchar *value; - - if (!cache) - { - cache = g_new0 (GLanguageNamesCache, 1); - g_private_set (&cache_private, cache); - } - - value = guess_category_value ("LC_MESSAGES"); - if (!value) - value = "C"; - - if (!(cache->languages && strcmp (cache->languages, value) == 0)) - { - GPtrArray *array; - gchar **alist, **a; - - g_free (cache->languages); - g_strfreev (cache->language_names); - cache->languages = g_strdup (value); - - array = g_ptr_array_sized_new (8); - - alist = g_strsplit (value, ":", 0); - for (a = alist; *a; a++) - append_locale_variants (array, unalias_lang (*a)); - g_strfreev (alist); - g_ptr_array_add (array, g_strdup ("C")); - g_ptr_array_add (array, NULL); - - cache->language_names = (gchar **) g_ptr_array_free (array, FALSE); - } - - return (const gchar * const *) cache->language_names; -} - /** * g_nullify_pointer: * @nullify_location: the memory address of the pointer.