diff --git a/glib/gcharset.c b/glib/gcharset.c index bb775bda4..9f91a9b48 100644 --- a/glib/gcharset.c +++ b/glib/gcharset.c @@ -36,6 +36,12 @@ #include #include + +#if (HAVE_LANGINFO_TIME_CODESET || HAVE_LANGINFO_CODESET) +#include +#endif + +#include #ifdef G_OS_WIN32 #define WIN32_LEAN_AND_MEAN #include @@ -215,6 +221,87 @@ g_get_charset (const char **charset) return cache->is_utf8; } +/* + * Like g_get_charset(), but for LC_TIME: reads nl_langinfo (_NL_TIME_CODESET) + * when available, else _g_locale_charset_raw(). No setlocale() call is made here. + * + * Returns: %TRUE if the returned charset is UTF-8 + */ +gboolean +_g_get_time_charset (const char **charset) +{ + static GPrivate cache_private = G_PRIVATE_INIT (charset_cache_free); + GCharsetCache *cache = g_private_get (&cache_private); + const gchar *raw; + + if (!cache) + cache = g_private_set_alloc0 (&cache_private, sizeof (GCharsetCache)); + +#ifdef HAVE_LANGINFO_TIME_CODESET + raw = nl_langinfo (_NL_TIME_CODESET); +#else + G_LOCK (aliases); + raw = _g_locale_charset_raw (); + G_UNLOCK (aliases); +#endif + + if (cache->raw == NULL || strcmp (cache->raw, raw) != 0) /* rebuild the cache only when the raw codeset changed */ + { + const gchar *new_charset; + + g_free (cache->raw); + g_free (cache->charset); + cache->raw = g_strdup (raw); + cache->is_utf8 = g_utf8_get_charset_internal (raw, &new_charset); + cache->charset = g_strdup (new_charset); + } + + if (charset) + *charset = cache->charset; + + return cache->is_utf8; +} +/* + * Like g_get_charset(), but for LC_CTYPE: reads nl_langinfo (CODESET) when + * available, else _g_locale_charset_raw(). No setlocale() call is made here. 
+ * + * Returns: %TRUE if the returned charset is UTF-8 + */ +gboolean +_g_get_ctype_charset (const char **charset) +{ + static GPrivate cache_private = G_PRIVATE_INIT (charset_cache_free); + GCharsetCache *cache = g_private_get (&cache_private); + const gchar *raw; + + if (!cache) + cache = g_private_set_alloc0 (&cache_private, sizeof (GCharsetCache)); + +#ifdef HAVE_LANGINFO_CODESET + raw = nl_langinfo (CODESET); +#else + G_LOCK (aliases); + raw = _g_locale_charset_raw (); + G_UNLOCK (aliases); +#endif + + if (cache->raw == NULL || strcmp (cache->raw, raw) != 0) /* rebuild the cache only when the raw codeset changed */ + { + const gchar *new_charset; + + g_free (cache->raw); + g_free (cache->charset); + cache->raw = g_strdup (raw); + cache->is_utf8 = g_utf8_get_charset_internal (raw, &new_charset); + cache->charset = g_strdup (new_charset); + } + + if (charset) + *charset = cache->charset; + + return cache->is_utf8; +} + /** * g_get_codeset: * diff --git a/glib/gcharsetprivate.h b/glib/gcharsetprivate.h index f6b68dcd7..9b1def278 100644 --- a/glib/gcharsetprivate.h +++ b/glib/gcharsetprivate.h @@ -25,6 +25,10 @@ G_BEGIN_DECLS const char ** _g_charset_get_aliases (const char *canonical_name); +gboolean _g_get_time_charset (const char **charset); + +gboolean _g_get_ctype_charset (const char **charset); + G_END_DECLS #endif diff --git a/glib/gconvert.c b/glib/gconvert.c index f78cff01d..7697ff65d 100644 --- a/glib/gconvert.c +++ b/glib/gconvert.c @@ -40,6 +40,7 @@ #endif #include "gconvert.h" +#include "gconvertprivate.h" #include "gcharsetprivate.h" #include "gslist.h" @@ -1015,6 +1016,52 @@ g_locale_to_utf8 (const gchar *opsysstring, bytes_read, bytes_written, error); } +/* + * Same as g_locale_to_utf8(), except that the source charset is the one + * reported by _g_get_time_charset() (the charset used for LC_TIME data). + * + * Returns: The converted string, or %NULL on an error. 
+ */ +gchar * +_g_time_locale_to_utf8 (const gchar *opsysstring, + gssize len, + gsize *bytes_read, + gsize *bytes_written, + GError **error) +{ + const char *charset; + + if (_g_get_time_charset (&charset)) /* TRUE means the time charset is already UTF-8 */ + return strdup_len (opsysstring, len, bytes_read, bytes_written, error); + else + return convert_checked (opsysstring, len, "UTF-8", charset, + CONVERT_CHECK_NO_NULS_IN_OUTPUT, + bytes_read, bytes_written, error); +} + +/* + * Same as g_locale_to_utf8(), except that the source charset is the one + * reported by _g_get_ctype_charset() (the charset used for LC_CTYPE data). + * + * Returns: The converted string, or %NULL on an error. + */ +gchar * +_g_ctype_locale_to_utf8 (const gchar *opsysstring, + gssize len, + gsize *bytes_read, + gsize *bytes_written, + GError **error) +{ + const char *charset; + + if (_g_get_ctype_charset (&charset)) /* TRUE means the ctype charset is already UTF-8 */ + return strdup_len (opsysstring, len, bytes_read, bytes_written, error); + else + return convert_checked (opsysstring, len, "UTF-8", charset, + CONVERT_CHECK_NO_NULS_IN_OUTPUT, + bytes_read, bytes_written, error); +} + /** * g_locale_from_utf8: * @utf8string: a UTF-8 encoded string diff --git a/glib/gconvertprivate.h b/glib/gconvertprivate.h new file mode 100644 index 000000000..5bdc87ff6 --- /dev/null +++ b/glib/gconvertprivate.h @@ -0,0 +1,40 @@ +/* gconvertprivate.h - Private GLib gconvert functions + * + * Copyright 2020 Frederic Martinsons + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __G_CONVERTPRIVATE_H__ +#define __G_CONVERTPRIVATE_H__ + +#include "glib.h" /* included first: it provides G_BEGIN_DECLS and must stay outside the extern "C" region */ + +G_BEGIN_DECLS + +gchar *_g_time_locale_to_utf8 (const gchar *opsysstring, + gssize len, + gsize *bytes_read, + gsize *bytes_written, + GError **error) G_GNUC_MALLOC; + +gchar *_g_ctype_locale_to_utf8 (const gchar *opsysstring, + gssize len, + gsize *bytes_read, + gsize *bytes_written, + GError **error) G_GNUC_MALLOC; + +G_END_DECLS + +#endif /* __G_CONVERTPRIVATE_H__ */ diff --git a/glib/gdatetime.c b/glib/gdatetime.c index 219dfb7de..a31afe713 100644 --- a/glib/gdatetime.c +++ b/glib/gdatetime.c @@ -62,7 +62,9 @@ #include "gatomic.h" #include "gcharset.h" +#include "gcharsetprivate.h" #include "gconvert.h" +#include "gconvertprivate.h" #include "gdatetime.h" #include "gfileutils.h" #include "ghash.h" @@ -2869,7 +2871,7 @@ initialize_alt_digits (void) if (g_strcmp0 (locale_digit, "") == 0) return NULL; - digit = g_locale_to_utf8 (locale_digit, -1, NULL, &digit_len, NULL); + digit = _g_ctype_locale_to_utf8 (locale_digit, -1, NULL, &digit_len, NULL); if (digit == NULL) return NULL; @@ -2993,7 +2995,7 @@ g_date_time_format_locale (GDateTime *datetime, if (locale_is_utf8) return g_date_time_format_utf8 (datetime, locale_format, outstr, locale_is_utf8); - utf8_format = g_locale_to_utf8 (locale_format, -1, NULL, NULL, NULL); + utf8_format = _g_time_locale_to_utf8 (locale_format, -1, NULL, NULL, NULL); if (utf8_format == NULL) return FALSE; @@ -3017,7 +3019,7 @@ string_append (GString *string, } else { - utf8 = g_locale_to_utf8 (s, -1, NULL, &utf8_len, NULL); + utf8 = _g_time_locale_to_utf8 (s, -1, NULL, &utf8_len, NULL); if (utf8 == NULL) return FALSE; g_string_append_len (string, utf8, utf8_len); @@ -3443,10 +3445,11 @@ g_date_time_format (GDateTime *datetime, { GString *outstr; const gchar *charset; - /* Avoid conversions from locale charset to UTF-8 if 
charset is compatible + /* Avoid conversions from locale (for LC_TIME and not for LC_MESSAGES unless + * specified otherwise) charset to UTF-8 if charset is compatible * with UTF-8 already. Check for UTF-8 and synonymous canonical names of * ASCII. */ - gboolean locale_is_utf8_compatible = g_get_charset (&charset) || + gboolean time_is_utf8_compatible = _g_get_time_charset (&charset) || g_strcmp0 ("ASCII", charset) == 0 || g_strcmp0 ("ANSI_X3.4-1968", charset) == 0; @@ -3457,7 +3460,7 @@ g_date_time_format (GDateTime *datetime, outstr = g_string_sized_new (strlen (format) * 2); if (!g_date_time_format_utf8 (datetime, format, outstr, - locale_is_utf8_compatible)) + time_is_utf8_compatible)) { g_string_free (outstr, TRUE); return NULL; diff --git a/glib/tests/gdatetime.c b/glib/tests/gdatetime.c index bc4eba93a..12f332b44 100644 --- a/glib/tests/gdatetime.c +++ b/glib/tests/gdatetime.c @@ -2318,6 +2318,122 @@ test_format_iso8601 (void) g_time_zone_unref (tz); } +typedef struct +{ + gboolean utf8_messages; + gboolean utf8_time; +} MixedUtf8TestData; + +static const MixedUtf8TestData utf8_time_non_utf8_messages = { + .utf8_messages = FALSE, + .utf8_time = TRUE +}; + +static const MixedUtf8TestData non_utf8_time_utf8_messages = { + .utf8_messages = TRUE, + .utf8_time = FALSE +}; + +static const MixedUtf8TestData utf8_time_utf8_messages = { + .utf8_messages = TRUE, + .utf8_time = TRUE +}; + +static const MixedUtf8TestData non_utf8_time_non_utf8_messages = { + .utf8_messages = FALSE, + .utf8_time = FALSE +}; + +/* Switch @category to the locale @name. If that locale cannot be selected, + * mark the running test as skipped and return %FALSE; otherwise return %TRUE. */ +static gboolean +check_and_set_locale (int category, + const gchar *name) +{ + const gchar *current = setlocale (category, name); /* NULL when the request cannot be honoured */ + if (current == NULL || strstr (current, name) == NULL) + { + g_print ("Unavailable '%s' locale\n", name); + g_test_skip ("required locale not available, skipping tests"); + return FALSE; + } + return TRUE; +} + +/* Check that %b and %B output follows the LC_TIME charset for the + * UTF-8/non-UTF-8 combination of LC_TIME and LC_MESSAGES described by @data. */ +static void +test_format_time_mixed_utf8 (gconstpointer data) +{ + const MixedUtf8TestData *test_data; + gchar *old_time_locale; + gchar 
*old_messages_locale; + g_test_bug ("https://gitlab.gnome.org/GNOME/glib/-/issues/2055"); + + test_data = (MixedUtf8TestData *) data; + old_time_locale = g_strdup (setlocale (LC_TIME, NULL)); + old_messages_locale = g_strdup (setlocale (LC_MESSAGES, NULL)); + if (test_data->utf8_time) + { + if (!check_and_set_locale (LC_TIME, "C.UTF-8")) + { + setlocale (LC_TIME, old_time_locale); /* LC_MESSAGES has not been touched yet */ + g_free (old_time_locale); + g_free (old_messages_locale); + return; + } + } + else + { + if (!check_and_set_locale (LC_TIME, "de_DE.iso88591")) + { + setlocale (LC_TIME, old_time_locale); /* LC_MESSAGES has not been touched yet */ + g_free (old_time_locale); + g_free (old_messages_locale); + return; + } + } + if (test_data->utf8_messages) + { + if (!check_and_set_locale (LC_MESSAGES, "C.UTF-8")) + { + setlocale (LC_TIME, old_time_locale); /* undo the LC_TIME switch made above */ + setlocale (LC_MESSAGES, old_messages_locale); + g_free (old_time_locale); + g_free (old_messages_locale); + return; + } + } + else + { + if (!check_and_set_locale (LC_MESSAGES, "de_DE.iso88591")) + { + setlocale (LC_TIME, old_time_locale); /* undo the LC_TIME switch made above */ + setlocale (LC_MESSAGES, old_messages_locale); + g_free (old_time_locale); + g_free (old_messages_locale); + return; + } + } + + if (!test_data->utf8_time) + { + /* March, to get März in German */ + TEST_PRINTF_DATE (2020, 3, 1, "%b", "Mär"); + TEST_PRINTF_DATE (2020, 3, 1, "%B", "März"); + } + else + { + TEST_PRINTF_DATE (2020, 3, 1, "%b", "mar"); + TEST_PRINTF_DATE (2020, 3, 1, "%B", "march"); + } + + setlocale (LC_TIME, old_time_locale); + setlocale (LC_MESSAGES, old_messages_locale); + g_free (old_time_locale); + g_free (old_messages_locale); +} + #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wformat-y2k" static void @@ -2980,6 +3096,18 @@ main (gint argc, g_test_add_func ("/GDateTime/non_utf8_printf", test_non_utf8_printf); g_test_add_func ("/GDateTime/format_unrepresentable", test_format_unrepresentable); g_test_add_func ("/GDateTime/format_iso8601", test_format_iso8601); + g_test_add_data_func ("/GDateTime/format_mixed/utf8_time_non_utf8_messages", + &utf8_time_non_utf8_messages, + test_format_time_mixed_utf8); + 
g_test_add_data_func ("/GDateTime/format_mixed/utf8_time_utf8_messages", + &utf8_time_utf8_messages, + test_format_time_mixed_utf8); + g_test_add_data_func ("/GDateTime/format_mixed/non_utf8_time_non_utf8_messages", + &non_utf8_time_non_utf8_messages, + test_format_time_mixed_utf8); + g_test_add_data_func ("/GDateTime/format_mixed/non_utf8_time_utf8_messages", + &non_utf8_time_utf8_messages, + test_format_time_mixed_utf8); g_test_add_func ("/GDateTime/strftime", test_strftime); g_test_add_func ("/GDateTime/strftime/error_handling", test_GDateTime_strftime_error_handling); g_test_add_func ("/GDateTime/modifiers", test_modifiers); diff --git a/meson.build b/meson.build index 2cf1c7b4f..eb401123c 100644 --- a/meson.build +++ b/meson.build @@ -1185,6 +1185,15 @@ if cc.links('''#ifndef _GNU_SOURCE glib_conf.set('HAVE_LANGINFO_ABALTMON', 1) endif +# Check for nl_langinfo and _NL_TIME_CODESET +if cc.links('''#include <langinfo.h> + int main (int argc, char ** argv) { + char *codeset = nl_langinfo (_NL_TIME_CODESET); + return 0; + }''', name : 'nl_langinfo and _NL_TIME_CODESET') + glib_conf.set('HAVE_LANGINFO_TIME_CODESET', 1) +endif + # Check if C compiler supports the 'signed' keyword if not cc.compiles('''signed char x;''', name : 'signed') glib_conf.set('signed', '/* NOOP */')