Merge branch '2055-correct-date-format-utf8' into 'master'

Resolve "g_date_time_format() does not return UTF-8 if LC_TIME is not UTF8 but other locale settings are UTF-8"

Closes #2055

See merge request GNOME/glib!1777
This commit is contained in:
Philip Withnall 2021-03-31 16:16:16 +00:00
commit e7f6799668
8 changed files with 341 additions and 33 deletions

View File

@ -36,6 +36,12 @@
#include <string.h> #include <string.h>
#include <stdio.h> #include <stdio.h>
#if (HAVE_LANGINFO_TIME_CODESET || HAVE_LANGINFO_CODESET)
#include <langinfo.h>
#endif
#include <locale.h>
#ifdef G_OS_WIN32 #ifdef G_OS_WIN32
#define WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN
#include <windows.h> #include <windows.h>
@ -215,6 +221,87 @@ g_get_charset (const char **charset)
return cache->is_utf8; return cache->is_utf8;
} }
/*
 * Variant of g_get_charset() that reports the character set used for
 * time-related locale strings (the LC_TIME category).
 *
 * When nl_langinfo() supports _NL_TIME_CODESET the codeset is queried
 * directly from it; otherwise the raw locale charset obtained from
 * _g_locale_charset_raw() is used. The process locale is not modified.
 *
 * The result is kept in a GPrivate cache and only recomputed when the raw
 * codeset name differs from the cached one. The string stored in @charset
 * belongs to that cache: it must not be freed by the caller, and it is
 * released when the cache is next refreshed.
 *
 * Returns: %TRUE if the returned charset is UTF-8
 */
gboolean
_g_get_time_charset (const char **charset)
{
  static GPrivate cache_private = G_PRIVATE_INIT (charset_cache_free);
  GCharsetCache *cache;
  const gchar *raw;
  gboolean cache_is_current;

  cache = g_private_get (&cache_private);
  if (cache == NULL)
    cache = g_private_set_alloc0 (&cache_private, sizeof (GCharsetCache));

#ifdef HAVE_LANGINFO_TIME_CODESET
  raw = nl_langinfo (_NL_TIME_CODESET);
#else
  G_LOCK (aliases);
  raw = _g_locale_charset_raw ();
  G_UNLOCK (aliases);
#endif

  /* The cached entry is reusable only if it was computed for the same raw name. */
  cache_is_current = (cache->raw != NULL && strcmp (cache->raw, raw) == 0);

  if (!cache_is_current)
    {
      const gchar *canonical_name;

      g_free (cache->raw);
      g_free (cache->charset);

      cache->raw = g_strdup (raw);
      cache->is_utf8 = g_utf8_get_charset_internal (raw, &canonical_name);
      cache->charset = g_strdup (canonical_name);
    }

  if (charset != NULL)
    *charset = cache->charset;

  return cache->is_utf8;
}
/*
 * Variant of g_get_charset() that reports the character set of the
 * LC_CTYPE locale category.
 *
 * When nl_langinfo() supports CODESET the codeset is queried directly from
 * it; otherwise the raw locale charset obtained from
 * _g_locale_charset_raw() is used. The process locale is not modified.
 *
 * The result is kept in a GPrivate cache which is refreshed whenever the
 * raw codeset name changes. The string stored in @charset is owned by that
 * cache: callers must not free it, and a later refresh releases it.
 *
 * Returns: %TRUE if the returned charset is UTF-8
 */
gboolean
_g_get_ctype_charset (const char **charset)
{
  static GPrivate cache_private = G_PRIVATE_INIT (charset_cache_free);
  GCharsetCache *entry = g_private_get (&cache_private);
  const gchar *raw_name;
  gboolean needs_refresh;

  if (entry == NULL)
    entry = g_private_set_alloc0 (&cache_private, sizeof (GCharsetCache));

#ifdef HAVE_LANGINFO_CODESET
  raw_name = nl_langinfo (CODESET);
#else
  G_LOCK (aliases);
  raw_name = _g_locale_charset_raw ();
  G_UNLOCK (aliases);
#endif

  /* Nothing cached yet, or the locale's codeset changed since the last call. */
  needs_refresh = (entry->raw == NULL) || (strcmp (entry->raw, raw_name) != 0);

  if (needs_refresh)
    {
      const gchar *resolved;

      g_free (entry->raw);
      g_free (entry->charset);

      entry->raw = g_strdup (raw_name);
      entry->is_utf8 = g_utf8_get_charset_internal (raw_name, &resolved);
      entry->charset = g_strdup (resolved);
    }

  if (charset != NULL)
    *charset = entry->charset;

  return entry->is_utf8;
}
/** /**
* g_get_codeset: * g_get_codeset:
* *

View File

@ -25,6 +25,10 @@ G_BEGIN_DECLS
const char ** _g_charset_get_aliases (const char *canonical_name); const char ** _g_charset_get_aliases (const char *canonical_name);
/* Like g_get_charset(), but for the codeset used by time-related locale
 * strings (queried with nl_langinfo (_NL_TIME_CODESET) where available).
 * Returns TRUE if that charset is UTF-8; *charset, if requested, points at
 * a cached string that the caller must not free. */
gboolean _g_get_time_charset (const char **charset);
/* Like g_get_charset(), but for the LC_CTYPE codeset (queried with
 * nl_langinfo (CODESET) where available). Returns TRUE if that charset is
 * UTF-8; *charset, if requested, points at a cached string that the caller
 * must not free. */
gboolean _g_get_ctype_charset (const char **charset);
G_END_DECLS G_END_DECLS
#endif #endif

View File

@ -40,6 +40,7 @@
#endif #endif
#include "gconvert.h" #include "gconvert.h"
#include "gconvertprivate.h"
#include "gcharsetprivate.h" #include "gcharsetprivate.h"
#include "gslist.h" #include "gslist.h"
@ -1015,6 +1016,52 @@ g_locale_to_utf8 (const gchar *opsysstring,
bytes_read, bytes_written, error); bytes_read, bytes_written, error);
} }
/*
 * Same contract as g_locale_to_utf8(), except that the source character
 * set is the one reported by _g_get_time_charset() (i.e. the LC_TIME
 * codeset) rather than the general locale charset.
 *
 * If the time charset is already UTF-8 the input is handed to
 * strdup_len(); otherwise it is converted to UTF-8 with convert_checked(),
 * rejecting embedded NUL bytes in the output.
 *
 * Returns: The converted string, or %NULL on an error.
 */
gchar *
_g_time_locale_to_utf8 (const gchar *opsysstring,
                        gssize       len,
                        gsize       *bytes_read,
                        gsize       *bytes_written,
                        GError     **error)
{
  const char *time_charset;
  gboolean time_charset_is_utf8;

  time_charset_is_utf8 = _g_get_time_charset (&time_charset);

  if (!time_charset_is_utf8)
    {
      return convert_checked (opsysstring, len, "UTF-8", time_charset,
                              CONVERT_CHECK_NO_NULS_IN_OUTPUT,
                              bytes_read, bytes_written, error);
    }

  return strdup_len (opsysstring, len, bytes_read, bytes_written, error);
}
/*
 * Same contract as g_locale_to_utf8(), except that the source character
 * set is the one reported by _g_get_ctype_charset() (i.e. the LC_CTYPE
 * codeset).
 *
 * If that charset is already UTF-8 the input is handed to strdup_len();
 * otherwise it is converted to UTF-8 with convert_checked(), rejecting
 * embedded NUL bytes in the output.
 *
 * Returns: The converted string, or %NULL on an error.
 */
gchar *
_g_ctype_locale_to_utf8 (const gchar *opsysstring,
                         gssize       len,
                         gsize       *bytes_read,
                         gsize       *bytes_written,
                         GError     **error)
{
  const char *ctype_charset;
  gboolean ctype_charset_is_utf8;

  ctype_charset_is_utf8 = _g_get_ctype_charset (&ctype_charset);

  if (!ctype_charset_is_utf8)
    {
      return convert_checked (opsysstring, len, "UTF-8", ctype_charset,
                              CONVERT_CHECK_NO_NULS_IN_OUTPUT,
                              bytes_read, bytes_written, error);
    }

  return strdup_len (opsysstring, len, bytes_read, bytes_written, error);
}
/** /**
* g_locale_from_utf8: * g_locale_from_utf8:
* @utf8string: a UTF-8 encoded string * @utf8string: a UTF-8 encoded string

40
glib/gconvertprivate.h Normal file
View File

@ -0,0 +1,40 @@
/* gconvertprivate.h - Private GLib gconvert functions
*
* Copyright 2020 Frederic Martinsons
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __G_CONVERTPRIVATE_H__
#define __G_CONVERTPRIVATE_H__

/* Included before G_BEGIN_DECLS on purpose: the macro is provided by the
 * GLib headers, and a nested #include must not end up inside the
 * extern "C" region that G_BEGIN_DECLS opens for C++ consumers. */
#include "glib.h"

G_BEGIN_DECLS

/* Like g_locale_to_utf8(), but converts from the charset reported by
 * _g_get_time_charset(). Returns a newly allocated string, or NULL on error. */
gchar *_g_time_locale_to_utf8 (const gchar *opsysstring,
                               gssize       len,
                               gsize       *bytes_read,
                               gsize       *bytes_written,
                               GError     **error) G_GNUC_MALLOC;

/* Like g_locale_to_utf8(), but converts from the charset reported by
 * _g_get_ctype_charset(). Returns a newly allocated string, or NULL on error. */
gchar *_g_ctype_locale_to_utf8 (const gchar *opsysstring,
                                gssize       len,
                                gsize       *bytes_read,
                                gsize       *bytes_written,
                                GError     **error) G_GNUC_MALLOC;

G_END_DECLS

#endif /* __G_CONVERTPRIVATE_H__ */

View File

@ -1206,6 +1206,22 @@ g_date_prepare_to_parse (const gchar *str,
g_date_fill_parse_tokens (str, pt); g_date_fill_parse_tokens (str, pt);
} }
static guint
convert_twodigit_year (gint y)
{
if (using_twodigit_years && y < 100)
{
guint two = twodigit_start_year % 100;
guint century = (twodigit_start_year / 100) * 100;
if (y < two)
century += 100;
y += century;
}
return y;
}
/** /**
* g_date_set_parse: * g_date_set_parse:
* @date: a #GDate to fill in * @date: a #GDate to fill in
@ -1302,16 +1318,8 @@ g_date_set_parse (GDate *d,
{ {
y += locale_era_adjust; y += locale_era_adjust;
} }
else if (using_twodigit_years && y < 100)
{ y = convert_twodigit_year (y);
guint two = twodigit_start_year % 100;
guint century = (twodigit_start_year / 100) * 100;
if (y < two)
century += 100;
y += century;
}
} }
break; break;
default: default:
@ -1355,18 +1363,8 @@ g_date_set_parse (GDate *d,
m = (pt.n[0]/100) % 100; m = (pt.n[0]/100) % 100;
day = pt.n[0] % 100; day = pt.n[0] % 100;
y = pt.n[0]/10000; y = pt.n[0]/10000;
/* FIXME move this into a separate function */ y = convert_twodigit_year (y);
if (using_twodigit_years && y < 100)
{
guint two = twodigit_start_year % 100;
guint century = (twodigit_start_year / 100) * 100;
if (y < two)
century += 100;
y += century;
}
} }
} }

View File

@ -60,23 +60,23 @@
#include <langinfo.h> #include <langinfo.h>
#endif #endif
#include "gdatetime.h"
#include "gslice.h"
#include "gatomic.h" #include "gatomic.h"
#include "gcharset.h" #include "gcharset.h"
#include "gcharsetprivate.h"
#include "gconvert.h" #include "gconvert.h"
#include "gconvertprivate.h"
#include "gdatetime.h"
#include "gfileutils.h" #include "gfileutils.h"
#include "ghash.h" #include "ghash.h"
#include "glibintl.h"
#include "gmain.h" #include "gmain.h"
#include "gmappedfile.h" #include "gmappedfile.h"
#include "gslice.h"
#include "gstrfuncs.h" #include "gstrfuncs.h"
#include "gtestutils.h" #include "gtestutils.h"
#include "gthread.h" #include "gthread.h"
#include "gtimezone.h" #include "gtimezone.h"
#include "glibintl.h"
#ifndef G_OS_WIN32 #ifndef G_OS_WIN32
#include <sys/time.h> #include <sys/time.h>
#include <time.h> #include <time.h>
@ -2871,7 +2871,7 @@ initialize_alt_digits (void)
if (g_strcmp0 (locale_digit, "") == 0) if (g_strcmp0 (locale_digit, "") == 0)
return NULL; return NULL;
digit = g_locale_to_utf8 (locale_digit, -1, NULL, &digit_len, NULL); digit = _g_ctype_locale_to_utf8 (locale_digit, -1, NULL, &digit_len, NULL);
if (digit == NULL) if (digit == NULL)
return NULL; return NULL;
@ -2995,7 +2995,7 @@ g_date_time_format_locale (GDateTime *datetime,
if (locale_is_utf8) if (locale_is_utf8)
return g_date_time_format_utf8 (datetime, locale_format, outstr, locale_is_utf8); return g_date_time_format_utf8 (datetime, locale_format, outstr, locale_is_utf8);
utf8_format = g_locale_to_utf8 (locale_format, -1, NULL, NULL, NULL); utf8_format = _g_time_locale_to_utf8 (locale_format, -1, NULL, NULL, NULL);
if (utf8_format == NULL) if (utf8_format == NULL)
return FALSE; return FALSE;
@ -3019,7 +3019,7 @@ string_append (GString *string,
} }
else else
{ {
utf8 = g_locale_to_utf8 (s, -1, NULL, &utf8_len, NULL); utf8 = _g_time_locale_to_utf8 (s, -1, NULL, &utf8_len, NULL);
if (utf8 == NULL) if (utf8 == NULL)
return FALSE; return FALSE;
g_string_append_len (string, utf8, utf8_len); g_string_append_len (string, utf8, utf8_len);
@ -3445,10 +3445,11 @@ g_date_time_format (GDateTime *datetime,
{ {
GString *outstr; GString *outstr;
const gchar *charset; const gchar *charset;
/* Avoid conversions from locale charset to UTF-8 if charset is compatible /* Avoid conversions from locale (for LC_TIME and not for LC_MESSAGES unless
* specified otherwise) charset to UTF-8 if charset is compatible
* with UTF-8 already. Check for UTF-8 and synonymous canonical names of * with UTF-8 already. Check for UTF-8 and synonymous canonical names of
* ASCII. */ * ASCII. */
gboolean locale_is_utf8_compatible = g_get_charset (&charset) || gboolean time_is_utf8_compatible = _g_get_time_charset (&charset) ||
g_strcmp0 ("ASCII", charset) == 0 || g_strcmp0 ("ASCII", charset) == 0 ||
g_strcmp0 ("ANSI_X3.4-1968", charset) == 0; g_strcmp0 ("ANSI_X3.4-1968", charset) == 0;
@ -3459,7 +3460,7 @@ g_date_time_format (GDateTime *datetime,
outstr = g_string_sized_new (strlen (format) * 2); outstr = g_string_sized_new (strlen (format) * 2);
if (!g_date_time_format_utf8 (datetime, format, outstr, if (!g_date_time_format_utf8 (datetime, format, outstr,
locale_is_utf8_compatible)) time_is_utf8_compatible))
{ {
g_string_free (outstr, TRUE); g_string_free (outstr, TRUE);
return NULL; return NULL;

View File

@ -2318,6 +2318,116 @@ test_format_iso8601 (void)
g_time_zone_unref (tz); g_time_zone_unref (tz);
} }
/* Parameters for test_format_time_mixed_utf8(): which of the two locale
 * categories the test switches to a UTF-8 locale ("C.UTF-8") and which to
 * a non-UTF-8 one ("de_DE.iso88591"). */
typedef struct
{
/* TRUE: LC_MESSAGES is set to "C.UTF-8"; FALSE: to "de_DE.iso88591" */
gboolean utf8_messages;
/* TRUE: LC_TIME is set to "C.UTF-8"; FALSE: to "de_DE.iso88591" */
gboolean utf8_time;
} MixedUtf8TestData;
/* LC_TIME is UTF-8, LC_MESSAGES is not */
static const MixedUtf8TestData utf8_time_non_utf8_messages = {
.utf8_messages = FALSE,
.utf8_time = TRUE
};
/* LC_MESSAGES is UTF-8, LC_TIME is not */
static const MixedUtf8TestData non_utf8_time_utf8_messages = {
.utf8_messages = TRUE,
.utf8_time = FALSE
};
/* both categories are UTF-8 */
static const MixedUtf8TestData utf8_time_utf8_messages = {
.utf8_messages = TRUE,
.utf8_time = TRUE
};
/* neither category is UTF-8 */
static const MixedUtf8TestData non_utf8_time_non_utf8_messages = {
.utf8_messages = FALSE,
.utf8_time = FALSE
};
/*
 * check_and_set_locale:
 * @category: the locale category to change (an LC_* constant)
 * @name: the locale name to switch @category to
 *
 * Tries to set @category to @name and verifies the change by checking that
 * the locale name now reported for @category contains @name. If it does
 * not, the current test is marked as skipped.
 *
 * Note that the category may have been modified even when %FALSE is
 * returned (setlocale() succeeded but reports a name that does not contain
 * @name), so callers should restore the previous locale in either case.
 *
 * Returns: %TRUE if the locale is in effect, %FALSE if the test was skipped
 */
static gboolean
check_and_set_locale (int          category,
                      const gchar *name)
{
  const char *current;

  setlocale (category, name);

  /* setlocale() may return NULL; never hand that to strstr(). */
  current = setlocale (category, NULL);
  if (current == NULL || strstr (current, name) == NULL)
    {
      g_print ("Unavaible '%s' locale\n", name);
      g_test_skip ("required locale not available, skipping tests");
      return FALSE;
    }

  return TRUE;
}
/*
 * Checks that month names are formatted correctly when LC_TIME and
 * LC_MESSAGES use different (UTF-8 / non-UTF-8) locales.
 *
 * @data points to a MixedUtf8TestData selecting, per category, either
 * "C.UTF-8" or "de_DE.iso88591". The test is skipped if a required locale
 * cannot be set. Both categories are restored to their previous values on
 * every exit path, including the skip paths, so that a partially applied
 * locale change cannot leak into later tests.
 */
static void
test_format_time_mixed_utf8 (gconstpointer data)
{
  const MixedUtf8TestData *test_data = data;
  const gchar *time_locale;
  const gchar *messages_locale;
  gchar *old_time_locale;
  gchar *old_messages_locale;

  g_test_bug ("https://gitlab.gnome.org/GNOME/glib/-/issues/2055");

  old_time_locale = g_strdup (setlocale (LC_TIME, NULL));
  old_messages_locale = g_strdup (setlocale (LC_MESSAGES, NULL));

  time_locale = test_data->utf8_time ? "C.UTF-8" : "de_DE.iso88591";
  messages_locale = test_data->utf8_messages ? "C.UTF-8" : "de_DE.iso88591";

  /* LC_TIME first, then LC_MESSAGES; stop at the first unavailable locale
   * (check_and_set_locale() has already marked the test as skipped). */
  if (!check_and_set_locale (LC_TIME, time_locale) ||
      !check_and_set_locale (LC_MESSAGES, messages_locale))
    goto out;

  if (!test_data->utf8_time)
    {
      /* March to have März in german */
      TEST_PRINTF_DATE (2020, 3, 1, "%b", "Mär");
      TEST_PRINTF_DATE (2020, 3, 1, "%B", "März");
    }
  else
    {
      TEST_PRINTF_DATE (2020, 3, 1, "%b", "mar");
      TEST_PRINTF_DATE (2020, 3, 1, "%B", "march");
    }

out:
  setlocale (LC_TIME, old_time_locale);
  setlocale (LC_MESSAGES, old_messages_locale);
  g_free (old_time_locale);
  g_free (old_messages_locale);
}
#pragma GCC diagnostic push #pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wformat-y2k" #pragma GCC diagnostic ignored "-Wformat-y2k"
static void static void
@ -2980,6 +3090,18 @@ main (gint argc,
g_test_add_func ("/GDateTime/non_utf8_printf", test_non_utf8_printf); g_test_add_func ("/GDateTime/non_utf8_printf", test_non_utf8_printf);
g_test_add_func ("/GDateTime/format_unrepresentable", test_format_unrepresentable); g_test_add_func ("/GDateTime/format_unrepresentable", test_format_unrepresentable);
g_test_add_func ("/GDateTime/format_iso8601", test_format_iso8601); g_test_add_func ("/GDateTime/format_iso8601", test_format_iso8601);
g_test_add_data_func ("/GDateTime/format_mixed/utf8_time_non_utf8_messages",
&utf8_time_non_utf8_messages,
test_format_time_mixed_utf8);
g_test_add_data_func ("/GDateTime/format_mixed/utf8_time_utf8_messages",
&utf8_time_utf8_messages,
test_format_time_mixed_utf8);
g_test_add_data_func ("/GDateTime/format_mixed/non_utf8_time_non_utf8_messages",
&non_utf8_time_non_utf8_messages,
test_format_time_mixed_utf8);
g_test_add_data_func ("/GDateTime/format_mixed/non_utf8_time_utf8_messages",
&non_utf8_time_utf8_messages,
test_format_time_mixed_utf8);
g_test_add_func ("/GDateTime/strftime", test_strftime); g_test_add_func ("/GDateTime/strftime", test_strftime);
g_test_add_func ("/GDateTime/strftime/error_handling", test_GDateTime_strftime_error_handling); g_test_add_func ("/GDateTime/strftime/error_handling", test_GDateTime_strftime_error_handling);
g_test_add_func ("/GDateTime/modifiers", test_modifiers); g_test_add_func ("/GDateTime/modifiers", test_modifiers);

View File

@ -1190,6 +1190,15 @@ if cc.links('''#ifndef _GNU_SOURCE
glib_conf.set('HAVE_LANGINFO_ABALTMON', 1) glib_conf.set('HAVE_LANGINFO_ABALTMON', 1)
endif endif
# Check for nl_langinfo and _NL_TIME_CODESET: link a small program that
# passes _NL_TIME_CODESET to nl_langinfo(). On success
# HAVE_LANGINFO_TIME_CODESET is defined, which the C sources test to decide
# whether the time codeset can be queried directly via nl_langinfo().
if cc.links('''#include <langinfo.h>
int main (int argc, char ** argv) {
char *codeset = nl_langinfo (_NL_TIME_CODESET);
return 0;
}''', name : 'nl_langinfo and _NL_TIME_CODESET')
glib_conf.set('HAVE_LANGINFO_TIME_CODESET', 1)
endif
# Check if C compiler supports the 'signed' keyword # Check if C compiler supports the 'signed' keyword
if not cc.compiles('''signed char x;''', name : 'signed') if not cc.compiles('''signed char x;''', name : 'signed')
glib_conf.set('signed', '/* NOOP */') glib_conf.set('signed', '/* NOOP */')