Add private functions to correctly convert datetime when LC_TIME is not UTF8

Add functions (_g_get_time_charset and _g_get_ctype_charset) to get the LC_TIME
and LC_CTYPE charsets (by using nl_langinfo with _NL_TIME_CODESET and CODESET).
Add further functions (_g_time_locale_to_utf8 and _g_ctype_locale_to_utf8) which
use them to convert the input string accordingly.
Add new test cases mixing UTF-8 and non-UTF-8 LC_TIME with UTF-8
and non-UTF-8 LC_MESSAGES.

Closed #2055

Signed-off-by: Frederic Martinsons <frederic.martinsons@sigfox.com>
This commit is contained in:
Frederic Martinsons 2020-12-01 12:47:27 +01:00
parent c4df3b23c4
commit 782eb1f7af
7 changed files with 318 additions and 6 deletions

View File

@ -36,6 +36,12 @@
#include <string.h>
#include <stdio.h>
#if (HAVE_LANGINFO_TIME_CODESET || HAVE_LANGINFO_CODESET)
#include <langinfo.h>
#endif
#include <locale.h>
#ifdef G_OS_WIN32
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
@ -215,6 +221,87 @@ g_get_charset (const char **charset)
return cache->is_utf8;
}
/*
 * _g_get_time_charset:
 * @charset: return location for the charset name, or %NULL if the caller
 *   only needs the boolean result
 *
 * Counterpart of g_get_charset() for time-related strings. The raw codeset
 * name is read with nl_langinfo (_NL_TIME_CODESET) when
 * HAVE_LANGINFO_TIME_CODESET is defined, and from _g_locale_charset_raw()
 * (under the aliases lock) otherwise. The process locale is only queried,
 * never modified.
 *
 * The result is kept in a #GPrivate cache and is recomputed only when the
 * raw codeset name differs from the cached one. The string stored in
 * @charset points into that cache and must not be freed by the caller.
 *
 * Returns: %TRUE if the returned charset is UTF-8
 */
gboolean
_g_get_time_charset (const char **charset)
{
  static GPrivate cache_private = G_PRIVATE_INIT (charset_cache_free);
  GCharsetCache *entry;
  const gchar *codeset;
  gboolean stale;

  /* Lazily create the cache entry on first use. */
  entry = g_private_get (&cache_private);
  if (entry == NULL)
    entry = g_private_set_alloc0 (&cache_private, sizeof (GCharsetCache));

#ifdef HAVE_LANGINFO_TIME_CODESET
  codeset = nl_langinfo (_NL_TIME_CODESET);
#else
  G_LOCK (aliases);
  codeset = _g_locale_charset_raw ();
  G_UNLOCK (aliases);
#endif

  /* Refresh the cached values only if the raw name changed since last call. */
  stale = (entry->raw == NULL) || (strcmp (entry->raw, codeset) != 0);
  if (stale)
    {
      const gchar *canonical;

      g_free (entry->raw);
      g_free (entry->charset);
      entry->raw = g_strdup (codeset);
      entry->is_utf8 = g_utf8_get_charset_internal (codeset, &canonical);
      entry->charset = g_strdup (canonical);
    }

  if (charset != NULL)
    *charset = entry->charset;

  return entry->is_utf8;
}
/*
 * _g_get_ctype_charset:
 * @charset: return location for the charset name, or %NULL if the caller
 *   only needs the boolean result
 *
 * Counterpart of g_get_charset() for LC_CTYPE strings. The raw codeset name
 * is read with nl_langinfo (CODESET) when HAVE_LANGINFO_CODESET is defined,
 * and from _g_locale_charset_raw() (under the aliases lock) otherwise. The
 * process locale is only queried, never modified.
 *
 * The result is kept in a #GPrivate cache and is recomputed only when the
 * raw codeset name differs from the cached one. The string stored in
 * @charset points into that cache and must not be freed by the caller.
 *
 * Returns: %TRUE if the returned charset is UTF-8
 */
gboolean
_g_get_ctype_charset (const char **charset)
{
  static GPrivate cache_private = G_PRIVATE_INIT (charset_cache_free);
  GCharsetCache *slot;
  const gchar *raw_name;
  gboolean needs_refresh;

  /* Lazily create the cache entry on first use. */
  slot = g_private_get (&cache_private);
  if (slot == NULL)
    slot = g_private_set_alloc0 (&cache_private, sizeof (GCharsetCache));

#ifdef HAVE_LANGINFO_CODESET
  raw_name = nl_langinfo (CODESET);
#else
  G_LOCK (aliases);
  raw_name = _g_locale_charset_raw ();
  G_UNLOCK (aliases);
#endif

  /* Refresh the cached values only if the raw name changed since last call. */
  needs_refresh = (slot->raw == NULL) || (strcmp (slot->raw, raw_name) != 0);
  if (needs_refresh)
    {
      const gchar *resolved;

      g_free (slot->raw);
      g_free (slot->charset);
      slot->raw = g_strdup (raw_name);
      slot->is_utf8 = g_utf8_get_charset_internal (raw_name, &resolved);
      slot->charset = g_strdup (resolved);
    }

  if (charset != NULL)
    *charset = slot->charset;

  return slot->is_utf8;
}
/**
* g_get_codeset:
*

View File

@ -25,6 +25,10 @@ G_BEGIN_DECLS
const char ** _g_charset_get_aliases (const char *canonical_name);
gboolean _g_get_time_charset (const char **charset);
gboolean _g_get_ctype_charset (const char **charset);
G_END_DECLS
#endif

View File

@ -40,6 +40,7 @@
#endif
#include "gconvert.h"
#include "gconvertprivate.h"
#include "gcharsetprivate.h"
#include "gslist.h"
@ -1015,6 +1016,52 @@ g_locale_to_utf8 (const gchar *opsysstring,
bytes_read, bytes_written, error);
}
/*
 * _g_time_locale_to_utf8:
 * @opsysstring: string in the encoding reported by _g_get_time_charset()
 * @len: length argument forwarded unchanged to the underlying helper
 * @bytes_read: forwarded unchanged to the underlying helper
 * @bytes_written: forwarded unchanged to the underlying helper
 * @error: forwarded unchanged to the underlying helper
 *
 * Variant of g_locale_to_utf8() whose source charset comes from
 * _g_get_time_charset() (i.e. the LC_TIME codeset) instead of the general
 * locale charset.
 *
 * Returns: The converted string, or %NULL on an error.
 */
gchar *
_g_time_locale_to_utf8 (const gchar *opsysstring,
                        gssize       len,
                        gsize       *bytes_read,
                        gsize       *bytes_written,
                        GError     **error)
{
  const char *time_charset;
  gboolean already_utf8 = _g_get_time_charset (&time_charset);

  if (!already_utf8)
    return convert_checked (opsysstring, len, "UTF-8", time_charset,
                            CONVERT_CHECK_NO_NULS_IN_OUTPUT,
                            bytes_read, bytes_written, error);

  /* The LC_TIME charset is UTF-8: no charset conversion is requested. */
  return strdup_len (opsysstring, len, bytes_read, bytes_written, error);
}
/*
 * _g_ctype_locale_to_utf8:
 * @opsysstring: string in the encoding reported by _g_get_ctype_charset()
 * @len: length argument forwarded unchanged to the underlying helper
 * @bytes_read: forwarded unchanged to the underlying helper
 * @bytes_written: forwarded unchanged to the underlying helper
 * @error: forwarded unchanged to the underlying helper
 *
 * Variant of g_locale_to_utf8() whose source charset comes from
 * _g_get_ctype_charset() (i.e. the LC_CTYPE codeset) instead of the general
 * locale charset.
 *
 * Returns: The converted string, or %NULL on an error.
 */
gchar *
_g_ctype_locale_to_utf8 (const gchar *opsysstring,
                         gssize       len,
                         gsize       *bytes_read,
                         gsize       *bytes_written,
                         GError     **error)
{
  const char *ctype_charset;
  gboolean already_utf8 = _g_get_ctype_charset (&ctype_charset);

  if (!already_utf8)
    return convert_checked (opsysstring, len, "UTF-8", ctype_charset,
                            CONVERT_CHECK_NO_NULS_IN_OUTPUT,
                            bytes_read, bytes_written, error);

  /* The LC_CTYPE charset is UTF-8: no charset conversion is requested. */
  return strdup_len (opsysstring, len, bytes_read, bytes_written, error);
}
/**
* g_locale_from_utf8:
* @utf8string: a UTF-8 encoded string

40
glib/gconvertprivate.h Normal file
View File

@ -0,0 +1,40 @@
/* gconvertprivate.h - Private GLib gconvert functions
*
* Copyright 2020 Frederic Martinsons
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __G_CONVERTPRIVATE_H__
#define __G_CONVERTPRIVATE_H__

/* Must come before G_BEGIN_DECLS: glib.h is what provides that macro (and
 * the gchar/gssize/gsize/GError types used below), and headers should not be
 * included from inside the extern "C" region the macro opens for C++. */
#include "glib.h"

G_BEGIN_DECLS

/* Like g_locale_to_utf8(), but converts from the LC_TIME charset.
 * Returns a newly allocated string, or NULL on error. */
gchar *_g_time_locale_to_utf8 (const gchar *opsysstring,
                               gssize       len,
                               gsize       *bytes_read,
                               gsize       *bytes_written,
                               GError     **error) G_GNUC_MALLOC;

/* Like g_locale_to_utf8(), but converts from the LC_CTYPE charset.
 * Returns a newly allocated string, or NULL on error. */
gchar *_g_ctype_locale_to_utf8 (const gchar *opsysstring,
                                gssize       len,
                                gsize       *bytes_read,
                                gsize       *bytes_written,
                                GError     **error) G_GNUC_MALLOC;

G_END_DECLS

#endif /* __G_CONVERTPRIVATE_H__ */

View File

@ -62,7 +62,9 @@
#include "gatomic.h"
#include "gcharset.h"
#include "gcharsetprivate.h"
#include "gconvert.h"
#include "gconvertprivate.h"
#include "gdatetime.h"
#include "gfileutils.h"
#include "ghash.h"
@ -2869,7 +2871,7 @@ initialize_alt_digits (void)
if (g_strcmp0 (locale_digit, "") == 0)
return NULL;
digit = g_locale_to_utf8 (locale_digit, -1, NULL, &digit_len, NULL);
digit = _g_ctype_locale_to_utf8 (locale_digit, -1, NULL, &digit_len, NULL);
if (digit == NULL)
return NULL;
@ -2993,7 +2995,7 @@ g_date_time_format_locale (GDateTime *datetime,
if (locale_is_utf8)
return g_date_time_format_utf8 (datetime, locale_format, outstr, locale_is_utf8);
utf8_format = g_locale_to_utf8 (locale_format, -1, NULL, NULL, NULL);
utf8_format = _g_time_locale_to_utf8 (locale_format, -1, NULL, NULL, NULL);
if (utf8_format == NULL)
return FALSE;
@ -3017,7 +3019,7 @@ string_append (GString *string,
}
else
{
utf8 = g_locale_to_utf8 (s, -1, NULL, &utf8_len, NULL);
utf8 = _g_time_locale_to_utf8 (s, -1, NULL, &utf8_len, NULL);
if (utf8 == NULL)
return FALSE;
g_string_append_len (string, utf8, utf8_len);
@ -3443,10 +3445,11 @@ g_date_time_format (GDateTime *datetime,
{
GString *outstr;
const gchar *charset;
/* Avoid conversions from locale charset to UTF-8 if charset is compatible
/* Avoid conversions from locale (for LC_TIME and not for LC_MESSAGES unless
* specified otherwise) charset to UTF-8 if charset is compatible
* with UTF-8 already. Check for UTF-8 and synonymous canonical names of
* ASCII. */
gboolean locale_is_utf8_compatible = g_get_charset (&charset) ||
gboolean time_is_utf8_compatible = _g_get_time_charset (&charset) ||
g_strcmp0 ("ASCII", charset) == 0 ||
g_strcmp0 ("ANSI_X3.4-1968", charset) == 0;
@ -3457,7 +3460,7 @@ g_date_time_format (GDateTime *datetime,
outstr = g_string_sized_new (strlen (format) * 2);
if (!g_date_time_format_utf8 (datetime, format, outstr,
locale_is_utf8_compatible))
time_is_utf8_compatible))
{
g_string_free (outstr, TRUE);
return NULL;

View File

@ -2318,6 +2318,116 @@ test_format_iso8601 (void)
g_time_zone_unref (tz);
}
/* One LC_TIME / LC_MESSAGES combination exercised by
 * test_format_time_mixed_utf8(): each flag selects whether that category is
 * switched to a UTF-8 locale (TRUE) or to a non-UTF-8 one (FALSE). */
typedef struct
{
  gboolean utf8_messages;
  gboolean utf8_time;
} MixedUtf8TestData;

/* The four possible combinations, one fixture per registered test path. */
static const MixedUtf8TestData utf8_time_non_utf8_messages =
  { .utf8_time = TRUE, .utf8_messages = FALSE };

static const MixedUtf8TestData non_utf8_time_utf8_messages =
  { .utf8_time = FALSE, .utf8_messages = TRUE };

static const MixedUtf8TestData utf8_time_utf8_messages =
  { .utf8_time = TRUE, .utf8_messages = TRUE };

static const MixedUtf8TestData non_utf8_time_non_utf8_messages =
  { .utf8_time = FALSE, .utf8_messages = FALSE };
/*
 * check_and_set_locale:
 * @category: locale category to change (e.g. LC_TIME, LC_MESSAGES)
 * @name: locale name to switch @category to
 *
 * Tries to switch @category to @name and then verifies, by querying the
 * category again, that the effective locale name contains @name. If it does
 * not, the current test is marked as skipped.
 *
 * Returns: %TRUE if the locale is now in effect, %FALSE if the test was
 *   skipped because the locale is not available
 */
static gboolean
check_and_set_locale (int          category,
                      const gchar *name)
{
  const gchar *effective;

  setlocale (category, name);

  /* Query what is actually in effect; setlocale() may return NULL, which
   * must not be handed to strstr(). */
  effective = setlocale (category, NULL);
  if (effective == NULL || strstr (effective, name) == NULL)
    {
      g_print ("Unavailable '%s' locale\n", name);
      g_test_skip ("required locale not available, skipping tests");
      return FALSE;
    }

  return TRUE;
}
/*
 * test_format_time_mixed_utf8:
 * @data: a #MixedUtf8TestData describing which of LC_TIME and LC_MESSAGES
 *   should use a UTF-8 locale
 *
 * Switches LC_TIME and LC_MESSAGES to the requested mix of "C.UTF-8" and
 * "de_DE.iso88591", then checks the "%b" and "%B" output for March 2020.
 * If either locale is unavailable the test is skipped by
 * check_and_set_locale(). Both categories are always restored to their
 * previous values before returning, whichever path is taken.
 */
static void
test_format_time_mixed_utf8 (gconstpointer data)
{
  const MixedUtf8TestData *test_data;
  const gchar *time_locale;
  const gchar *messages_locale;
  gchar *old_time_locale;
  gchar *old_messages_locale;

  g_test_bug ("https://gitlab.gnome.org/GNOME/glib/-/issues/2055");

  test_data = (const MixedUtf8TestData *) data;

  /* setlocale() results may be overwritten by later calls, so keep copies. */
  old_time_locale = g_strdup (setlocale (LC_TIME, NULL));
  old_messages_locale = g_strdup (setlocale (LC_MESSAGES, NULL));

  time_locale = test_data->utf8_time ? "C.UTF-8" : "de_DE.iso88591";
  messages_locale = test_data->utf8_messages ? "C.UTF-8" : "de_DE.iso88591";

  /* LC_TIME is attempted first; LC_MESSAGES only if that succeeded. */
  if (!check_and_set_locale (LC_TIME, time_locale) ||
      !check_and_set_locale (LC_MESSAGES, messages_locale))
    goto out;

  if (!test_data->utf8_time)
    {
      /* March to have März in german */
      TEST_PRINTF_DATE (2020, 3, 1, "%b", "Mär");
      TEST_PRINTF_DATE (2020, 3, 1, "%B", "März");
    }
  else
    {
      TEST_PRINTF_DATE (2020, 3, 1, "%b", "mar");
      TEST_PRINTF_DATE (2020, 3, 1, "%B", "march");
    }

out:
  /* Single cleanup path: restore both categories even when a locale switch
   * failed part-way, so later tests never see a leftover locale. */
  setlocale (LC_TIME, old_time_locale);
  setlocale (LC_MESSAGES, old_messages_locale);
  g_free (old_time_locale);
  g_free (old_messages_locale);
}
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wformat-y2k"
static void
@ -2980,6 +3090,18 @@ main (gint argc,
g_test_add_func ("/GDateTime/non_utf8_printf", test_non_utf8_printf);
g_test_add_func ("/GDateTime/format_unrepresentable", test_format_unrepresentable);
g_test_add_func ("/GDateTime/format_iso8601", test_format_iso8601);
g_test_add_data_func ("/GDateTime/format_mixed/utf8_time_non_utf8_messages",
&utf8_time_non_utf8_messages,
test_format_time_mixed_utf8);
g_test_add_data_func ("/GDateTime/format_mixed/utf8_time_utf8_messages",
&utf8_time_utf8_messages,
test_format_time_mixed_utf8);
g_test_add_data_func ("/GDateTime/format_mixed/non_utf8_time_non_utf8_messages",
&non_utf8_time_non_utf8_messages,
test_format_time_mixed_utf8);
g_test_add_data_func ("/GDateTime/format_mixed/non_utf8_time_utf8_messages",
&non_utf8_time_utf8_messages,
test_format_time_mixed_utf8);
g_test_add_func ("/GDateTime/strftime", test_strftime);
g_test_add_func ("/GDateTime/strftime/error_handling", test_GDateTime_strftime_error_handling);
g_test_add_func ("/GDateTime/modifiers", test_modifiers);

View File

@ -1185,6 +1185,15 @@ if cc.links('''#ifndef _GNU_SOURCE
glib_conf.set('HAVE_LANGINFO_ABALTMON', 1)
endif
# Check for nl_langinfo and _NL_TIME_CODESET.
# When the probe links, HAVE_LANGINFO_TIME_CODESET is set so that
# _g_get_time_charset() can query the LC_TIME codeset through
# nl_langinfo (_NL_TIME_CODESET); otherwise that function falls back to
# _g_locale_charset_raw().
if cc.links('''#include <langinfo.h>
int main (int argc, char ** argv) {
char *codeset = nl_langinfo (_NL_TIME_CODESET);
return 0;
}''', name : 'nl_langinfo and _NL_TIME_CODESET')
glib_conf.set('HAVE_LANGINFO_TIME_CODESET', 1)
endif
# Check if C compiler supports the 'signed' keyword
if not cc.compiles('''signed char x;''', name : 'signed')
glib_conf.set('signed', '/* NOOP */')