gdatetime: Store intermediate result of g_date_time_format in UTF-8

In the date-time formatting routine, instead of converting from UTF-8 to
the locale charset and then from the locale charset back to UTF-8, store
all intermediate results in UTF-8.

This solves the issue where a user-provided UTF-8 format string might be
unrepresentable in the current locale charset.

Fixes issue #1605.
This commit is contained in:
Tomasz Miąsko 2018-11-30 00:00:00 +00:00 committed by Philip Withnall
parent 219bed2304
commit 74c1468c98
2 changed files with 179 additions and 159 deletions

View File

@ -4,6 +4,7 @@
* Copyright (C) 2010 Thiago Santos <thiago.sousa.santos@collabora.co.uk> * Copyright (C) 2010 Thiago Santos <thiago.sousa.santos@collabora.co.uk>
* Copyright (C) 2010 Emmanuele Bassi <ebassi@linux.intel.com> * Copyright (C) 2010 Emmanuele Bassi <ebassi@linux.intel.com>
* Copyright © 2010 Codethink Limited * Copyright © 2010 Codethink Limited
* Copyright © 2018 Tomasz Miąsko
* *
* This library is free software; you can redistribute it and/or modify * This library is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as * it under the terms of the GNU Lesser General Public License as
@ -2771,6 +2772,51 @@ format_z (GString *outstr,
return TRUE; return TRUE;
} }
#ifdef HAVE_LANGINFO_OUTDIGIT
/** Initializes the array with UTF-8 encoded alternate digits suibtable for use
* in current locale. Returns NULL when current locale does not use alternate
* digits or there was an error converting them to UTF-8.
*/
static const gchar * const *
initialize_alt_digits (void)
{
guint i;
gsize digit_len;
gchar *digit;
const gchar *locale_digit;
#define N_DIGITS 10
#define MAX_UTF8_ENCODING_LEN 4
static gchar buffer[N_DIGITS * (MAX_UTF8_ENCODING_LEN + 1 /* null separator */)];
#undef N_DIGITS
#undef MAX_UTF8_ENCODING_LEN
gchar *buffer_end = buffer;
static const gchar *alt_digits[10];
for (i = 0; i != 10; ++i)
{
locale_digit = nl_langinfo (_NL_CTYPE_OUTDIGIT0_MB + i);
if (g_strcmp0 (locale_digit, "") == 0)
return NULL;
digit = g_locale_to_utf8 (locale_digit, -1, NULL, &digit_len, NULL);
if (digit == NULL)
return NULL;
g_assert (digit_len < buffer + sizeof (buffer) - buffer_end);
alt_digits[i] = buffer_end;
buffer_end = g_stpcpy (buffer_end, digit);
/* skip trailing null byte */
buffer_end += 1;
g_free (digit);
}
return alt_digits;
}
#endif /* HAVE_LANGINFO_OUTDIGIT */
static void static void
format_number (GString *str, format_number (GString *str,
gboolean use_alt_digits, gboolean use_alt_digits,
@ -2781,7 +2827,7 @@ format_number (GString *str,
const gchar *ascii_digits[10] = { const gchar *ascii_digits[10] = {
"0", "1", "2", "3", "4", "5", "6", "7", "8", "9" "0", "1", "2", "3", "4", "5", "6", "7", "8", "9"
}; };
const gchar **digits = ascii_digits; const gchar * const *digits = ascii_digits;
const gchar *tmp[10]; const gchar *tmp[10];
gint i = 0; gint i = 0;
@ -2790,17 +2836,16 @@ format_number (GString *str,
#ifdef HAVE_LANGINFO_OUTDIGIT #ifdef HAVE_LANGINFO_OUTDIGIT
if (use_alt_digits) if (use_alt_digits)
{ {
static const gchar *alt_digits[10]; static const gchar * const *alt_digits = NULL;
static gsize initialised; static gsize initialised;
/* 2^32 has 10 digits */
if G_UNLIKELY (g_once_init_enter (&initialised)) if G_UNLIKELY (g_once_init_enter (&initialised))
{ {
#define DO_DIGIT(n) \ alt_digits = initialize_alt_digits ();
alt_digits[n] = nl_langinfo (_NL_CTYPE_OUTDIGIT## n ##_MB)
DO_DIGIT(0); DO_DIGIT(1); DO_DIGIT(2); DO_DIGIT(3); DO_DIGIT(4); if (alt_digits == NULL)
DO_DIGIT(5); DO_DIGIT(6); DO_DIGIT(7); DO_DIGIT(8); DO_DIGIT(9); alt_digits = ascii_digits;
#undef DO_DIGIT
g_once_init_leave (&initialised, TRUE); g_once_init_leave (&initialised, TRUE);
} }
@ -2833,7 +2878,6 @@ format_ampm (GDateTime *datetime,
{ {
const gchar *ampm; const gchar *ampm;
gchar *tmp = NULL, *ampm_dup; gchar *tmp = NULL, *ampm_dup;
gsize len;
ampm = GET_AMPM (datetime); ampm = GET_AMPM (datetime);
@ -2844,104 +2888,107 @@ format_ampm (GDateTime *datetime,
{ {
/* This assumes that locale encoding can't have embedded NULs */ /* This assumes that locale encoding can't have embedded NULs */
ampm = tmp = g_locale_to_utf8 (ampm, -1, NULL, NULL, NULL); ampm = tmp = g_locale_to_utf8 (ampm, -1, NULL, NULL, NULL);
if (!tmp) if (tmp == NULL)
return FALSE; return FALSE;
} }
if (uppercase) if (uppercase)
ampm_dup = g_utf8_strup (ampm, -1); ampm_dup = g_utf8_strup (ampm, -1);
else else
ampm_dup = g_utf8_strdown (ampm, -1); ampm_dup = g_utf8_strdown (ampm, -1);
len = strlen (ampm_dup); g_free (tmp);
if (!locale_is_utf8 && GET_AMPM_IS_LOCALE)
{ g_string_append (outstr, ampm_dup);
g_free (tmp);
tmp = g_locale_from_utf8 (ampm_dup, -1, NULL, &len, NULL);
g_free (ampm_dup);
if (!tmp)
return FALSE;
ampm_dup = tmp;
}
g_string_append_len (outstr, ampm_dup, len);
g_free (ampm_dup); g_free (ampm_dup);
return TRUE; return TRUE;
} }
static gboolean g_date_time_format_locale (GDateTime *datetime, static gboolean g_date_time_format_utf8 (GDateTime *datetime,
const gchar *format, const gchar *format,
GString *outstr, GString *outstr,
gboolean locale_is_utf8); gboolean locale_is_utf8);
/* g_date_time_format() subroutine that takes a locale-encoded format /* g_date_time_format() subroutine that takes a locale-encoded format
* string and produces a locale-encoded date/time string. * string and produces a UTF-8 encoded date/time string.
*/ */
static gboolean static gboolean
g_date_time_locale_format_locale (GDateTime *datetime, g_date_time_format_locale (GDateTime *datetime,
const gchar *format, const gchar *locale_format,
GString *outstr, GString *outstr,
gboolean locale_is_utf8) gboolean locale_is_utf8)
{ {
gchar *utf8_format; gchar *utf8_format;
gboolean success; gboolean success;
if (locale_is_utf8) if (locale_is_utf8)
return g_date_time_format_locale (datetime, format, outstr, return g_date_time_format_utf8 (datetime, locale_format, outstr, locale_is_utf8);
locale_is_utf8);
utf8_format = g_locale_to_utf8 (format, -1, NULL, NULL, NULL); utf8_format = g_locale_to_utf8 (locale_format, -1, NULL, NULL, NULL);
if (!utf8_format) if (utf8_format == NULL)
return FALSE; return FALSE;
success = g_date_time_format_locale (datetime, utf8_format, outstr, success = g_date_time_format_utf8 (datetime, utf8_format, outstr,
locale_is_utf8); locale_is_utf8);
g_free (utf8_format); g_free (utf8_format);
return success; return success;
} }
/* g_date_time_format() subroutine that takes a UTF-8 format static inline gboolean
* string and produces a locale-encoded date/time string. string_append (GString *string,
const gchar *s,
gboolean s_is_utf8)
{
gchar *utf8;
gsize utf8_len;
if (s_is_utf8)
{
g_string_append (string, s);
}
else
{
utf8 = g_locale_to_utf8 (s, -1, NULL, &utf8_len, NULL);
if (utf8 == NULL)
return FALSE;
g_string_append_len (string, utf8, utf8_len);
g_free (utf8);
}
return TRUE;
}
/* g_date_time_format() subroutine that takes a UTF-8 encoded format
* string and produces a UTF-8 encoded date/time string.
*/ */
static gboolean static gboolean
g_date_time_format_locale (GDateTime *datetime, g_date_time_format_utf8 (GDateTime *datetime,
const gchar *format, const gchar *utf8_format,
GString *outstr, GString *outstr,
gboolean locale_is_utf8) gboolean locale_is_utf8)
{ {
guint len; guint len;
guint colons; guint colons;
gchar *tmp;
gsize tmp_len;
gunichar c; gunichar c;
gboolean alt_digits = FALSE; gboolean alt_digits = FALSE;
gboolean pad_set = FALSE; gboolean pad_set = FALSE;
gboolean name_is_utf8;
const gchar *pad = ""; const gchar *pad = "";
const gchar *name; const gchar *name;
const gchar *tz; const gchar *tz;
while (*format) while (*utf8_format)
{ {
len = strcspn (format, "%"); len = strcspn (utf8_format, "%");
if (len) if (len)
{ g_string_append_len (outstr, utf8_format, len);
if (locale_is_utf8)
g_string_append_len (outstr, format, len);
else
{
tmp = g_locale_from_utf8 (format, len, NULL, &tmp_len, NULL);
if (!tmp)
return FALSE;
g_string_append_len (outstr, tmp, tmp_len);
g_free (tmp);
}
}
format += len; utf8_format += len;
if (!*format) if (!*utf8_format)
break; break;
g_assert (*format == '%'); g_assert (*utf8_format == '%');
format++; utf8_format++;
if (!*format) if (!*utf8_format)
break; break;
colons = 0; colons = 0;
@ -2949,91 +2996,67 @@ g_date_time_format_locale (GDateTime *datetime,
pad_set = FALSE; pad_set = FALSE;
next_mod: next_mod:
c = g_utf8_get_char (format); c = g_utf8_get_char (utf8_format);
format = g_utf8_next_char (format); utf8_format = g_utf8_next_char (utf8_format);
switch (c) switch (c)
{ {
case 'a': case 'a':
name = WEEKDAY_ABBR (datetime); name = WEEKDAY_ABBR (datetime);
if (g_strcmp0 (name, "") == 0) if (g_strcmp0 (name, "") == 0)
return FALSE; return FALSE;
if (!locale_is_utf8 && !WEEKDAY_ABBR_IS_LOCALE)
{ name_is_utf8 = locale_is_utf8 || !WEEKDAY_ABBR_IS_LOCALE;
tmp = g_locale_from_utf8 (name, -1, NULL, &tmp_len, NULL);
if (!tmp) if (!string_append (outstr, name, name_is_utf8))
return FALSE; return FALSE;
g_string_append_len (outstr, tmp, tmp_len);
g_free (tmp);
}
else
{
g_string_append (outstr, name);
}
break; break;
case 'A': case 'A':
name = WEEKDAY_FULL (datetime); name = WEEKDAY_FULL (datetime);
if (g_strcmp0 (name, "") == 0) if (g_strcmp0 (name, "") == 0)
return FALSE; return FALSE;
if (!locale_is_utf8 && !WEEKDAY_FULL_IS_LOCALE)
{ name_is_utf8 = locale_is_utf8 || !WEEKDAY_FULL_IS_LOCALE;
tmp = g_locale_from_utf8 (name, -1, NULL, &tmp_len, NULL);
if (!tmp) if (!string_append (outstr, name, name_is_utf8))
return FALSE; return FALSE;
g_string_append_len (outstr, tmp, tmp_len);
g_free (tmp);
}
else
{
g_string_append (outstr, name);
}
break; break;
case 'b': case 'b':
name = alt_digits ? MONTH_ABBR_STANDALONE (datetime) name = alt_digits ? MONTH_ABBR_STANDALONE (datetime)
: MONTH_ABBR_WITH_DAY (datetime); : MONTH_ABBR_WITH_DAY (datetime);
if (g_strcmp0 (name, "") == 0) if (g_strcmp0 (name, "") == 0)
return FALSE; return FALSE;
if (!locale_is_utf8 &&
((alt_digits && !MONTH_ABBR_STANDALONE_IS_LOCALE) || name_is_utf8 = locale_is_utf8 ||
(!alt_digits && !MONTH_ABBR_WITH_DAY_IS_LOCALE))) ((alt_digits && !MONTH_ABBR_STANDALONE_IS_LOCALE) ||
{ (!alt_digits && !MONTH_ABBR_WITH_DAY_IS_LOCALE));
tmp = g_locale_from_utf8 (name, -1, NULL, &tmp_len, NULL);
if (!tmp) if (!string_append (outstr, name, name_is_utf8))
return FALSE; return FALSE;
g_string_append_len (outstr, tmp, tmp_len);
g_free (tmp);
}
else
{
g_string_append (outstr, name);
}
break; break;
case 'B': case 'B':
name = alt_digits ? MONTH_FULL_STANDALONE (datetime) name = alt_digits ? MONTH_FULL_STANDALONE (datetime)
: MONTH_FULL_WITH_DAY (datetime); : MONTH_FULL_WITH_DAY (datetime);
if (g_strcmp0 (name, "") == 0) if (g_strcmp0 (name, "") == 0)
return FALSE; return FALSE;
if (!locale_is_utf8 &&
((alt_digits && !MONTH_FULL_STANDALONE_IS_LOCALE) || name_is_utf8 = locale_is_utf8 ||
(!alt_digits && !MONTH_FULL_WITH_DAY_IS_LOCALE))) ((alt_digits && !MONTH_FULL_STANDALONE_IS_LOCALE) ||
{ (!alt_digits && !MONTH_FULL_WITH_DAY_IS_LOCALE));
tmp = g_locale_from_utf8 (name, -1, NULL, &tmp_len, NULL);
if (!tmp) if (!string_append (outstr, name, name_is_utf8))
return FALSE; return FALSE;
g_string_append_len (outstr, tmp, tmp_len);
g_free (tmp);
}
else
{
g_string_append (outstr, name);
}
break; break;
case 'c': case 'c':
{ {
if (g_strcmp0 (PREFERRED_DATE_TIME_FMT, "") == 0) if (g_strcmp0 (PREFERRED_DATE_TIME_FMT, "") == 0)
return FALSE; return FALSE;
if (!g_date_time_locale_format_locale (datetime, PREFERRED_DATE_TIME_FMT, if (!g_date_time_format_locale (datetime, PREFERRED_DATE_TIME_FMT,
outstr, locale_is_utf8)) outstr, locale_is_utf8))
return FALSE; return FALSE;
} }
break; break;
case 'C': case 'C':
@ -3067,20 +3090,14 @@ g_date_time_format_locale (GDateTime *datetime,
: MONTH_ABBR_WITH_DAY (datetime); : MONTH_ABBR_WITH_DAY (datetime);
if (g_strcmp0 (name, "") == 0) if (g_strcmp0 (name, "") == 0)
return FALSE; return FALSE;
if (!locale_is_utf8 &&
((alt_digits && !MONTH_ABBR_STANDALONE_IS_LOCALE) || name_is_utf8 = locale_is_utf8 ||
(!alt_digits && !MONTH_ABBR_WITH_DAY_IS_LOCALE))) ((alt_digits && !MONTH_ABBR_STANDALONE_IS_LOCALE) ||
{ (!alt_digits && !MONTH_ABBR_WITH_DAY_IS_LOCALE));
tmp = g_locale_from_utf8 (name, -1, NULL, &tmp_len, NULL);
if (!tmp) if (!string_append (outstr, name, name_is_utf8))
return FALSE; return FALSE;
g_string_append_len (outstr, tmp, tmp_len);
g_free (tmp);
}
else
{
g_string_append (outstr, name);
}
break; break;
case 'H': case 'H':
format_number (outstr, alt_digits, pad_set ? pad : "0", 2, format_number (outstr, alt_digits, pad_set ? pad : "0", 2,
@ -3128,8 +3145,8 @@ g_date_time_format_locale (GDateTime *datetime,
{ {
if (g_strcmp0 (PREFERRED_12HR_TIME_FMT, "") == 0) if (g_strcmp0 (PREFERRED_12HR_TIME_FMT, "") == 0)
return FALSE; return FALSE;
if (!g_date_time_locale_format_locale (datetime, PREFERRED_12HR_TIME_FMT, if (!g_date_time_format_locale (datetime, PREFERRED_12HR_TIME_FMT,
outstr, locale_is_utf8)) outstr, locale_is_utf8))
return FALSE; return FALSE;
} }
break; break;
@ -3170,8 +3187,8 @@ g_date_time_format_locale (GDateTime *datetime,
{ {
if (g_strcmp0 (PREFERRED_DATE_FMT, "") == 0) if (g_strcmp0 (PREFERRED_DATE_FMT, "") == 0)
return FALSE; return FALSE;
if (!g_date_time_locale_format_locale (datetime, PREFERRED_DATE_FMT, if (!g_date_time_format_locale (datetime, PREFERRED_DATE_FMT,
outstr, locale_is_utf8)) outstr, locale_is_utf8))
return FALSE; return FALSE;
} }
break; break;
@ -3179,8 +3196,8 @@ g_date_time_format_locale (GDateTime *datetime,
{ {
if (g_strcmp0 (PREFERRED_TIME_FMT, "") == 0) if (g_strcmp0 (PREFERRED_TIME_FMT, "") == 0)
return FALSE; return FALSE;
if (!g_date_time_locale_format_locale (datetime, PREFERRED_TIME_FMT, if (!g_date_time_format_locale (datetime, PREFERRED_TIME_FMT,
outstr, locale_is_utf8)) outstr, locale_is_utf8))
return FALSE; return FALSE;
} }
break; break;
@ -3202,16 +3219,7 @@ g_date_time_format_locale (GDateTime *datetime,
break; break;
case 'Z': case 'Z':
tz = g_date_time_get_timezone_abbreviation (datetime); tz = g_date_time_get_timezone_abbreviation (datetime);
tmp = NULL; g_string_append (outstr, tz);
tmp_len = strlen (tz);
if (!locale_is_utf8)
{
tz = tmp = g_locale_from_utf8 (tz, -1, NULL, &tmp_len, NULL);
if (!tmp)
return FALSE;
}
g_string_append_len (outstr, tz, tmp_len);
g_free (tmp);
break; break;
case '%': case '%':
g_string_append_c (outstr, '%'); g_string_append_c (outstr, '%');
@ -3230,7 +3238,7 @@ g_date_time_format_locale (GDateTime *datetime,
goto next_mod; goto next_mod;
case ':': case ':':
/* Colons are only allowed before 'z' */ /* Colons are only allowed before 'z' */
if (*format && *format != 'z' && *format != ':') if (*utf8_format && *utf8_format != 'z' && *utf8_format != ':')
return FALSE; return FALSE;
colons++; colons++;
goto next_mod; goto next_mod;
@ -3355,7 +3363,6 @@ g_date_time_format (GDateTime *datetime,
const gchar *format) const gchar *format)
{ {
GString *outstr; GString *outstr;
gchar *utf8;
gboolean locale_is_utf8 = g_get_charset (NULL); gboolean locale_is_utf8 = g_get_charset (NULL);
g_return_val_if_fail (datetime != NULL, NULL); g_return_val_if_fail (datetime != NULL, NULL);
@ -3364,18 +3371,13 @@ g_date_time_format (GDateTime *datetime,
outstr = g_string_sized_new (strlen (format) * 2); outstr = g_string_sized_new (strlen (format) * 2);
if (!g_date_time_format_locale (datetime, format, outstr, locale_is_utf8)) if (!g_date_time_format_utf8 (datetime, format, outstr, locale_is_utf8))
{ {
g_string_free (outstr, TRUE); g_string_free (outstr, TRUE);
return NULL; return NULL;
} }
if (locale_is_utf8) return g_string_free (outstr, FALSE);
return g_string_free (outstr, FALSE);
utf8 = g_locale_to_utf8 (outstr->str, outstr->len, NULL, NULL, NULL);
g_string_free (outstr, TRUE);
return utf8;
} }

View File

@ -1525,6 +1525,23 @@ test_non_utf8_printf (void)
g_free (oldlocale); g_free (oldlocale);
} }
/* Checks that it is possible to use a format string that
 * is unrepresentable in the current locale charset. */
static void
test_format_unrepresentable (void)
{
  /* Remember the current locale so it can be restored afterwards. */
  gchar *oldlocale = g_strdup (setlocale (LC_ALL, NULL));
  setlocale (LC_ALL, "POSIX");

  TEST_PRINTF ("ąśćł", "ąśćł");

  /* We are using Unicode ratio symbol here, which is outside ASCII. */
  TEST_PRINTF_TIME (23, 15, 0, "%H∶%M", "23∶15");

  setlocale (LC_ALL, oldlocale);
  g_free (oldlocale);
}
static void static void
test_modifiers (void) test_modifiers (void)
{ {
@ -2489,6 +2506,7 @@ main (gint argc,
g_test_add_func ("/GDateTime/now", test_GDateTime_now); g_test_add_func ("/GDateTime/now", test_GDateTime_now);
g_test_add_func ("/GDateTime/printf", test_GDateTime_printf); g_test_add_func ("/GDateTime/printf", test_GDateTime_printf);
g_test_add_func ("/GDateTime/non_utf8_printf", test_non_utf8_printf); g_test_add_func ("/GDateTime/non_utf8_printf", test_non_utf8_printf);
g_test_add_func ("/GDateTime/format_unrepresentable", test_format_unrepresentable);
g_test_add_func ("/GDateTime/strftime", test_strftime); g_test_add_func ("/GDateTime/strftime", test_strftime);
g_test_add_func ("/GDateTime/strftime/error_handling", test_GDateTime_strftime_error_handling); g_test_add_func ("/GDateTime/strftime/error_handling", test_GDateTime_strftime_error_handling);
g_test_add_func ("/GDateTime/modifiers", test_modifiers); g_test_add_func ("/GDateTime/modifiers", test_modifiers);