From df4aea76204090f770a8fd90c2b68b51c2cfc2a3 Mon Sep 17 00:00:00 2001 From: Philip Withnall Date: Wed, 18 Oct 2023 15:50:57 +0100 Subject: [PATCH 1/4] gdatetime: Add support for %E modifier to g_date_time_format() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `%E` modifier causes dates to be formatted using an alternative era representation for years. This doesn’t do anything for most dates, but in locales such as Thai and Japanese it causes years to be printed using era names. In Thai, this means the Thai solar calendar (https://en.wikipedia.org/wiki/Thai_solar_calendar). In Japanese, this means Japanese era names (https://en.wikipedia.org/wiki/Japanese_era_name). The `%E` modifier syntax follows what’s supported in glibc — see nl_langinfo(3). Supporting this is quite involved, as it means loading the `ERA` description from libc and parsing it. Unit tests are included. Signed-off-by: Philip Withnall Fixes: #3119 --- .gitlab-ci.yml | 6 +- .gitlab-ci/debian-stable.Dockerfile | 1 + .gitlab-ci/fedora.Dockerfile | 1 + .gitlab-ci/mingw.Dockerfile | 2 +- glib/gdatetime-private.c | 254 ++++++++++++++++++++++++++++ glib/gdatetime-private.h | 85 ++++++++++ glib/gdatetime.c | 247 +++++++++++++++++++++++++-- glib/meson.build | 1 + glib/tests/gdatetime.c | 195 +++++++++++++++++++++ glib/tests/meson.build | 1 + meson.build | 5 + 11 files changed, 784 insertions(+), 14 deletions(-) create mode 100644 glib/gdatetime-private.c create mode 100644 glib/gdatetime-private.h diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 906387ea9..b0415eefc 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -11,11 +11,11 @@ cache: - _ccache/ variables: - FEDORA_IMAGE: "registry.gitlab.gnome.org/gnome/glib/fedora:v22" + FEDORA_IMAGE: "registry.gitlab.gnome.org/gnome/glib/fedora:v23" COVERITY_IMAGE: "registry.gitlab.gnome.org/gnome/glib/coverity:v7" - DEBIAN_IMAGE: "registry.gitlab.gnome.org/gnome/glib/debian-stable:v16" + DEBIAN_IMAGE: 
"registry.gitlab.gnome.org/gnome/glib/debian-stable:v17" ALPINE_IMAGE: "registry.gitlab.gnome.org/gnome/glib/alpine:v0" - MINGW_IMAGE: "registry.gitlab.gnome.org/gnome/glib/mingw:v12" + MINGW_IMAGE: "registry.gitlab.gnome.org/gnome/glib/mingw:v13" MESON_TEST_TIMEOUT_MULTIPLIER: 4 G_MESSAGES_DEBUG: all MESON_COMMON_OPTIONS: "--buildtype debug --wrap-mode=nodownload --fatal-meson-warnings" diff --git a/.gitlab-ci/debian-stable.Dockerfile b/.gitlab-ci/debian-stable.Dockerfile index 592a6b356..027829bd3 100644 --- a/.gitlab-ci/debian-stable.Dockerfile +++ b/.gitlab-ci/debian-stable.Dockerfile @@ -64,6 +64,7 @@ RUN locale-gen de_DE.UTF-8 \ && locale-gen lt_LT.UTF-8 \ && locale-gen pl_PL.UTF-8 \ && locale-gen ru_RU.UTF-8 \ + && locale-gen th_TH.UTF-8 \ && locale-gen tr_TR.UTF-8 ENV LANG=C.UTF-8 LANGUAGE=C.UTF-8 LC_ALL=C.UTF-8 diff --git a/.gitlab-ci/fedora.Dockerfile b/.gitlab-ci/fedora.Dockerfile index 5e28542ab..057db215f 100644 --- a/.gitlab-ci/fedora.Dockerfile +++ b/.gitlab-ci/fedora.Dockerfile @@ -32,6 +32,7 @@ RUN dnf -y update \ glibc-langpack-lt \ glibc-langpack-pl \ glibc-langpack-ru \ + glibc-langpack-th \ glibc-langpack-tr \ "gnome-desktop-testing >= 2018.1" \ gtk-doc \ diff --git a/.gitlab-ci/mingw.Dockerfile b/.gitlab-ci/mingw.Dockerfile index d801255ce..f95171836 100644 --- a/.gitlab-ci/mingw.Dockerfile +++ b/.gitlab-ci/mingw.Dockerfile @@ -1,4 +1,4 @@ -FROM registry.gitlab.gnome.org/gnome/glib/fedora:v22 +FROM registry.gitlab.gnome.org/gnome/glib/fedora:v23 USER root diff --git a/glib/gdatetime-private.c b/glib/gdatetime-private.c new file mode 100644 index 000000000..19f575a7d --- /dev/null +++ b/glib/gdatetime-private.c @@ -0,0 +1,254 @@ +/* + * Copyright 2023 Philip Withnall + * + * SPDX-License-Identifier: LGPL-2.1-or-later + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, 
or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "glib.h" +#include "gdatetime-private.h" + +/** + * _g_era_date_compare: + * @date1: first date + * @date2: second date + * + * Compare two #GEraDates for ordering, taking into account negative and + * positive infinity. + * + * Returns: strcmp()-style integer, `<0` indicates `date1 < date2`, `0` + * indicates `date1 == date2`, `>0` indicates `date1 > date2` + * Since: 2.80 + */ +int +_g_era_date_compare (const GEraDate *date1, + const GEraDate *date2) +{ + if (date1->type == G_ERA_DATE_SET && + date2->type == G_ERA_DATE_SET) + { + if (date1->year != date2->year) + return date1->year - date2->year; + if (date1->month != date2->month) + return date1->month - date2->month; + return date1->day - date2->day; + } + + if (date1->type == date2->type) + return 0; + + if (date1->type == G_ERA_DATE_MINUS_INFINITY || date2->type == G_ERA_DATE_PLUS_INFINITY) + return -1; + if (date1->type == G_ERA_DATE_PLUS_INFINITY || date2->type == G_ERA_DATE_MINUS_INFINITY) + return 1; + + g_assert_not_reached (); +} + +static gboolean +parse_era_date (const char *str, + const char *endptr, + GEraDate *out_date) +{ + const char *str_endptr = NULL; + int year_multiplier; + guint64 year, month, day; + + year_multiplier = (str[0] == '-') ? 
-1 : 1; + if (str[0] == '-' || str[0] == '+') + str++; + + year = g_ascii_strtoull (str, (gchar **) &str_endptr, 10); + g_assert (str_endptr <= endptr); + if (str_endptr == endptr || *str_endptr != '/' || year > G_MAXINT) + return FALSE; + str = str_endptr + 1; + + month = g_ascii_strtoull (str, (gchar **) &str_endptr, 10); + g_assert (str_endptr <= endptr); + if (str_endptr == endptr || *str_endptr != '/' || month < 1 || month > 12) + return FALSE; + str = str_endptr + 1; + + day = g_ascii_strtoull (str, (gchar **) &str_endptr, 10); + g_assert (str_endptr <= endptr); + if (str_endptr != endptr || day < 1 || day > 31) + return FALSE; + + /* Success */ + out_date->type = G_ERA_DATE_SET; + out_date->year = year_multiplier * year; + out_date->month = month; + out_date->day = day; + + return TRUE; +} + +/** + * _g_era_description_segment_ref: + * @segment: a #GEraDescriptionSegment + * + * Increase the ref count of @segment. + * + * Returns: (transfer full): @segment + * Since: 2.80 + */ +GEraDescriptionSegment * +_g_era_description_segment_ref (GEraDescriptionSegment *segment) +{ + g_atomic_ref_count_inc (&segment->ref_count); + return segment; +} + +/** + * _g_era_description_segment_unref: + * @segment: (transfer full): a #GEraDescriptionSegment to unref + * + * Decreases the ref count of @segment. + * + * Since: 2.80 + */ +void +_g_era_description_segment_unref (GEraDescriptionSegment *segment) +{ + if (g_atomic_ref_count_dec (&segment->ref_count)) + { + g_free (segment->era_format); + g_free (segment->era_name); + g_free (segment); + } +} + +/** + * _g_era_description_parse: + * @desc: an `ERA` description string from `nl_langinfo()` + * + * Parse an ERA description string. See [`nl_langinfo(3)`](man:nl_langinfo(3)). + * + * Example description string for th_TH.UTF-8: + * ``` + * +:1:-543/01/01:+*:พ.ศ.:%EC %Ey + * ``` + * + * @desc must be in UTF-8, so all conversion from the locale encoding must + * happen before this function is called. 
The resulting `era_name` and + * `era_format` in the returned segments will be in UTF-8. + * + * Returns: (transfer full) (nullable) (element-type GEraDescriptionSegment): + * array of one or more parsed era segments, or %NULL if parsing failed + * Since: 2.80 + */ +GPtrArray * +_g_era_description_parse (const char *desc) +{ + GPtrArray *segments = g_ptr_array_new_with_free_func ((GDestroyNotify) _g_era_description_segment_unref); + + for (const char *p = desc; *p != '\0';) + { + const char *next_colon, *endptr = NULL; + GEraDescriptionSegment *segment = NULL; + char direction; + guint64 offset; + GEraDate start_date, end_date; + char *era_name = NULL, *era_format = NULL; + + /* direction */ + direction = *p++; + if (direction != '+' && direction != '-') + goto error; + + if (*p++ != ':') + goto error; + + /* offset */ + next_colon = strchr (p, ':'); + if (next_colon == NULL) + goto error; + + offset = g_ascii_strtoull (p, (gchar **) &endptr, 10); + if (endptr != next_colon) + goto error; + p = next_colon + 1; + + /* start_date */ + next_colon = strchr (p, ':'); + if (next_colon == NULL) + goto error; + + if (!parse_era_date (p, next_colon, &start_date)) + goto error; + p = next_colon + 1; + + /* end_date */ + next_colon = strchr (p, ':'); + if (next_colon == NULL) + goto error; + + if (strncmp (p, "-*", 2) == 0) + end_date.type = G_ERA_DATE_MINUS_INFINITY; + else if (strncmp (p, "+*", 2) == 0) + end_date.type = G_ERA_DATE_PLUS_INFINITY; + else if (!parse_era_date (p, next_colon, &end_date)) + goto error; + p = next_colon + 1; + + /* era_name */ + next_colon = strchr (p, ':'); + if (next_colon == NULL) + goto error; + + if (next_colon - p == 0) + goto error; + era_name = g_strndup (p, next_colon - p); + p = next_colon + 1; + + /* era_format; either the final field in the segment (followed by a + * semicolon) or the description (followed by nul) */ + next_colon = strchr (p, ';'); + if (next_colon == NULL) + next_colon = p + strlen (p); + + if (next_colon - p == 0) + 
{ + g_free (era_name); + goto error; + } + era_format = g_strndup (p, next_colon - p); + if (*next_colon == ';') + p = next_colon + 1; + else + p = next_colon; + + /* Successfully parsed that segment. */ + segment = g_new0 (GEraDescriptionSegment, 1); + g_atomic_ref_count_init (&segment->ref_count); + segment->offset = offset; + segment->start_date = start_date; + segment->end_date = end_date; + segment->direction_multiplier = + ((_g_era_date_compare (&segment->start_date, &segment->end_date) <= 0) ? 1 : -1) * + ((direction == '-') ? -1 : 1); + segment->era_name = g_steal_pointer (&era_name); + segment->era_format = g_steal_pointer (&era_format); + + g_ptr_array_add (segments, g_steal_pointer (&segment)); + } + + return g_steal_pointer (&segments); + +error: + g_ptr_array_unref (segments); + return NULL; +} diff --git a/glib/gdatetime-private.h b/glib/gdatetime-private.h new file mode 100644 index 000000000..3e804dd47 --- /dev/null +++ b/glib/gdatetime-private.h @@ -0,0 +1,85 @@ +/* + * Copyright 2023 Philip Withnall + * + * SPDX-License-Identifier: LGPL-2.1-or-later + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#pragma once + +#include "glib.h" + +G_BEGIN_DECLS + +/** + * GEraDate: + * @type: the type of date + * @year: year of the date, in the Gregorian calendar + * @month: month of the date, in the Gregorian calendar + * @day: day of the date, in the Gregorian calendar + * + * A date from a #GEraDescriptionSegment. + * + * If @type is %G_ERA_DATE_SET, @year, @month and @day are valid. Otherwise, + * they are undefined. + * + * Since: 2.80 + */ +typedef struct { + enum { + G_ERA_DATE_SET, + G_ERA_DATE_PLUS_INFINITY, + G_ERA_DATE_MINUS_INFINITY, + } type; + int year; + int month; + int day; +} GEraDate; + +int _g_era_date_compare (const GEraDate *date1, + const GEraDate *date2); + +/** + * GEraDescriptionSegment: + * @ref_count: reference count + * @direction_multiplier: `-1` or `1` depending on the order of @start_date and + * @end_date + * @offset: offset of the first year in the era + * @start_date: start date (in the Gregorian calendar) of the era + * @end_date: end date (in the Gregorian calendar) of the era + * @era_name: (not nullable): name of the era + * @era_format: (not nullable): format string to use for `%EY` + * + * A segment of an `ERA` description string, describing a single era. See + * [`nl_langinfo(3)`](man:nl_langinfo(3)). 
+ * + * Since: 2.80 + */ +typedef struct { + gatomicrefcount ref_count; + int direction_multiplier; + guint64 offset; + GEraDate start_date; /* inclusive */ + GEraDate end_date; /* inclusive */ + char *era_name; /* UTF-8 encoded */ + char *era_format; /* UTF-8 encoded */ +} GEraDescriptionSegment; + +GPtrArray *_g_era_description_parse (const char *desc); + +GEraDescriptionSegment *_g_era_description_segment_ref (GEraDescriptionSegment *segment); +void _g_era_description_segment_unref (GEraDescriptionSegment *segment); + +G_END_DECLS diff --git a/glib/gdatetime.c b/glib/gdatetime.c index a41e1bfee..ae50c27c7 100644 --- a/glib/gdatetime.c +++ b/glib/gdatetime.c @@ -69,6 +69,7 @@ #include "gconvert.h" #include "gconvertprivate.h" #include "gdatetime.h" +#include "gdatetime-private.h" #include "gfileutils.h" #include "ghash.h" #include "glibintl.h" @@ -162,8 +163,11 @@ static const guint16 days_in_year[2][13] = #define GET_AMPM_IS_LOCALE TRUE #define PREFERRED_DATE_TIME_FMT nl_langinfo (D_T_FMT) +#define PREFERRED_ERA_DATE_TIME_FMT nl_langinfo (ERA_D_T_FMT) #define PREFERRED_DATE_FMT nl_langinfo (D_FMT) +#define PREFERRED_ERA_DATE_FMT nl_langinfo (ERA_D_FMT) #define PREFERRED_TIME_FMT nl_langinfo (T_FMT) +#define PREFERRED_ERA_TIME_FMT nl_langinfo (ERA_T_FMT) #define PREFERRED_12HR_TIME_FMT nl_langinfo (T_FMT_AMPM) static const gint weekday_item[2][7] = @@ -187,6 +191,10 @@ static const gint month_item[2][12] = #define MONTH_FULL(d) nl_langinfo (month_item[1][g_date_time_get_month (d) - 1]) #define MONTH_FULL_IS_LOCALE TRUE +#define ERA_DESCRIPTION nl_langinfo (ERA) +#define ERA_DESCRIPTION_IS_LOCALE TRUE +#define ERA_DESCRIPTION_N_SEGMENTS (int) (gintptr) nl_langinfo (_NL_TIME_ERA_NUM_ENTRIES) + #else #define GET_AMPM(d) (get_fallback_ampm (g_date_time_get_hour (d))) @@ -194,12 +202,15 @@ static const gint month_item[2][12] = /* Translators: this is the preferred format for expressing the date and the time */ #define PREFERRED_DATE_TIME_FMT C_("GDateTime", "%a %b %e 
%H:%M:%S %Y") +#define PREFERRED_ERA_DATE_TIME_FMT PREFERRED_DATE_TIME_FMT /* Translators: this is the preferred format for expressing the date */ #define PREFERRED_DATE_FMT C_("GDateTime", "%m/%d/%y") +#define PREFERRED_ERA_DATE_FMT PREFERRED_DATE_FMT /* Translators: this is the preferred format for expressing the time */ #define PREFERRED_TIME_FMT C_("GDateTime", "%H:%M:%S") +#define PREFERRED_ERA_TIME_FMT PREFERRED_TIME_FMT /* Translators: this is the preferred format for expressing 12 hour time */ #define PREFERRED_12HR_TIME_FMT C_("GDateTime", "%I:%M:%S %p") @@ -219,6 +230,10 @@ static const gint month_item[2][12] = #define MONTH_FULL(d) (get_month_name_standalone (g_date_time_get_month (d))) #define MONTH_FULL_IS_LOCALE FALSE +#define ERA_DESCRIPTION NULL +#define ERA_DESCRIPTION_IS_LOCALE FALSE +#define ERA_DESCRIPTION_N_SEGMENTS 0 + static const gchar * get_month_name_standalone (gint month) { @@ -2865,6 +2880,131 @@ initialize_alt_digits (void) } #endif /* HAVE_LANGINFO_OUTDIGIT */ +/* Look up the era which contains @datetime, in the ERA description from libc + * which corresponds to the currently set LC_TIME locale. The ERA is parsed and + * cached the first time this function is called (or when LC_TIME changes). + * See nl_langinfo(3). + * + * The return value is (transfer full). 
*/ +static GEraDescriptionSegment * +date_time_lookup_era (GDateTime *datetime, + gboolean locale_is_utf8) +{ + static GMutex era_mutex; + static GPtrArray *static_era_description = NULL; /* (mutex era_mutex) (element-type GEraDescriptionSegment) */ + static const char *static_era_description_locale = NULL; /* (mutex era_mutex) */ + const char *current_lc_time = setlocale (LC_TIME, NULL); + GPtrArray *local_era_description; /* (element-type GEraDescriptionSegment) */ + GEraDate datetime_date; + + g_mutex_lock (&era_mutex); + + if (static_era_description_locale != current_lc_time) + { + const char *era_description_str; + size_t era_description_str_len; + char *tmp = NULL; + + era_description_str = ERA_DESCRIPTION; + if (era_description_str != NULL) + { + /* FIXME: glibc 2.37 seems to return the era segments nul-separated rather + * than semicolon-separated (which is what nl_langinfo(3) specifies). + * Fix that up before sending it to the parsing code. + * See https://sourceware.org/bugzilla/show_bug.cgi?id=31030*/ + { + /* Work out the length of the whole description string, regardless + * of whether it uses nuls or semicolons as separators. */ + int n_entries = ERA_DESCRIPTION_N_SEGMENTS; + const char *s = era_description_str; + + for (int i = 1; i < n_entries; i++) + { + const char *next_semicolon = strchr (s, ';'); + const char *next_nul = strchr (s, '\0'); + + if (next_semicolon != NULL && next_semicolon < next_nul) + s = next_semicolon + 1; + else + s = next_nul + 1; + } + + era_description_str_len = strlen (s) + (s - era_description_str); + + /* Replace all the nuls with semicolons. 
*/ + era_description_str = tmp = g_memdup2 (era_description_str, era_description_str_len + 1); + s = era_description_str; + + for (int i = 1; i < n_entries; i++) + { + char *next_nul = strchr (s, '\0'); + + if ((size_t) (next_nul - era_description_str) >= era_description_str_len) + break; + + *next_nul = ';'; + s = next_nul + 1; + } + } + + /* Convert from the LC_TIME encoding to UTF-8 if needed. */ + if (!locale_is_utf8 && ERA_DESCRIPTION_IS_LOCALE) + { + char *tmp2 = NULL; + era_description_str = tmp2 = g_locale_to_utf8 (era_description_str, -1, NULL, NULL, NULL); + g_free (tmp); + tmp = g_steal_pointer (&tmp2); + } + + g_clear_pointer (&static_era_description, g_ptr_array_unref); + + if (era_description_str != NULL) + static_era_description = _g_era_description_parse (era_description_str); + } + + if (static_era_description == NULL) + g_warning ("Could not parse ERA description: %s", era_description_str); + + g_free (tmp); + + static_era_description_locale = current_lc_time; + } + + if (static_era_description == NULL) + { + g_mutex_unlock (&era_mutex); + return NULL; + } + + local_era_description = g_ptr_array_ref (static_era_description); + g_mutex_unlock (&era_mutex); + + /* Search through the eras and see if one matches. */ + datetime_date.type = G_ERA_DATE_SET; + datetime_date.year = g_date_time_get_year (datetime); + datetime_date.month = g_date_time_get_month (datetime); + datetime_date.day = g_date_time_get_day_of_month (datetime); + + for (unsigned int i = 0; i < local_era_description->len; i++) + { + GEraDescriptionSegment *segment = g_ptr_array_index (local_era_description, i); + + if ((_g_era_date_compare (&segment->start_date, &datetime_date) <= 0 && + _g_era_date_compare (&datetime_date, &segment->end_date) <= 0) || + (_g_era_date_compare (&segment->end_date, &datetime_date) <= 0 && + _g_era_date_compare (&datetime_date, &segment->start_date) <= 0)) + { + /* @datetime is within this era segment. 
*/ + g_ptr_array_unref (local_era_description); + return _g_era_description_segment_ref (segment); + } + } + + g_ptr_array_unref (local_era_description); + + return NULL; +} + static void format_number (GString *str, gboolean use_alt_digits, @@ -3043,6 +3183,7 @@ g_date_time_format_utf8 (GDateTime *datetime, guint colons; gunichar c; gboolean alt_digits = FALSE; + gboolean alt_era = FALSE; gboolean pad_set = FALSE; gboolean mod_case = FALSE; gboolean name_is_utf8; @@ -3069,6 +3210,7 @@ g_date_time_format_utf8 (GDateTime *datetime, colons = 0; alt_digits = FALSE; + alt_era = FALSE; pad_set = FALSE; mod_case = FALSE; @@ -3129,14 +3271,31 @@ g_date_time_format_utf8 (GDateTime *datetime, break; case 'c': { - if (g_strcmp0 (PREFERRED_DATE_TIME_FMT, "") == 0) + const char *subformat = alt_era ? PREFERRED_ERA_DATE_TIME_FMT : PREFERRED_DATE_TIME_FMT; + + /* Fallback */ + if (alt_era && g_strcmp0 (subformat, "") == 0) + subformat = PREFERRED_DATE_TIME_FMT; + + if (g_strcmp0 (subformat, "") == 0) return FALSE; - if (!g_date_time_format_locale (datetime, PREFERRED_DATE_TIME_FMT, + if (!g_date_time_format_locale (datetime, subformat, outstr, locale_is_utf8)) return FALSE; } break; case 'C': + if (alt_era) + { + GEraDescriptionSegment *era = date_time_lookup_era (datetime, locale_is_utf8); + if (era != NULL) + { + g_string_append (outstr, era->era_name); + _g_era_description_segment_unref (era); + break; + } + } + format_number (outstr, alt_digits, pad_set ? pad : "0", 2, g_date_time_get_year (datetime) / 100); break; @@ -3214,6 +3373,9 @@ g_date_time_format_utf8 (GDateTime *datetime, case 'O': alt_digits = TRUE; goto next_mod; + case 'E': + alt_era = TRUE; + goto next_mod; case 'p': if (!format_ampm (datetime, outstr, locale_is_utf8, mod_case && g_strcmp0 (mod, "#") == 0 ? FALSE @@ -3270,29 +3432,78 @@ g_date_time_format_utf8 (GDateTime *datetime, break; case 'x': { - if (g_strcmp0 (PREFERRED_DATE_FMT, "") == 0) + const char *subformat = alt_era ? 
PREFERRED_ERA_DATE_FMT : PREFERRED_DATE_FMT; + + /* Fallback */ + if (alt_era && g_strcmp0 (subformat, "") == 0) + subformat = PREFERRED_DATE_FMT; + + if (g_strcmp0 (subformat, "") == 0) return FALSE; - if (!g_date_time_format_locale (datetime, PREFERRED_DATE_FMT, + if (!g_date_time_format_locale (datetime, subformat, outstr, locale_is_utf8)) return FALSE; } break; case 'X': { - if (g_strcmp0 (PREFERRED_TIME_FMT, "") == 0) + const char *subformat = alt_era ? PREFERRED_ERA_TIME_FMT : PREFERRED_TIME_FMT; + + /* Fallback */ + if (alt_era && g_strcmp0 (subformat, "") == 0) + subformat = PREFERRED_TIME_FMT; + + if (g_strcmp0 (subformat, "") == 0) return FALSE; - if (!g_date_time_format_locale (datetime, PREFERRED_TIME_FMT, + if (!g_date_time_format_locale (datetime, subformat, outstr, locale_is_utf8)) return FALSE; } break; case 'y': - format_number (outstr, alt_digits, pad_set ? pad : "0", 2, - g_date_time_get_year (datetime) % 100); + if (alt_era) + { + GEraDescriptionSegment *era = date_time_lookup_era (datetime, locale_is_utf8); + if (era != NULL) + { + int delta = g_date_time_get_year (datetime) - era->start_date.year; + + /* Both these years are in the Gregorian calendar (CE/BCE), + * which has no year zero. So take one from the delta if they + * cross across where year zero would be. */ + if ((g_date_time_get_year (datetime) < 0) != (era->start_date.year < 0)) + delta -= 1; + + format_number (outstr, alt_digits, pad_set ? pad : "0", 2, + era->offset + delta * era->direction_multiplier); + _g_era_description_segment_unref (era); + break; + } + } + + format_number (outstr, alt_digits, pad_set ? 
pad : "0", 2, + g_date_time_get_year (datetime) % 100); break; case 'Y': - format_number (outstr, alt_digits, 0, 0, - g_date_time_get_year (datetime)); + if (alt_era) + { + GEraDescriptionSegment *era = date_time_lookup_era (datetime, locale_is_utf8); + if (era != NULL) + { + if (!g_date_time_format_utf8 (datetime, era->era_format, + outstr, locale_is_utf8)) + { + _g_era_description_segment_unref (era); + return FALSE; + } + + _g_era_description_segment_unref (era); + break; + } + } + + format_number (outstr, alt_digits, 0, 0, + g_date_time_get_year (datetime)); break; case 'z': { @@ -3461,6 +3672,22 @@ g_date_time_format_utf8 (GDateTime *datetime, * `strftime()` extension expected to be added to the future POSIX specification, * `%Ob` and `%Oh` are GNU `strftime()` extensions. Since: 2.56 * + * Since GLib 2.80, when `E` is used with `%c`, `%C`, `%x`, `%X`, `%y` or `%Y`, + * the date is formatted using an alternate era representation specific to the + * locale. This is typically used for the Thai solar calendar or Japanese era + * names, for example. + * + * - `%Ec`: the preferred date and time representation for the current locale, + * using the alternate era representation + * - `%EC`: the name of the era + * - `%Ex`: the preferred date representation for the current locale without + * the time, using the alternate era representation + * - `%EX`: the preferred time representation for the current locale without + * the date, using the alternate era representation + * - `%Ey`: the year since the beginning of the era denoted by the `%EC` + * specifier + * - `%EY`: the full alternative year representation + * * Returns: (transfer full) (nullable): a newly allocated string formatted to * the requested format or %NULL in the case that there was an error (such * as a format specifier not being supported in the current locale). 
The diff --git a/glib/meson.build b/glib/meson.build index 5940e6bc3..95e863e46 100644 --- a/glib/meson.build +++ b/glib/meson.build @@ -277,6 +277,7 @@ glib_sources += files( 'gdataset.c', 'gdate.c', 'gdatetime.c', + 'gdatetime-private.c', 'gdir.c', 'genviron.c', 'gerror.c', diff --git a/glib/tests/gdatetime.c b/glib/tests/gdatetime.c index f7aa9a2d2..52d8a3d49 100644 --- a/glib/tests/gdatetime.c +++ b/glib/tests/gdatetime.c @@ -1,6 +1,7 @@ /* gdatetime-tests.c * * Copyright (C) 2009-2010 Christian Hergert + * Copyright 2023 Philip Withnall * * SPDX-License-Identifier: LGPL-2.1-or-later * @@ -28,6 +29,8 @@ #include #include +#include "gdatetime-private.h" + #ifdef G_OS_WIN32 #define WIN32_LEAN_AND_MEAN #include @@ -1718,6 +1721,12 @@ test_non_utf8_printf (void) TEST_PRINTF ("%%", "%"); TEST_PRINTF ("%", ""); TEST_PRINTF ("%9", NULL); + TEST_PRINTF ("%Ec", "平成21年10月24日 00時00分00秒"); + TEST_PRINTF ("%EC", "平成"); + TEST_PRINTF ("%Ex", "平成21年10月24日"); + TEST_PRINTF ("%EX", "00時00分00秒"); + TEST_PRINTF ("%Ey", "21"); + TEST_PRINTF ("%EY", "平成21年"); setlocale (LC_ALL, oldlocale); g_free (oldlocale); @@ -1873,6 +1882,30 @@ test_modifiers (void) g_test_skip ("langinfo not available, skipping O modifier tests"); #endif + setlocale (LC_ALL, "en_GB.utf-8"); + if (strstr (setlocale (LC_ALL, NULL), "en_GB") != NULL) + { + TEST_PRINTF_DATE (2009, 1, 1, "%c", "thu 01 jan 2009 00:00:00 utc"); + TEST_PRINTF_DATE (2009, 1, 1, "%Ec", "thu 01 jan 2009 00:00:00 utc"); + + TEST_PRINTF_DATE (2009, 1, 1, "%C", "20"); + TEST_PRINTF_DATE (2009, 1, 1, "%EC", "20"); + + TEST_PRINTF_DATE (2009, 1, 2, "%x", "02/01/09"); + TEST_PRINTF_DATE (2009, 1, 2, "%Ex", "02/01/09"); + + TEST_PRINTF_TIME (1, 2, 3, "%X", "01:02:03"); + TEST_PRINTF_TIME (1, 2, 3, "%EX", "01:02:03"); + + TEST_PRINTF_DATE (2009, 1, 1, "%y", "09"); + TEST_PRINTF_DATE (2009, 1, 1, "%Ey", "09"); + + TEST_PRINTF_DATE (2009, 1, 1, "%Y", "2009"); + TEST_PRINTF_DATE (2009, 1, 1, "%EY", "2009"); + } + else + g_test_skip ("locale en_GB 
not available, skipping E modifier tests"); + setlocale (LC_ALL, oldlocale); g_free (oldlocale); } @@ -2212,6 +2245,164 @@ test_all_dates (void) g_time_zone_unref (timezone); } +static void +test_date_time_eras_japan (void) +{ + gchar *oldlocale; + + oldlocale = g_strdup (setlocale (LC_ALL, NULL)); + setlocale (LC_ALL, "ja_JP.utf-8"); + if (strstr (setlocale (LC_ALL, NULL), "ja_JP") == NULL) + { + g_test_skip ("locale ja_JP.utf-8 not available, skipping Japanese era tests"); + g_free (oldlocale); + return; + } + + /* See https://en.wikipedia.org/wiki/Japanese_era_name + * First test the Reiwa era (令和) */ + TEST_PRINTF_DATE (2023, 06, 01, "%Ec", "令和05年06月01日 00時00分00秒"); + TEST_PRINTF_DATE (2023, 06, 01, "%EC", "令和"); + TEST_PRINTF_DATE (2023, 06, 01, "%Ex", "令和05年06月01日"); + TEST_PRINTF_DATE (2023, 06, 01, "%EX", "00時00分00秒"); + TEST_PRINTF_DATE (2023, 06, 01, "%Ey", "05"); + TEST_PRINTF_DATE (2023, 06, 01, "%EY", "令和05年"); + + /* Heisei era (平成) */ + TEST_PRINTF_DATE (2019, 04, 30, "%Ec", "平成31年04月30日 00時00分00秒"); + TEST_PRINTF_DATE (2019, 04, 30, "%EC", "平成"); + TEST_PRINTF_DATE (2019, 04, 30, "%Ex", "平成31年04月30日"); + TEST_PRINTF_DATE (2019, 04, 30, "%EX", "00時00分00秒"); + TEST_PRINTF_DATE (2019, 04, 30, "%Ey", "31"); + TEST_PRINTF_DATE (2019, 04, 30, "%EY", "平成31年"); + + /* Shōwa era (昭和) */ + TEST_PRINTF_DATE (1926, 12, 25, "%Ec", "昭和元年12月25日 00時00分00秒"); + TEST_PRINTF_DATE (1926, 12, 25, "%EC", "昭和"); + TEST_PRINTF_DATE (1926, 12, 25, "%Ex", "昭和元年12月25日"); + TEST_PRINTF_DATE (1926, 12, 25, "%EX", "00時00分00秒"); + TEST_PRINTF_DATE (1926, 12, 25, "%Ey", "01"); + TEST_PRINTF_DATE (1926, 12, 25, "%EY", "昭和元年"); + + setlocale (LC_ALL, oldlocale); + g_free (oldlocale); +} + +static void +test_date_time_eras_thailand (void) +{ + gchar *oldlocale; + + oldlocale = g_strdup (setlocale (LC_ALL, NULL)); + setlocale (LC_ALL, "th_TH.utf-8"); + if (strstr (setlocale (LC_ALL, NULL), "th_TH") == NULL) + { + g_test_skip ("locale th_TH.utf-8 not available, skipping Thai era 
tests"); + g_free (oldlocale); + return; + } + + /* See https://en.wikipedia.org/wiki/Thai_solar_calendar */ + TEST_PRINTF_DATE (2023, 06, 01, "%Ec", "วันพฤหัสบดีที่  1 มิถุนายน พ.ศ. 2566, 00.00.00 น."); + TEST_PRINTF_DATE (2023, 06, 01, "%EC", "พ.ศ."); + TEST_PRINTF_DATE (2023, 06, 01, "%Ex", " 1 มิ.ย. 2566"); + TEST_PRINTF_DATE (2023, 06, 01, "%EX", "00.00.00 น."); + TEST_PRINTF_DATE (2023, 06, 01, "%Ey", "2566"); + TEST_PRINTF_DATE (2023, 06, 01, "%EY", "พ.ศ. 2566"); + + TEST_PRINTF_DATE (01, 06, 01, "%Ex", " 1 มิ.ย. 544"); + + setlocale (LC_ALL, oldlocale); + g_free (oldlocale); +} + +static void +test_date_time_eras_parsing (void) +{ + struct + { + const char *desc; + gboolean expected_success; + size_t expected_n_segments; + } + vectors[] = + { + /* Some successful parsing: */ + { "", TRUE, 0 }, + /* From https://github.com/bminor/glibc/blob/9fd3409842b3e2d31cff5dbd6f96066c430f0aa2/localedata/locales/th_TH#L233: */ + { "+:1:-543/01/01:+*:พ.ศ.:%EC %Ey", TRUE, 1 }, + /* From https://github.com/bminor/glibc/blob/9fd3409842b3e2d31cff5dbd6f96066c430f0aa2/localedata/locales/ja_JP#L14967C5-L14977C60: */ + { "+:2:2020/01/01:+*:令和:%EC%Ey年;" + "+:1:2019/05/01:2019/12/31:令和:%EC元年;" + "+:2:1990/01/01:2019/04/30:平成:%EC%Ey年;" + "+:1:1989/01/08:1989/12/31:平成:%EC元年;" + "+:2:1927/01/01:1989/01/07:昭和:%EC%Ey年;" + "+:1:1926/12/25:1926/12/31:昭和:%EC元年;" + "+:2:1913/01/01:1926/12/24:大正:%EC%Ey年;" + "+:1:1912/07/30:1912/12/31:大正:%EC元年;" + "+:6:1873/01/01:1912/07/29:明治:%EC%Ey年;" + "+:1:0001/01/01:1872/12/31:西暦:%EC%Ey年;" + "+:1:-0001/12/31:-*:紀元前:%EC%Ey年", TRUE, 11 }, + { "-:2:2020/01/01:-*:令和:%EC%Ey年", TRUE, 1 }, + { "+:2:2020/01/01:2020/01/01:令和:%EC%Ey年", TRUE, 1 }, + { "+:2:+2020/01/01:+*:令和:%EC%Ey年", TRUE, 1 }, + /* Some errors: */ + { ".:2:2020/01/01:+*:令和:%EC%Ey年", FALSE, 0 }, + { "+.2:2020/01/01:+*:令和:%EC%Ey年", FALSE, 0 }, + { "+", FALSE, 0 }, + { "+:", FALSE, 0 }, + { "+::", FALSE, 0 }, + { "+:200", FALSE, 0 }, + { "+:2nonsense", FALSE, 0 }, + { "+:2nonsense:", FALSE, 0 }, + { 
"+:2:", FALSE, 0 }, + { "+:2::", FALSE, 0 }, + { "+:2:2020-01/01:+*:令和:%EC%Ey年", FALSE, 0 }, + { "+:2:2020nonsense/01/01:+*:令和:%EC%Ey年", FALSE, 0 }, + { "+:2:2020:+*:令和:%EC%Ey年", FALSE, 0 }, + { "+:2:18446744073709551615/01/01:+*:令和:%EC%Ey年", FALSE, 0 }, + { "+:2:2020/01-01:+*:令和:%EC%Ey年", FALSE, 0 }, + { "+:2:2020/01nonsense/01:+*:令和:%EC%Ey年", FALSE, 0 }, + { "+:2:2020/01:+*:令和:%EC%Ey年", FALSE, 0 }, + { "+:2:2020/00/01:+*:令和:%EC%Ey年", FALSE, 0 }, + { "+:2:2020/13/01:+*:令和:%EC%Ey年", FALSE, 0 }, + { "+:2:2020/01/00:+*:令和:%EC%Ey年", FALSE, 0 }, + { "+:2:2020/01/32:+*:令和:%EC%Ey年", FALSE, 0 }, + { "+:2:2020/01/01nonsense:+*:令和:%EC%Ey年", FALSE, 0 }, + { "+:2:2020/01/01", FALSE, 0 }, + { "+:2:2020/01/01:", FALSE, 0 }, + { "+:2:2020/01/01::", FALSE, 0 }, + { "+:2:2020/01/01:2021-01-01:令和:%EC%Ey年", FALSE, 0 }, + { "+:2:2020/01/01:+*", FALSE, 0 }, + { "+:2:2020/01/01:+*:", FALSE, 0 }, + { "+:2:2020/01/01:+*::", FALSE, 0 }, + { "+:2:2020/01/01:+*:令和", FALSE, 0 }, + { "+:2:2020/01/01:+*:令和:", FALSE, 0 }, + { "+:2:2020/01/01:+*:令和:;", FALSE, 0 }, + }; + + for (size_t i = 0; i < G_N_ELEMENTS (vectors); i++) + { + GPtrArray *segments = NULL; + + g_test_message ("Vector %" G_GSIZE_FORMAT ": %s", i, vectors[i].desc); + + segments = _g_era_description_parse (vectors[i].desc); + + if (vectors[i].expected_success) + { + g_assert_nonnull (segments); + g_assert_cmpuint (segments->len, ==, vectors[i].expected_n_segments); + } + else + { + g_assert_null (segments); + } + + g_clear_pointer (&segments, g_ptr_array_unref); + } +} + static void test_z (void) { @@ -3249,6 +3440,10 @@ main (gint argc, g_test_add_func ("/GDateTime/dst", test_GDateTime_dst); g_test_add_func ("/GDateTime/test_z", test_z); g_test_add_func ("/GDateTime/test-all-dates", test_all_dates); + g_test_add_func ("/GDateTime/eras/japan", test_date_time_eras_japan); + g_test_add_func ("/GDateTime/eras/thailand", test_date_time_eras_thailand); + g_test_add_func ("/GDateTime/eras/parsing", test_date_time_eras_parsing); + 
g_test_add_func ("/GTimeZone/find-interval", test_find_interval); g_test_add_func ("/GTimeZone/adjust-time", test_adjust_time); g_test_add_func ("/GTimeZone/no-header", test_no_header); diff --git a/glib/tests/meson.build b/glib/tests/meson.build index 084cabb86..829beb8f5 100644 --- a/glib/tests/meson.build +++ b/glib/tests/meson.build @@ -40,6 +40,7 @@ glib_tests = { 'error' : {}, 'fileutils' : {}, 'gdatetime' : { + 'source' : ['gdatetime.c', '../gdatetime-private.c'], 'suite' : ['slow'], # musl: GDateTime/format_mixed/non_utf8_time_non_utf8_messages should be # skipped but it's not. The fix should be on musl side: diff --git a/meson.build b/meson.build index d48f13903..92aefb554 100644 --- a/meson.build +++ b/meson.build @@ -1311,6 +1311,11 @@ if cc.links('''#include str = nl_langinfo (ABMON_12); str = nl_langinfo (DAY_1); str = nl_langinfo (ABDAY_7); + str = nl_langinfo (ERA); + str = nl_langinfo (ERA_D_T_FMT); + str = nl_langinfo (ERA_D_FMT); + str = nl_langinfo (ERA_T_FMT); + str = nl_langinfo (_NL_TIME_ERA_NUM_ENTRIES); return 0; }''', name : 'nl_langinfo (PM_STR)') glib_conf.set('HAVE_LANGINFO_TIME', 1) From 454064f82d8d9172519e7b60572e34fa6fb8929a Mon Sep 17 00:00:00 2001 From: Philip Withnall Date: Fri, 3 Nov 2023 13:43:10 +0000 Subject: [PATCH 2/4] gdate: Fix some typos in a documentation comment Signed-off-by: Philip Withnall --- glib/gdate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/glib/gdate.c b/glib/gdate.c index 09872db0a..c509bd303 100644 --- a/glib/gdate.c +++ b/glib/gdate.c @@ -66,9 +66,9 @@ * @dmy: this is set if @day, @month and @year are valid * @day: the day of the day-month-year representation of the date, * as a number between 1 and 31 - * @month: the day of the day-month-year representation of the date, + * @month: the month of the day-month-year representation of the date, * as a number between 1 and 12 - * @year: the day of the day-month-year representation of the date + * @year: the year of the 
day-month-year representation of the date * * `GDate` is a struct for calendrical calculations. * From 09fe2aa78dc6143af875575b0dd1135182347749 Mon Sep 17 00:00:00 2001 From: Philip Withnall Date: Sat, 4 Nov 2023 00:45:53 +0000 Subject: [PATCH 3/4] gdatetime: Fix a minor leak on changing locale Signed-off-by: Philip Withnall --- glib/gdatetime.c | 1 + 1 file changed, 1 insertion(+) diff --git a/glib/gdatetime.c b/glib/gdatetime.c index ae50c27c7..6543f32f6 100644 --- a/glib/gdatetime.c +++ b/glib/gdatetime.c @@ -3042,6 +3042,7 @@ format_number (GString *str, if (alt_digits == NULL) alt_digits = ascii_digits; + g_free (alt_digits_locale); alt_digits_locale = g_strdup (current_ctype_locale); } From 517a3ac861fa1216ed45ed59e40c7de6e40c1a0b Mon Sep 17 00:00:00 2001 From: Philip Withnall Date: Sat, 4 Nov 2023 00:46:23 +0000 Subject: [PATCH 4/4] tests: Set locale to C.UTF-8 in datetime tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It was previously set (by default) to `C`, so this commit doesn’t change the locale behaviour of the tests, but does ensure that messages printed by the tests are correctly formatted in UTF-8 rather than transliterated to ASCII. That makes interpreting test output easier. Signed-off-by: Philip Withnall --- glib/tests/gdatetime.c | 1 + 1 file changed, 1 insertion(+) diff --git a/glib/tests/gdatetime.c b/glib/tests/gdatetime.c index 52d8a3d49..802faf64f 100644 --- a/glib/tests/gdatetime.c +++ b/glib/tests/gdatetime.c @@ -3373,6 +3373,7 @@ main (gint argc, * categories. Unset it to avoid interference with tests. */ g_unsetenv ("CHARSET"); + setlocale (LC_ALL, "C.UTF-8"); g_test_init (&argc, &argv, NULL); /* GDateTime Tests */