From 491f835c17d200ede52c823ab1566c493479cdc1 Mon Sep 17 00:00:00 2001
From: Robert Ancell
Date: Thu, 25 Aug 2016 11:53:54 +1200
Subject: [PATCH] GDateTime: Support parsing ISO 8601 strings

This supports a subset of ISO 8601 since that is a commonly used standard for
storing date and time information.

We support only ISO 8601 strings that contain full date and time information as
this would otherwise not map to GDateTime. This subset includes all of RFC 3339
which is commonly used on the Internet and the week and ordinal day formats as
these are supported in the GDateTime APIs.

(Minor modification by Philip Withnall to change API versions from 2.54 to
2.56.)

https://bugzilla.gnome.org/show_bug.cgi?id=753459
---
 docs/reference/glib/glib-sections.txt |   1 +
 glib/gdatetime.c                      | 401 ++++++++++++++++++++++++++
 glib/gdatetime.h                      |   4 +
 glib/tests/gdatetime.c                | 311 +++++++++++++++++++++++-
 4 files changed, 716 insertions(+), 1 deletion(-)

diff --git a/docs/reference/glib/glib-sections.txt b/docs/reference/glib/glib-sections.txt
index e09d4d3aa..e8a7fc868 100644
--- a/docs/reference/glib/glib-sections.txt
+++ b/docs/reference/glib/glib-sections.txt
@@ -1667,6 +1667,7 @@ g_date_time_new_from_unix_utc
 g_date_time_new_from_timeval_local
 g_date_time_new_from_timeval_utc
+g_date_time_new_from_iso8601
 g_date_time_new
diff --git a/glib/gdatetime.c b/glib/gdatetime.c
index c78bc4cac..e1c726047 100644
--- a/glib/gdatetime.c
+++ b/glib/gdatetime.c
@@ -22,6 +22,7 @@
  *          Thiago Santos
  *          Emmanuele Bassi
  *          Ryan Lortie
+ *          Robert Ancell
  */

 /* Algorithms within this file are based on the Calendar FAQ by
@@ -902,6 +903,406 @@ g_date_time_new_from_timeval_utc (const GTimeVal *tv)
   return datetime;
 }

+/* Parse integers in the form d (week days), dd (hours etc), ddd (ordinal days)
+ * or dddd (years).
+ *
+ * Reads exactly @length characters from @text, all of which must be ASCII
+ * digits. On success stores the result in @value and returns %TRUE; otherwise
+ * returns %FALSE and leaves @value untouched. */
+static gboolean
+get_iso8601_int (const gchar *text, gsize length, gint *value)
+{
+  gsize i;
+  gint v = 0;
+
+  if (length < 1 || length > 4)
+    return FALSE;
+
+  for (i = 0; i < length; i++)
+    {
+      const gchar c = text[i];
+      if (c < '0' || c > '9')
+        return FALSE;
+      v = v * 10 + (c - '0');
+    }
+
+  *value = v;
+  return TRUE;
+}
+
+/* Parse seconds in the form ss or ss.sss (variable length decimal).
+ *
+ * Both '.' and ',' are accepted as the decimal separator, and a separator must
+ * be followed by at least one digit. On success stores the result in @value
+ * and returns %TRUE; otherwise returns %FALSE and leaves @value untouched. */
+static gboolean
+get_iso8601_seconds (const gchar *text, gsize length, gdouble *value)
+{
+  gsize i;
+  gdouble multiplier = 0.1, v = 0;
+
+  if (length < 2)
+    return FALSE;
+
+  for (i = 0; i < 2; i++)
+    {
+      const gchar c = text[i];
+      if (c < '0' || c > '9')
+        return FALSE;
+      v = v * 10 + (c - '0');
+    }
+
+  if (length > 2 && !(text[i] == '.' || text[i] == ','))
+    return FALSE;
+
+  /* Step over the separator. For a plain "ss" this moves past @length, so the
+   * fraction loop below is skipped; a separator with nothing after it is
+   * rejected. */
+  i++;
+  if (i == length)
+    return FALSE;
+
+  for (; i < length; i++)
+    {
+      const gchar c = text[i];
+      if (c < '0' || c > '9')
+        return FALSE;
+      v += (c - '0') * multiplier;
+      multiplier *= 0.1;
+    }
+
+  *value = v;
+  return TRUE;
+}
+
+/* Create a #GDateTime from a year and an ordinal day (1 = 1 January).
+ *
+ * Returns %NULL if @ordinal_day does not exist in @year, or if
+ * g_date_time_new() rejects the remaining fields. */
+static GDateTime *
+g_date_time_new_ordinal (GTimeZone *tz, gint year, gint ordinal_day, gint hour, gint minute, gdouble seconds)
+{
+  GDateTime *dt;
+
+  if (ordinal_day < 1 || ordinal_day > (GREGORIAN_LEAP (year) ? 366 : 365))
+    return NULL;
+
+  dt = g_date_time_new (tz, year, 1, 1, hour, minute, seconds);
+  if (dt == NULL)
+    return NULL;
+  dt->days += ordinal_day - 1;
+
+  return dt;
+}
+
+/* Create a #GDateTime from an ISO 8601 week date: @week is the week number
+ * within the week-numbering year @year and @week_day is the day within that
+ * week, from 1 to 7. The resulting calendar date may fall in the previous or
+ * the following calendar year.
+ *
+ * Returns %NULL if @week or @week_day is out of range, or if the resulting
+ * date cannot be constructed. */
+static GDateTime *
+g_date_time_new_week (GTimeZone *tz, gint year, gint week, gint week_day, gint hour, gint minute, gdouble seconds)
+{
+  gint64 p, p_prev;
+  gint max_week, jan4_week_day, ordinal_day;
+  GDateTime *dt;
+
+  /* A year has 53 ISO weeks if p is 4 for it (it ends on a Thursday) or p is 3
+   * for the year before (which then ends on a Wednesday, so this year starts
+   * on a Thursday). Otherwise it has 52. */
+  p = (year * 365 + (year / 4) - (year / 100) + (year / 400)) % 7;
+  p_prev = ((year - 1) * 365 + ((year - 1) / 4) - ((year - 1) / 100) + ((year - 1) / 400)) % 7;
+  max_week = (p == 4 || p_prev == 3) ? 53 : 52;
+
+  if (week < 1 || week > max_week || week_day < 1 || week_day > 7)
+    return NULL;
+
+  /* 4 January is always in week 1, so its week day anchors the calculation.
+   * The temporary #GDateTime is only needed for that lookup. */
+  dt = g_date_time_new (tz, year, 1, 4, 0, 0, 0);
+  if (dt == NULL)
+    return NULL;
+  g_date_time_get_week_number (dt, NULL, &jan4_week_day, NULL);
+  g_date_time_unref (dt);
+
+  ordinal_day = (week * 7) + week_day - (jan4_week_day + 3);
+  if (ordinal_day <= 0)
+    {
+      /* Day 0 is 31 December of the previous year, -1 is 30 December, ... */
+      year--;
+      ordinal_day += GREGORIAN_LEAP (year) ? 366 : 365;
+    }
+  else if (ordinal_day > (GREGORIAN_LEAP (year) ? 366 : 365))
+    {
+      ordinal_day -= (GREGORIAN_LEAP (year) ? 366 : 365);
+      year++;
+    }
+
+  return g_date_time_new_ordinal (tz, year, ordinal_day, hour, minute, seconds);
+}
+
+/* Parse the date part of an ISO 8601 string and combine it with an already
+ * parsed time and timezone.
+ *
+ * The layout is chosen from @length and the position of the '-' and 'W'
+ * markers. Returns a new #GDateTime, or %NULL if @text matches none of the
+ * supported layouts or describes an invalid date. */
+static GDateTime *
+parse_iso8601_date (const gchar *text, gsize length,
+                    gint hour, gint minute, gdouble seconds, GTimeZone *tz)
+{
+  /* YYYY-MM-DD */
+  if (length == 10 && text[4] == '-' && text[7] == '-')
+    {
+      gint year, month, day;
+      if (!get_iso8601_int (text, 4, &year) ||
+          !get_iso8601_int (text + 5, 2, &month) ||
+          !get_iso8601_int (text + 8, 2, &day))
+        return NULL;
+      return g_date_time_new (tz, year, month, day, hour, minute, seconds);
+    }
+  /* YYYY-DDD */
+  else if (length == 8 && text[4] == '-')
+    {
+      gint year, ordinal_day;
+      if (!get_iso8601_int (text, 4, &year) ||
+          !get_iso8601_int (text + 5, 3, &ordinal_day))
+        return NULL;
+      return g_date_time_new_ordinal (tz, year, ordinal_day, hour, minute, seconds);
+    }
+  /* YYYY-Www-D */
+  else if (length == 10 && text[4] == '-' && text[5] == 'W' && text[8] == '-')
+    {
+      gint year, week, week_day;
+      if (!get_iso8601_int (text, 4, &year) ||
+          !get_iso8601_int (text + 6, 2, &week) ||
+          !get_iso8601_int (text + 9, 1, &week_day))
+        return NULL;
+      return g_date_time_new_week (tz, year, week, week_day, hour, minute, seconds);
+    }
+  /* YYYYWwwD */
+  else if (length == 8 && text[4] == 'W')
+    {
+      gint year, week, week_day;
+      if (!get_iso8601_int (text, 4, &year) ||
+          !get_iso8601_int (text + 5, 2, &week) ||
+          !get_iso8601_int (text + 7, 1, &week_day))
+        return NULL;
+      return g_date_time_new_week (tz, year, week, week_day, hour, minute, seconds);
+    }
+  /* YYYYMMDD */
+  else if (length == 8)
+    {
+      gint year, month, day;
+      if (!get_iso8601_int (text, 4, &year) ||
+          !get_iso8601_int (text + 4, 2, &month) ||
+          !get_iso8601_int (text + 6, 2, &day))
+        return NULL;
+      return g_date_time_new (tz, year, month, day, hour, minute, seconds);
+    }
+  /* YYYYDDD */
+  else if (length == 7)
+    {
+      gint year, ordinal_day;
+      if (!get_iso8601_int (text, 4, &year) ||
+          !get_iso8601_int (text + 4, 3, &ordinal_day))
+        return NULL;
+      return g_date_time_new_ordinal (tz, year, ordinal_day, hour, minute, seconds);
+    }
+  else
+    return NULL;
+}
+
+/* Look for a timezone designator at the end of an ISO 8601 time string.
+ *
+ * Recognised forms are "Z" and numeric offsets written as +hh:mm, +hhmm or
+ * +hh (or with '-'). On success returns a new #GTimeZone and stores the index
+ * in @text at which the designator starts in @tz_offset. Returns %NULL and
+ * leaves @tz_offset untouched if there is no usable designator. */
+static GTimeZone *
+parse_iso8601_timezone (const gchar *text, gsize length, gssize *tz_offset)
+{
+  gssize i;
+  gsize tz_length;
+  gint offset_sign = 1, offset_hours, offset_minutes;
+  GTimeZone *tz;
+
+  /* UTC uses Z suffix */
+  if (length > 0 && text[length - 1] == 'Z')
+    {
+      *tz_offset = length - 1;
+      return g_time_zone_new_utc ();
+    }
+
+  /* Look for '+' or '-' of offset */
+  for (i = (gssize) length - 1; i >= 0; i--)
+    if (text[i] == '+' || text[i] == '-')
+      {
+        offset_sign = text[i] == '-' ? -1 : 1;
+        break;
+      }
+  if (i < 0)
+    return NULL;
+  tz_length = length - i;
+
+  /* +hh:mm or -hh:mm */
+  if (tz_length == 6 && text[i+3] == ':')
+    {
+      if (!get_iso8601_int (text + i + 1, 2, &offset_hours) ||
+          !get_iso8601_int (text + i + 4, 2, &offset_minutes))
+        return NULL;
+    }
+  /* +hhmm or -hhmm */
+  else if (tz_length == 5)
+    {
+      if (!get_iso8601_int (text + i + 1, 2, &offset_hours) ||
+          !get_iso8601_int (text + i + 3, 2, &offset_minutes))
+        return NULL;
+    }
+  /* +hh or -hh */
+  else if (tz_length == 3)
+    {
+      if (!get_iso8601_int (text + i + 1, 2, &offset_hours))
+        return NULL;
+      offset_minutes = 0;
+    }
+  else
+    return NULL;
+
+  tz = g_time_zone_new (text + i);
+
+  /* Double-check that the GTimeZone matches our interpretation of the timezone.
+   * The digits above are not range checked, so GTimeZone may disagree with our
+   * reading. Treat a mismatch as "no timezone found" instead of aborting:
+   * @text is untrusted input. */
+  if (g_time_zone_get_offset (tz, 0) != offset_sign * (offset_hours * 3600 + offset_minutes * 60))
+    {
+      g_time_zone_unref (tz);
+      return NULL;
+    }
+
+  *tz_offset = i;
+  return tz;
+}
+
+/* Parse the time part of an ISO 8601 string, including an optional trailing
+ * timezone designator.
+ *
+ * @tz is set to the parsed timezone, or to %NULL if there is none. It is set
+ * before the time fields are parsed, so it can be non-%NULL even when this
+ * function returns %FALSE. */
+static gboolean
+parse_iso8601_time (const gchar *text, gsize length,
+                    gint *hour, gint *minute, gdouble *seconds, GTimeZone **tz)
+{
+  gssize tz_offset = -1;
+
+  /* Check for timezone suffix */
+  *tz = parse_iso8601_timezone (text, length, &tz_offset);
+  if (tz_offset >= 0)
+    length = tz_offset;
+
+  /* hh:mm:ss(.sss) */
+  if (length >= 8 && text[2] == ':' && text[5] == ':')
+    {
+      return get_iso8601_int (text, 2, hour) &&
+             get_iso8601_int (text + 3, 2, minute) &&
+             get_iso8601_seconds (text + 6, length - 6, seconds);
+    }
+  /* hhmmss(.sss) */
+  else if (length >= 6)
+    {
+      return get_iso8601_int (text, 2, hour) &&
+             get_iso8601_int (text + 2, 2, minute) &&
+             get_iso8601_seconds (text + 4, length - 4, seconds);
+    }
+  else
+    return FALSE;
+}
+
+/**
+ * g_date_time_new_from_iso8601:
+ * @text: an ISO 8601 formatted time string.
+ * @default_tz: (nullable): a #GTimeZone to use if the text doesn't contain a
+ *                          timezone, or %NULL.
+ *
+ * Creates a #GDateTime corresponding to the given
+ * [ISO 8601 formatted string](https://en.wikipedia.org/wiki/ISO_8601)
+ * @text. ISO 8601 strings of the form