From cd3837174b907f7bb08fdd844da0e32718a3416a Mon Sep 17 00:00:00 2001 From: Peter Eisenmann Date: Sun, 21 May 2023 13:16:51 +0200 Subject: [PATCH] gutf8: Add a g_utf8_truncate_middle() function Adds a helper to truncate UTF-8 strings in the middle, so that they can be made to fit certain size constraints. This function is modeled after similar functionality that has existed since 2008 in nautilus and in eel before that. --- docs/reference/glib/glib-sections.txt.in | 1 + glib/gunicode.h | 4 ++ glib/gutf8.c | 66 ++++++++++++++++++++++++ glib/tests/utf8-misc.c | 53 +++++++++++++++++++ 4 files changed, 124 insertions(+) diff --git a/docs/reference/glib/glib-sections.txt.in b/docs/reference/glib/glib-sections.txt.in index d9e7ac3d6..89a2d4988 100644 --- a/docs/reference/glib/glib-sections.txt.in +++ b/docs/reference/glib/glib-sections.txt.in @@ -3131,6 +3131,7 @@ g_utf8_strchr g_utf8_strrchr g_utf8_strreverse g_utf8_substring +g_utf8_truncate_middle g_utf8_validate g_utf8_validate_len g_utf8_make_valid diff --git a/glib/gunicode.h b/glib/gunicode.h index 38e29b84f..85b3e0907 100644 --- a/glib/gunicode.h +++ b/glib/gunicode.h @@ -833,6 +833,10 @@ gchar *g_utf8_strncpy (gchar *dest, const gchar *src, gsize n); +GLIB_AVAILABLE_IN_2_78 +gchar *g_utf8_truncate_middle (const gchar *string, + gsize truncate_length); + /* Find the UTF-8 character corresponding to ch, in string p. These functions are equivalants to strchr and strrchr */ GLIB_AVAILABLE_IN_ALL diff --git a/glib/gutf8.c b/glib/gutf8.c index 3fa2def66..5a875ce26 100644 --- a/glib/gutf8.c +++ b/glib/gutf8.c @@ -460,6 +460,72 @@ g_utf8_strncpy (gchar *dest, return dest; } +/** + * g_utf8_truncate_middle: + * @string: (transfer none): a nul-terminated UTF-8 encoded string + * @truncate_length: the new size of @string, in characters, including the ellipsis character + * + * Cuts off the middle of the string, preserving half of @truncate_length + * characters at the beginning and half at the end.
+ * + * If @string is already short enough (no longer than @truncate_length characters), this returns a copy of @string. + * If @truncate_length is `0`, an empty string is returned; if it is `1`, only the ellipsis is returned. + * + * Returns: (transfer full): a newly-allocated copy of @string ellipsized in the middle + * + * Since: 2.78 + */ +gchar * +g_utf8_truncate_middle (const gchar *string, + gsize truncate_length) +{ + const gchar *ellipsis = "…"; + const gsize ellipsis_bytes = strlen (ellipsis); + + gsize length; + gsize left_substring_length; + gchar *left_substring_end; + gchar *right_substring_begin; + gchar *right_substring_end; + gsize left_bytes; + gsize right_bytes; + gchar *result; + + g_return_val_if_fail (string != NULL, NULL); + + length = g_utf8_strlen (string, -1); + /* Nothing to cut: the string already fits within truncate_length characters */ + if (length <= truncate_length) + return g_strdup (string); + if (truncate_length == 0) + return g_strdup (""); + + /* Reserve one character for the ellipsis; the rest is split between both ends, the right end getting the extra character when the remainder is odd */ + truncate_length -= 1; + + left_substring_length = truncate_length / 2; + + left_substring_end = g_utf8_offset_to_pointer (string, left_substring_length); + right_substring_begin = g_utf8_offset_to_pointer (left_substring_end, + length - truncate_length); + right_substring_end = g_utf8_offset_to_pointer (right_substring_begin, + truncate_length - left_substring_length); + + g_assert (*right_substring_end == '\0'); /* kept prefix + removed middle + kept suffix must add up to the whole string */ + + left_bytes = left_substring_end - string; + right_bytes = right_substring_end - right_substring_begin; + + result = g_malloc (left_bytes + ellipsis_bytes + right_bytes + 1); + + memcpy (result, string, left_bytes); /* byte counts are exact, so plain memcpy; the terminator is written explicitly below */ + memcpy (result + left_bytes, ellipsis, ellipsis_bytes); + memcpy (result + left_bytes + ellipsis_bytes, right_substring_begin, right_bytes); + result[left_bytes + ellipsis_bytes + right_bytes] = '\0'; + + return result; +} + /* unicode_strchr */ /** diff --git a/glib/tests/utf8-misc.c b/glib/tests/utf8-misc.c index 7a1ed6465..9ad158a2b 100644 --- a/glib/tests/utf8-misc.c +++ b/glib/tests/utf8-misc.c @@
-162,6 +162,58 @@ test_utf8_make_valid (void) g_free (r); } +static void +truncate_middle_helper (const char *in_str, + gsize truncate_len, + const char *out_str) +{ + gchar *string = g_utf8_truncate_middle (in_str, truncate_len); + g_assert_cmpstr (string, ==, out_str); /* compares the truncated result with the expected text */ + g_free (string); +} + +static void +test_utf8_truncate_middle (void) +{ + truncate_middle_helper ("foo", 0, ""); + truncate_middle_helper ("foo", 1, "…"); + truncate_middle_helper ("foo", 2, "…o"); + truncate_middle_helper ("foo", 3, "foo"); + truncate_middle_helper ("foo", 4, "foo"); + truncate_middle_helper ("foo", 5, "foo"); + truncate_middle_helper ("foo", 6, "foo"); + truncate_middle_helper ("foo", 7, "foo"); + + truncate_middle_helper ("a_much_longer_foo", 0, ""); + truncate_middle_helper ("a_much_longer_foo", 1, "…"); + truncate_middle_helper ("a_much_longer_foo", 2, "…o"); + truncate_middle_helper ("a_much_longer_foo", 3, "a…o"); + truncate_middle_helper ("a_much_longer_foo", 4, "a…oo"); + truncate_middle_helper ("a_much_longer_foo", 5, "a_…oo"); + truncate_middle_helper ("a_much_longer_foo", 6, "a_…foo"); + truncate_middle_helper ("a_much_longer_foo", 7, "a_m…foo"); + truncate_middle_helper ("a_much_longer_foo", 8, "a_m…_foo"); + truncate_middle_helper ("a_much_longer_foo", 9, "a_mu…_foo"); + + truncate_middle_helper ("something_even", 8, "som…even"); + truncate_middle_helper ("something_odd", 8, "som…_odd"); + truncate_middle_helper ("something_even", 9, "some…even"); + truncate_middle_helper ("something_odd", 9, "some…_odd"); + truncate_middle_helper ("something_even", 10, "some…_even"); + truncate_middle_helper ("something_odd", 10, "some…g_odd"); + truncate_middle_helper ("something_even", 11, "somet…_even"); + truncate_middle_helper ("something_odd", 11, "somet…g_odd"); + truncate_middle_helper ("something_even", 12, "somet…g_even"); + truncate_middle_helper ("something_odd", 12, "somet…ng_odd"); + truncate_middle_helper ("something_even", 13, "someth…g_even"); +
truncate_middle_helper ("something_odd", 13, "something_odd"); + truncate_middle_helper ("something_even", 14, "something_even"); + truncate_middle_helper ("something_odd", 14, "something_odd"); /* limit larger than the string: returned unchanged */ + + truncate_middle_helper ("ääääääääää", 5, "ää…ää"); + truncate_middle_helper ("あぃいぅうぇえぉ", 7, "あぃい…ぇえぉ"); +} + int main (int argc, char *argv[]) @@ -174,6 +226,7 @@ main (int argc, g_test_add_func ("/utf8/reverse", test_utf8_reverse); g_test_add_func ("/utf8/substring", test_utf8_substring); g_test_add_func ("/utf8/make-valid", test_utf8_make_valid); + g_test_add_func ("/utf8/truncate-middle", test_utf8_truncate_middle); return g_test_run(); }