gutf8: Add a g_utf8_truncate_middle() function

Adds a helper to truncate UTF-8 strings in the middle, allowing them to
fit certain size constraints.

This function is modeled after similar functionality that has existed
since 2008 in nautilus and in eel before that.
This commit is contained in:
Peter Eisenmann 2023-05-21 13:16:51 +02:00
parent 4641b02ca2
commit cd3837174b
4 changed files with 124 additions and 0 deletions

View File

@ -3131,6 +3131,7 @@ g_utf8_strchr
g_utf8_strrchr
g_utf8_strreverse
g_utf8_substring
g_utf8_truncate_middle
g_utf8_validate
g_utf8_validate_len
g_utf8_make_valid

View File

@ -833,6 +833,10 @@ gchar *g_utf8_strncpy (gchar *dest,
const gchar *src,
gsize n);
/* Returns a newly-allocated copy of string, shortened to at most
 * truncate_length characters by cutting out its middle. The caller
 * owns the returned string. */
GLIB_AVAILABLE_IN_2_78
gchar *g_utf8_truncate_middle (const gchar *string,
gsize truncate_length);
/* Find the UTF-8 character corresponding to ch, in string p. These
functions are equivalents to strchr and strrchr */
GLIB_AVAILABLE_IN_ALL

View File

@ -460,6 +460,72 @@ g_utf8_strncpy (gchar *dest,
return dest;
}
/**
 * g_utf8_truncate_middle:
 * @string: (transfer none): a nul-terminated UTF-8 encoded string
 * @truncate_length: the new size of @string, in characters, including the ellipsis character
 *
 * Cuts off the middle of the string, replacing it with a single ellipsis
 * character (`…`, U+2026). Of the remaining `truncate_length - 1`
 * characters, half (rounded down) are preserved from the beginning of
 * @string and the rest from its end.
 *
 * If @string is already short enough, this returns a copy of @string.
 * If @truncate_length is `0`, an empty string is returned.
 *
 * Returns: (transfer full): a newly-allocated copy of @string ellipsized in the middle
 *
 * Since: 2.78
 */
gchar *
g_utf8_truncate_middle (const gchar *string,
                        gsize        truncate_length)
{
  /* HORIZONTAL ELLIPSIS (U+2026); counts as one character but is
   * several bytes long in UTF-8, hence the separate byte length. */
  const gchar *ellipsis = "…";
  const gsize ellipsis_bytes = strlen (ellipsis);

  gsize length;
  gsize left_substring_length;
  const gchar *left_substring_end;
  const gchar *right_substring_begin;
  const gchar *right_substring_end;
  gsize left_bytes;
  gsize right_bytes;
  gchar *result;

  g_return_val_if_fail (string != NULL, NULL);

  length = g_utf8_strlen (string, -1);

  /* Current string already smaller than requested length */
  if (length <= truncate_length)
    return g_strdup (string);

  if (truncate_length == 0)
    return g_strdup ("");

  /* Find substrings to keep, ignore ellipsis character for that */
  truncate_length -= 1;

  /* The left part gets the rounded-down half; the right part gets the
   * remainder, so it is one character longer when the budget is odd. */
  left_substring_length = truncate_length / 2;

  left_substring_end = g_utf8_offset_to_pointer (string, left_substring_length);
  /* Skip the (length - truncate_length) characters that are dropped. */
  right_substring_begin = g_utf8_offset_to_pointer (left_substring_end,
                                                    length - truncate_length);
  right_substring_end = g_utf8_offset_to_pointer (right_substring_begin,
                                                  truncate_length - left_substring_length);

  /* The right part must run exactly up to the end of the input. */
  g_assert (*right_substring_end == '\0');

  left_bytes = left_substring_end - string;
  right_bytes = right_substring_end - right_substring_begin;

  result = g_malloc (left_bytes + ellipsis_bytes + right_bytes + 1);

  /* All lengths are known in bytes, so copy with memcpy() and write the
   * terminator explicitly. */
  memcpy (result, string, left_bytes);
  memcpy (result + left_bytes, ellipsis, ellipsis_bytes);
  memcpy (result + left_bytes + ellipsis_bytes, right_substring_begin, right_bytes);
  result[left_bytes + ellipsis_bytes + right_bytes] = '\0';

  return result;
}
/* unicode_strchr */
/**

View File

@ -162,6 +162,58 @@ test_utf8_make_valid (void)
g_free (r);
}
/* Truncates in_str to truncate_len characters with
 * g_utf8_truncate_middle() and asserts that the result equals out_str.
 * The temporary result is freed before returning. */
static void
truncate_middle_helper (const char *in_str,
                        gsize       truncate_len,
                        const char *out_str)
{
  gchar *string;

  string = g_utf8_truncate_middle (in_str, truncate_len);

  g_assert_cmpstr (string, ==, out_str);

  g_free (string);
}
/* Exercises g_utf8_truncate_middle() over short and long inputs, even and
 * odd lengths, target lengths below, at and above the input length, and
 * multi-byte UTF-8 input. */
static void
test_utf8_truncate_middle (void)
{
  /* Short input: a target length of 1 leaves room for the ellipsis only */
  truncate_middle_helper ("foo", 0, "");
  truncate_middle_helper ("foo", 1, "…");
  truncate_middle_helper ("foo", 2, "…o");
  truncate_middle_helper ("foo", 3, "foo");
  truncate_middle_helper ("foo", 4, "foo");
  truncate_middle_helper ("foo", 5, "foo");
  truncate_middle_helper ("foo", 6, "foo");
  truncate_middle_helper ("foo", 7, "foo");

  /* Longer input: the right-hand side gets the extra character when the
   * number of kept characters is odd */
  truncate_middle_helper ("a_much_longer_foo", 0, "");
  truncate_middle_helper ("a_much_longer_foo", 1, "…");
  truncate_middle_helper ("a_much_longer_foo", 2, "…o");
  truncate_middle_helper ("a_much_longer_foo", 3, "a…o");
  truncate_middle_helper ("a_much_longer_foo", 4, "a…oo");
  truncate_middle_helper ("a_much_longer_foo", 5, "a_…oo");
  truncate_middle_helper ("a_much_longer_foo", 6, "a_…foo");
  truncate_middle_helper ("a_much_longer_foo", 7, "a_m…foo");
  truncate_middle_helper ("a_much_longer_foo", 8, "a_m…_foo");
  truncate_middle_helper ("a_much_longer_foo", 9, "a_mu…_foo");

  /* Even- and odd-length inputs around their own length */
  truncate_middle_helper ("something_even", 8, "som…even");
  truncate_middle_helper ("something_odd", 8, "som…_odd");
  truncate_middle_helper ("something_even", 9, "some…even");
  truncate_middle_helper ("something_odd", 9, "some…_odd");
  truncate_middle_helper ("something_even", 10, "some…_even");
  truncate_middle_helper ("something_odd", 10, "some…g_odd");
  truncate_middle_helper ("something_even", 11, "somet…_even");
  truncate_middle_helper ("something_odd", 11, "somet…g_odd");
  truncate_middle_helper ("something_even", 12, "somet…g_even");
  truncate_middle_helper ("something_odd", 12, "somet…ng_odd");
  truncate_middle_helper ("something_even", 13, "someth…g_even");
  truncate_middle_helper ("something_odd", 13, "something_odd");
  truncate_middle_helper ("something_even", 14, "something_even");
  truncate_middle_helper ("something_odd", 14, "something_odd");

  /* Multi-byte characters: lengths are counted in characters, not bytes */
  truncate_middle_helper ("ääääääääää", 5, "ää…ää");
  truncate_middle_helper ("あぃいぅうぇえぉ", 7, "あぃい…ぇえぉ");
}
int
main (int argc,
char *argv[])
@ -174,6 +226,7 @@ main (int argc,
g_test_add_func ("/utf8/reverse", test_utf8_reverse);
g_test_add_func ("/utf8/substring", test_utf8_substring);
g_test_add_func ("/utf8/make-valid", test_utf8_make_valid);
g_test_add_func ("/utf8/truncate-middle", test_utf8_truncate_middle);
return g_test_run();
}