mirror of
https://gitlab.gnome.org/GNOME/glib.git
synced 2025-06-05 12:20:13 +02:00
gutf8: Fix documentation for g_utf8_get_char_validated() length limits
If g_utf8_get_char_validated() encounters a nul byte in the middle of a string of given longer length, it returns -2, indicating a partial gunichar. That is not the obvious behaviour, but since g_utf8_get_char_validated() has been API for a long time, the behaviour cannot be changed. Document it, and add some unit tests (for this behaviour and the other behaviour of g_utf8_get_char_validated()). Signed-off-by: Philip Withnall <withnall@endlessm.com> https://bugzilla.gnome.org/show_bug.cgi?id=780095
This commit is contained in:
parent
428acd9b14
commit
3e89b19c44
@ -654,6 +654,10 @@ g_utf8_get_char_extended (const gchar *p,
|
|||||||
* This function checks for incomplete characters, for invalid characters
|
* This function checks for incomplete characters, for invalid characters
|
||||||
* such as characters that are out of the range of Unicode, and for
|
* such as characters that are out of the range of Unicode, and for
|
||||||
* overlong encodings of valid characters.
|
* overlong encodings of valid characters.
|
||||||
|
*
|
||||||
|
* Note that g_utf8_get_char_validated() returns (gunichar)-2 if
|
||||||
|
* @max_len is positive and any of the bytes in the first UTF-8 character
|
||||||
|
* sequence are nul.
|
||||||
*
|
*
|
||||||
* Returns: the resulting character. If @p points to a partial
|
* Returns: the resulting character. If @p points to a partial
|
||||||
* sequence at the end of a string that could begin a valid
|
* sequence at the end of a string that could begin a valid
|
||||||
|
@ -292,6 +292,57 @@ do_test (gconstpointer d)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Test the behaviour of g_utf8_get_char_validated() with various inputs and
|
||||||
|
* length restrictions. */
|
||||||
|
static void
|
||||||
|
test_utf8_get_char_validated (void)
|
||||||
|
{
|
||||||
|
const struct {
|
||||||
|
const gchar *buf;
|
||||||
|
gssize max_len;
|
||||||
|
gunichar expected_result;
|
||||||
|
} test_vectors[] = {
|
||||||
|
/* Bug #780095: */
|
||||||
|
{ "\xC0\x00_45678", 8, (gunichar) -2 },
|
||||||
|
{ "\xC0\x00_45678", -1, (gunichar) -2 },
|
||||||
|
/* It seems odd that the return value differs with the length input, but
|
||||||
|
* that’s how it’s documented: */
|
||||||
|
{ "", 0, (gunichar) -2 },
|
||||||
|
{ "", -1, (gunichar) 0 },
|
||||||
|
/* Normal inputs: */
|
||||||
|
{ "hello", 5, (gunichar) 'h' },
|
||||||
|
{ "hello", -1, (gunichar) 'h' },
|
||||||
|
{ "\xD8\x9F", 2, 0x061F },
|
||||||
|
{ "\xD8\x9F", -1, 0x061F },
|
||||||
|
{ "\xD8\x9Fmore", 6, 0x061F },
|
||||||
|
{ "\xD8\x9Fmore", -1, 0x061F },
|
||||||
|
{ "\xE2\x96\xB3", 3, 0x25B3 },
|
||||||
|
{ "\xE2\x96\xB3", -1, 0x25B3 },
|
||||||
|
{ "\xE2\x96\xB3more", 7, 0x25B3 },
|
||||||
|
{ "\xE2\x96\xB3more", -1, 0x25B3 },
|
||||||
|
{ "\xF0\x9F\x92\xA9", 4, 0x1F4A9 },
|
||||||
|
{ "\xF0\x9F\x92\xA9", -1, 0x1F4A9 },
|
||||||
|
{ "\xF0\x9F\x92\xA9more", 8, 0x1F4A9 },
|
||||||
|
{ "\xF0\x9F\x92\xA9more", -1, 0x1F4A9 },
|
||||||
|
/* Partial unichars: */
|
||||||
|
{ "\xD8", -1, (gunichar) -2 },
|
||||||
|
{ "\xD8\x9F", 1, (gunichar) -2 },
|
||||||
|
{ "\xCE", -1, (gunichar) -2 },
|
||||||
|
{ "\xCE", 1, (gunichar) -2 },
|
||||||
|
};
|
||||||
|
gsize i;
|
||||||
|
|
||||||
|
for (i = 0; i < G_N_ELEMENTS (test_vectors); i++)
|
||||||
|
{
|
||||||
|
gunichar actual_result;
|
||||||
|
|
||||||
|
g_test_message ("Vector %" G_GSIZE_FORMAT, i);
|
||||||
|
actual_result = g_utf8_get_char_validated (test_vectors[i].buf,
|
||||||
|
test_vectors[i].max_len);
|
||||||
|
g_assert_cmpint (actual_result, ==, test_vectors[i].expected_result);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
main (int argc, char *argv[])
|
main (int argc, char *argv[])
|
||||||
{
|
{
|
||||||
@ -307,5 +358,7 @@ main (int argc, char *argv[])
|
|||||||
g_free (path);
|
g_free (path);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
g_test_add_func ("/utf8/get-char-validated", test_utf8_get_char_validated);
|
||||||
|
|
||||||
return g_test_run ();
|
return g_test_run ();
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user