mirror of
https://gitlab.gnome.org/GNOME/glib.git
synced 2024-11-10 19:36:18 +01:00
Optimized the overlong sequence check in g_utf8_get_char_extended()
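Rather than branching a second time to compute the expected sequence length for the decoded character code, we can just as well record the minimum valid code value for each sequence length during the initial lead-byte branching and compare the decoded value against it.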
This commit is contained in:
parent
30a8562946
commit
c3bc0f4f8f
26
glib/gutf8.c
26
glib/gutf8.c
@ -704,20 +704,23 @@ g_utf8_strrchr (const char *p,
|
||||
|
||||
|
||||
/* Like g_utf8_get_char, but take a maximum length
|
||||
* and return (gunichar)-2 on incomplete trailing character
|
||||
* and return (gunichar)-2 on incomplete trailing character;
|
||||
* also check for malformed or overlong sequences
|
||||
* and return (gunichar)-1 in this case.
|
||||
*/
|
||||
static inline gunichar
|
||||
g_utf8_get_char_extended (const gchar *p,
|
||||
gssize max_len)
|
||||
gssize max_len)
|
||||
{
|
||||
guint i, len;
|
||||
gunichar min_code;
|
||||
gunichar wc = (guchar) *p;
|
||||
|
||||
if (wc < 0x80)
|
||||
{
|
||||
return wc;
|
||||
}
|
||||
else if (wc < 0xc0)
|
||||
else if (G_UNLIKELY (wc < 0xc0))
|
||||
{
|
||||
return (gunichar)-1;
|
||||
}
|
||||
@ -725,33 +728,38 @@ g_utf8_get_char_extended (const gchar *p,
|
||||
{
|
||||
len = 2;
|
||||
wc &= 0x1f;
|
||||
min_code = 1 << 7;
|
||||
}
|
||||
else if (wc < 0xf0)
|
||||
{
|
||||
len = 3;
|
||||
wc &= 0x0f;
|
||||
min_code = 1 << 11;
|
||||
}
|
||||
else if (wc < 0xf8)
|
||||
{
|
||||
len = 4;
|
||||
wc &= 0x07;
|
||||
min_code = 1 << 16;
|
||||
}
|
||||
else if (wc < 0xfc)
|
||||
{
|
||||
len = 5;
|
||||
wc &= 0x03;
|
||||
min_code = 1 << 21;
|
||||
}
|
||||
else if (wc < 0xfe)
|
||||
{
|
||||
len = 6;
|
||||
wc &= 0x01;
|
||||
min_code = 1 << 26;
|
||||
}
|
||||
else
|
||||
{
|
||||
return (gunichar)-1;
|
||||
}
|
||||
|
||||
if (max_len >= 0 && len > max_len)
|
||||
|
||||
if (G_UNLIKELY (max_len >= 0 && len > max_len))
|
||||
{
|
||||
for (i = 1; i < max_len; i++)
|
||||
{
|
||||
@ -764,8 +772,8 @@ g_utf8_get_char_extended (const gchar *p,
|
||||
for (i = 1; i < len; ++i)
|
||||
{
|
||||
gunichar ch = ((guchar *)p)[i];
|
||||
|
||||
if ((ch & 0xc0) != 0x80)
|
||||
|
||||
if (G_UNLIKELY ((ch & 0xc0) != 0x80))
|
||||
{
|
||||
if (ch)
|
||||
return (gunichar)-1;
|
||||
@ -777,9 +785,9 @@ g_utf8_get_char_extended (const gchar *p,
|
||||
wc |= (ch & 0x3f);
|
||||
}
|
||||
|
||||
if (UTF8_LENGTH(wc) != len)
|
||||
if (G_UNLIKELY (wc < min_code))
|
||||
return (gunichar)-1;
|
||||
|
||||
|
||||
return wc;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user