Optimized the overlong sequence check in g_utf8_get_char_extended()

Rather than making it branch to get the expected sequence length for the
resulting character code, we can just as well get the minimum code value in
the initial branching.
This commit is contained in:
Mikhail Zabaluev 2010-05-27 14:00:12 -04:00 committed by Behdad Esfahbod
parent 30a8562946
commit c3bc0f4f8f

View File

@ -704,20 +704,23 @@ g_utf8_strrchr (const char *p,
/* Like g_utf8_get_char, but take a maximum length
* and return (gunichar)-2 on incomplete trailing character
* and return (gunichar)-2 on incomplete trailing character;
* also check for malformed or overlong sequences
* and return (gunichar)-1 in this case.
*/
static inline gunichar
g_utf8_get_char_extended (const gchar *p,
gssize max_len)
gssize max_len)
{
guint i, len;
gunichar min_code;
gunichar wc = (guchar) *p;
if (wc < 0x80)
{
return wc;
}
else if (wc < 0xc0)
else if (G_UNLIKELY (wc < 0xc0))
{
return (gunichar)-1;
}
@ -725,33 +728,38 @@ g_utf8_get_char_extended (const gchar *p,
{
len = 2;
wc &= 0x1f;
min_code = 1 << 7;
}
else if (wc < 0xf0)
{
len = 3;
wc &= 0x0f;
min_code = 1 << 11;
}
else if (wc < 0xf8)
{
len = 4;
wc &= 0x07;
min_code = 1 << 16;
}
else if (wc < 0xfc)
{
len = 5;
wc &= 0x03;
min_code = 1 << 21;
}
else if (wc < 0xfe)
{
len = 6;
wc &= 0x01;
min_code = 1 << 26;
}
else
{
return (gunichar)-1;
}
if (max_len >= 0 && len > max_len)
if (G_UNLIKELY (max_len >= 0 && len > max_len))
{
for (i = 1; i < max_len; i++)
{
@ -764,8 +772,8 @@ g_utf8_get_char_extended (const gchar *p,
for (i = 1; i < len; ++i)
{
gunichar ch = ((guchar *)p)[i];
if ((ch & 0xc0) != 0x80)
if (G_UNLIKELY ((ch & 0xc0) != 0x80))
{
if (ch)
return (gunichar)-1;
@ -777,9 +785,9 @@ g_utf8_get_char_extended (const gchar *p,
wc |= (ch & 0x3f);
}
if (UTF8_LENGTH(wc) != len)
if (G_UNLIKELY (wc < min_code))
return (gunichar)-1;
return wc;
}