Made g_utf8_to_ucs4_fast() even faster

https://bugzilla.gnome.org/show_bug.cgi?id=619435
This commit is contained in:
Mikhail Zabaluev 2010-03-17 02:34:51 +02:00 committed by Matthias Clasen
parent 1b101a3873
commit 69efeee287

View File

@ -851,7 +851,6 @@ g_utf8_to_ucs4_fast (const gchar *str,
glong len, glong len,
glong *items_written) glong *items_written)
{ {
gint j, charlen;
gunichar *result; gunichar *result;
gint n_chars, i; gint n_chars, i;
const gchar *p; const gchar *p;
@ -882,49 +881,37 @@ g_utf8_to_ucs4_fast (const gchar *str,
p = str; p = str;
for (i=0; i < n_chars; i++) for (i=0; i < n_chars; i++)
{ {
gunichar wc = ((unsigned char *)p)[0]; gunichar wc = (guchar)*p++;
if (wc < 0x80) if (wc < 0x80)
{ {
result[i] = wc; result[i] = wc;
p++;
} }
else else
{ {
if (wc < 0xe0) gunichar mask = 0x40;
if (G_UNLIKELY ((wc & mask) == 0))
{ {
charlen = 2; /* It's an out-of-sequence 10xxxxxx byte.
wc &= 0x1f; * Rather than making an ugly hash of this and the next byte
} * and overrunning the buffer, it's more useful to treat it
else if (wc < 0xf0) * with a replacement character */
{ result[i] = 0xfffd;
charlen = 3; continue;
wc &= 0x0f;
}
else if (wc < 0xf8)
{
charlen = 4;
wc &= 0x07;
}
else if (wc < 0xfc)
{
charlen = 5;
wc &= 0x03;
}
else
{
charlen = 6;
wc &= 0x01;
} }
for (j = 1; j < charlen; j++) do
{ {
wc <<= 6; wc <<= 6;
wc |= ((unsigned char *)p)[j] & 0x3f; wc |= (guchar)(*p++) & 0x3f;
mask <<= 5;
} }
while((wc & mask) != 0);
wc &= mask - 1;
result[i] = wc; result[i] = wc;
p += charlen;
} }
} }
result[i] = 0; result[i] = 0;