mirror of
https://gitlab.gnome.org/GNOME/glib.git
synced 2025-01-23 12:41:50 +01:00
Replace g_utf8_validate() with an optimized version, and clarify the docs
2004-11-24 Matthias Clasen <mclasen@redhat.com> * glib/gutf8.c: Replace g_utf8_validate() with an optimized version, and clarify the docs a bit. (#159131, Owen Taylor)
This commit is contained in:
parent
b8d9e050a4
commit
40fb4cff10
@ -1,5 +1,9 @@
|
|||||||
2004-11-24 Matthias Clasen <mclasen@redhat.com>
|
2004-11-24 Matthias Clasen <mclasen@redhat.com>
|
||||||
|
|
||||||
|
* glib/gutf8.c: Replace g_utf8_validate() with an
|
||||||
|
optimized version, and clarify the docs a bit. (#159131,
|
||||||
|
Owen Taylor)
|
||||||
|
|
||||||
* tests/Makefile.am (test_programs): Add utf8-validate.
|
* tests/Makefile.am (test_programs): Add utf8-validate.
|
||||||
|
|
||||||
* tests/utf8-validate.c: Unit tests for g_utf8_validate().
|
* tests/utf8-validate.c: Unit tests for g_utf8_validate().
|
||||||
|
@ -1,5 +1,9 @@
|
|||||||
2004-11-24 Matthias Clasen <mclasen@redhat.com>
|
2004-11-24 Matthias Clasen <mclasen@redhat.com>
|
||||||
|
|
||||||
|
* glib/gutf8.c: Replace g_utf8_validate() with an
|
||||||
|
optimized version, and clarify the docs a bit. (#159131,
|
||||||
|
Owen Taylor)
|
||||||
|
|
||||||
* tests/Makefile.am (test_programs): Add utf8-validate.
|
* tests/Makefile.am (test_programs): Add utf8-validate.
|
||||||
|
|
||||||
* tests/utf8-validate.c: Unit tests for g_utf8_validate().
|
* tests/utf8-validate.c: Unit tests for g_utf8_validate().
|
||||||
|
@ -1,5 +1,9 @@
|
|||||||
2004-11-24 Matthias Clasen <mclasen@redhat.com>
|
2004-11-24 Matthias Clasen <mclasen@redhat.com>
|
||||||
|
|
||||||
|
* glib/gutf8.c: Replace g_utf8_validate() with an
|
||||||
|
optimized version, and clarify the docs a bit. (#159131,
|
||||||
|
Owen Taylor)
|
||||||
|
|
||||||
* tests/Makefile.am (test_programs): Add utf8-validate.
|
* tests/Makefile.am (test_programs): Add utf8-validate.
|
||||||
|
|
||||||
* tests/utf8-validate.c: Unit tests for g_utf8_validate().
|
* tests/utf8-validate.c: Unit tests for g_utf8_validate().
|
||||||
|
@ -1,5 +1,9 @@
|
|||||||
2004-11-24 Matthias Clasen <mclasen@redhat.com>
|
2004-11-24 Matthias Clasen <mclasen@redhat.com>
|
||||||
|
|
||||||
|
* glib/gutf8.c: Replace g_utf8_validate() with an
|
||||||
|
optimized version, and clarify the docs a bit. (#159131,
|
||||||
|
Owen Taylor)
|
||||||
|
|
||||||
* tests/Makefile.am (test_programs): Add utf8-validate.
|
* tests/Makefile.am (test_programs): Add utf8-validate.
|
||||||
|
|
||||||
* tests/utf8-validate.c: Unit tests for g_utf8_validate().
|
* tests/utf8-validate.c: Unit tests for g_utf8_validate().
|
||||||
|
@ -1,5 +1,9 @@
|
|||||||
2004-11-24 Matthias Clasen <mclasen@redhat.com>
|
2004-11-24 Matthias Clasen <mclasen@redhat.com>
|
||||||
|
|
||||||
|
* glib/gutf8.c: Replace g_utf8_validate() with an
|
||||||
|
optimized version, and clarify the docs a bit. (#159131,
|
||||||
|
Owen Taylor)
|
||||||
|
|
||||||
* tests/Makefile.am (test_programs): Add utf8-validate.
|
* tests/Makefile.am (test_programs): Add utf8-validate.
|
||||||
|
|
||||||
* tests/utf8-validate.c: Unit tests for g_utf8_validate().
|
* tests/utf8-validate.c: Unit tests for g_utf8_validate().
|
||||||
|
219
glib/gutf8.c
219
glib/gutf8.c
@ -1511,19 +1511,171 @@ g_ucs4_to_utf16 (const gunichar *str,
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Fold the byte at p into val as a UTF-8 continuation byte.
 *
 * Relies on three names from the expansion site: a byte pointer `p`,
 * an accumulator `val`, and a label `error` that is jumped to when the
 * byte at p does not have the 10xxxxxx form.  On success the low six
 * bits of the byte are appended to val.  p itself is not advanced.
 */
#define CONTINUATION_CHAR                                        \
 G_STMT_START {                                                  \
  if ((*(const guchar *)p & 0xc0) != 0x80) /* 10xxxxxx */        \
    goto error;                                                  \
  val = (val << 6) | (*(const guchar *)p & 0x3f);                \
 } G_STMT_END
|
||||||
|
|
||||||
|
static const gchar *
|
||||||
|
fast_validate (const char *str)
|
||||||
|
|
||||||
|
{
|
||||||
|
gunichar val = 0;
|
||||||
|
gunichar min = 0;
|
||||||
|
const gchar *p;
|
||||||
|
|
||||||
|
for (p = str; *p; p++)
|
||||||
|
{
|
||||||
|
if (*(guchar *)p < 128)
|
||||||
|
/* done */;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
const gchar *last;
|
||||||
|
|
||||||
|
last = p;
|
||||||
|
if ((*(guchar *)p & 0xe0) == 0xc0) /* 110xxxxx */
|
||||||
|
{
|
||||||
|
if (G_UNLIKELY ((*(guchar *)p & 0x1e) == 0))
|
||||||
|
goto error;
|
||||||
|
p++;
|
||||||
|
if (G_UNLIKELY ((*(guchar *)p & 0xc0) != 0x80)) /* 10xxxxxx */
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if ((*(guchar *)p & 0xf0) == 0xe0) /* 1110xxxx */
|
||||||
|
{
|
||||||
|
min = (1 << 11);
|
||||||
|
val = *(guchar *)p & 0x0f;
|
||||||
|
goto TWO_REMAINING;
|
||||||
|
}
|
||||||
|
else if ((*(guchar *)p & 0xf8) == 0xf0) /* 11110xxx */
|
||||||
|
{
|
||||||
|
min = (1 << 16);
|
||||||
|
val = *(guchar *)p & 0x07;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
goto error;
|
||||||
|
|
||||||
|
p++;
|
||||||
|
CONTINUATION_CHAR;
|
||||||
|
TWO_REMAINING:
|
||||||
|
p++;
|
||||||
|
CONTINUATION_CHAR;
|
||||||
|
p++;
|
||||||
|
CONTINUATION_CHAR;
|
||||||
|
|
||||||
|
if (G_UNLIKELY (val < min))
|
||||||
|
goto error;
|
||||||
|
|
||||||
|
if (G_UNLIKELY (!UNICODE_VALID(val)))
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
|
||||||
|
continue;
|
||||||
|
|
||||||
|
error:
|
||||||
|
return last;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
|
||||||
|
static const gchar *
|
||||||
|
fast_validate_len (const char *str,
|
||||||
|
gssize max_len)
|
||||||
|
|
||||||
|
{
|
||||||
|
gunichar val = 0;
|
||||||
|
gunichar min = 0;
|
||||||
|
const gchar *p;
|
||||||
|
|
||||||
|
for (p = str; (max_len < 0 || (p - str) < max_len) && *p; p++)
|
||||||
|
{
|
||||||
|
if (*(guchar *)p < 128)
|
||||||
|
/* done */;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
const gchar *last;
|
||||||
|
|
||||||
|
last = p;
|
||||||
|
if ((*(guchar *)p & 0xe0) == 0xc0) /* 110xxxxx */
|
||||||
|
{
|
||||||
|
if (G_UNLIKELY (max_len >= 0 && max_len - (p - str) < 2))
|
||||||
|
goto error;
|
||||||
|
|
||||||
|
if (G_UNLIKELY ((*(guchar *)p & 0x1e) == 0))
|
||||||
|
goto error;
|
||||||
|
p++;
|
||||||
|
if (G_UNLIKELY ((*(guchar *)p & 0xc0) != 0x80)) /* 10xxxxxx */
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if ((*(guchar *)p & 0xf0) == 0xe0) /* 1110xxxx */
|
||||||
|
{
|
||||||
|
if (G_UNLIKELY (max_len >= 0 && max_len - (p - str) < 3))
|
||||||
|
goto error;
|
||||||
|
|
||||||
|
min = (1 << 11);
|
||||||
|
val = *(guchar *)p & 0x0f;
|
||||||
|
goto TWO_REMAINING;
|
||||||
|
}
|
||||||
|
else if ((*(guchar *)p & 0xf8) == 0xf0) /* 11110xxx */
|
||||||
|
{
|
||||||
|
if (G_UNLIKELY (max_len >= 0 && max_len - (p - str) < 4))
|
||||||
|
goto error;
|
||||||
|
|
||||||
|
min = (1 << 16);
|
||||||
|
val = *(guchar *)p & 0x07;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
goto error;
|
||||||
|
|
||||||
|
p++;
|
||||||
|
CONTINUATION_CHAR;
|
||||||
|
TWO_REMAINING:
|
||||||
|
p++;
|
||||||
|
CONTINUATION_CHAR;
|
||||||
|
p++;
|
||||||
|
CONTINUATION_CHAR;
|
||||||
|
|
||||||
|
if (G_UNLIKELY (val < min))
|
||||||
|
goto error;
|
||||||
|
if (G_UNLIKELY (!UNICODE_VALID(val)))
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
|
||||||
|
continue;
|
||||||
|
|
||||||
|
error:
|
||||||
|
return last;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* g_utf8_validate:
|
* g_utf8_validate:
|
||||||
* @str: a pointer to character data
|
* @str: a pointer to character data
|
||||||
* @max_len: max bytes to validate, or -1 to go until nul
|
* @max_len: max bytes to validate, or -1 to go until NUL
|
||||||
* @end: return location for end of valid data
|
* @end: return location for end of valid data
|
||||||
*
|
*
|
||||||
* Validates UTF-8 encoded text. @str is the text to validate;
|
* Validates UTF-8 encoded text. @str is the text to validate;
|
||||||
* if @str is nul-terminated, then @max_len can be -1, otherwise
|
* if @str is nul-terminated, then @max_len can be -1, otherwise
|
||||||
* @max_len should be the number of bytes to validate.
|
* @max_len should be the number of bytes to validate.
|
||||||
* If @end is non-%NULL, then the end of the valid range
|
* If @end is non-%NULL, then the end of the valid range
|
||||||
* will be stored there (i.e. the address of the first invalid byte
|
* will be stored there (i.e. the start of the first invalid
|
||||||
* if some bytes were invalid, or the end of the text being validated
|
* character if some bytes were invalid, or the end of the text
|
||||||
* otherwise).
|
* being validated otherwise).
|
||||||
|
*
|
||||||
|
* Note that g_utf8_validate() returns %FALSE if @max_len is
|
||||||
|
* positive and NUL is met before @max_len bytes have been read.
|
||||||
*
|
*
|
||||||
* Returns %TRUE if all of @str was valid. Many GLib and GTK+
|
* Returns %TRUE if all of @str was valid. Many GLib and GTK+
|
||||||
* routines <emphasis>require</emphasis> valid UTF-8 as input;
|
* routines <emphasis>require</emphasis> valid UTF-8 as input;
|
||||||
@ -1533,66 +1685,29 @@ g_ucs4_to_utf16 (const gunichar *str,
|
|||||||
* Return value: %TRUE if the text was valid UTF-8
|
* Return value: %TRUE if the text was valid UTF-8
|
||||||
**/
|
**/
|
||||||
gboolean
|
gboolean
|
||||||
g_utf8_validate (const gchar *str,
|
g_utf8_validate (const char *str,
|
||||||
gssize max_len,
|
gssize max_len,
|
||||||
const gchar **end)
|
const gchar **end)
|
||||||
{
|
|
||||||
|
|
||||||
|
{
|
||||||
const gchar *p;
|
const gchar *p;
|
||||||
|
|
||||||
g_return_val_if_fail (str != NULL, FALSE);
|
if (max_len < 0)
|
||||||
|
p = fast_validate (str);
|
||||||
if (end)
|
else
|
||||||
*end = str;
|
p = fast_validate_len (str, max_len);
|
||||||
|
|
||||||
p = str;
|
|
||||||
|
|
||||||
while ((max_len < 0 || (p - str) < max_len) && *p)
|
|
||||||
{
|
|
||||||
int i, mask = 0, len;
|
|
||||||
gunichar result;
|
|
||||||
unsigned char c = (unsigned char) *p;
|
|
||||||
|
|
||||||
UTF8_COMPUTE (c, mask, len);
|
|
||||||
|
|
||||||
if (len == -1)
|
|
||||||
break;
|
|
||||||
|
|
||||||
/* check that the expected number of bytes exists in str */
|
|
||||||
if (max_len >= 0 &&
|
|
||||||
((max_len - (p - str)) < len))
|
|
||||||
break;
|
|
||||||
|
|
||||||
UTF8_GET (result, p, i, mask, len);
|
|
||||||
|
|
||||||
if (UTF8_LENGTH (result) != len) /* Check for overlong UTF-8 */
|
|
||||||
break;
|
|
||||||
|
|
||||||
if (result == (gunichar)-1)
|
|
||||||
break;
|
|
||||||
|
|
||||||
if (!UNICODE_VALID (result))
|
|
||||||
break;
|
|
||||||
|
|
||||||
p += len;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (end)
|
if (end)
|
||||||
*end = p;
|
*end = p;
|
||||||
|
|
||||||
/* See that we covered the entire length if a length was
|
if ((max_len >= 0 && p != str + max_len) ||
|
||||||
* passed in, or that we ended on a nul if not
|
(max_len < 0 && *p != '\0'))
|
||||||
*/
|
|
||||||
if (max_len >= 0 &&
|
|
||||||
p != (str + max_len))
|
|
||||||
return FALSE;
|
|
||||||
else if (max_len < 0 &&
|
|
||||||
*p != '\0')
|
|
||||||
return FALSE;
|
return FALSE;
|
||||||
else
|
else
|
||||||
return TRUE;
|
return TRUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* g_unichar_validate:
|
* g_unichar_validate:
|
||||||
* @ch: a Unicode character
|
* @ch: a Unicode character
|
||||||
|
Loading…
Reference in New Issue
Block a user