mirror of
https://gitlab.gnome.org/GNOME/glib.git
synced 2024-11-06 17:36:14 +01:00
Merge branch 'wip/chergert/valgrind-utf8-check' into 'main'
glib/gutf8: use ifunc to check for valgrind. See merge request GNOME/glib!4344.
This commit is contained in:
commit
6cabc7bbf8
@ -190,6 +190,8 @@
|
|||||||
#define g_macro__has_attribute_fallthrough G_GNUC_CHECK_VERSION (6, 0)
|
#define g_macro__has_attribute_fallthrough G_GNUC_CHECK_VERSION (6, 0)
|
||||||
#define g_macro__has_attribute_may_alias G_GNUC_CHECK_VERSION (3, 3)
|
#define g_macro__has_attribute_may_alias G_GNUC_CHECK_VERSION (3, 3)
|
||||||
#define g_macro__has_attribute_warn_unused_result G_GNUC_CHECK_VERSION (3, 4)
|
#define g_macro__has_attribute_warn_unused_result G_GNUC_CHECK_VERSION (3, 4)
|
||||||
|
#define g_macro__has_attribute_no_sanitize_address 0
|
||||||
|
#define g_macro__has_attribute_ifunc 0
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
89
glib/gutf8.c
89
glib/gutf8.c
@ -40,6 +40,7 @@
|
|||||||
#include "gtypes.h"
|
#include "gtypes.h"
|
||||||
#include "gthread.h"
|
#include "gthread.h"
|
||||||
#include "glibintl.h"
|
#include "glibintl.h"
|
||||||
|
#include "gvalgrind.h"
|
||||||
|
|
||||||
#define UTF8_COMPUTE(Char, Mask, Len) \
|
#define UTF8_COMPUTE(Char, Mask, Len) \
|
||||||
if (Char < 128) \
|
if (Char < 128) \
|
||||||
@ -1824,6 +1825,72 @@ out:
|
|||||||
*lenp = len;
|
*lenp = len;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static gboolean
|
||||||
|
g_utf8_validate_native (const char *str,
|
||||||
|
gssize max_len,
|
||||||
|
const char **end)
|
||||||
|
{
|
||||||
|
if (max_len >= 0)
|
||||||
|
return g_utf8_validate_len (str, max_len, end);
|
||||||
|
|
||||||
|
utf8_verify (&str, NULL);
|
||||||
|
|
||||||
|
if (end != NULL)
|
||||||
|
*end = str;
|
||||||
|
|
||||||
|
return *str == 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if g_macro__has_attribute(ifunc) && !defined(G_OS_WIN32)
|
||||||
|
/* The fast implementation of UTF-8 validation in `utf8_verify()` technically
|
||||||
|
* uses undefined behaviour when the string length is not provided (i.e. when
|
||||||
|
* it’s looking for a trailing nul terminator): when doing word-sized reads of
|
||||||
|
* the string, it can read up to the word size (minus one byte) beyond the end
|
||||||
|
* of the string in order to find the nul terminator.
|
||||||
|
*
|
||||||
|
* While this is guaranteed to not cause a page fault (at worst, the nul
|
||||||
|
* terminator could be in the final word of the page, and the code won’t read
|
||||||
|
* any further than that), it is still technically undefined behaviour in C,
|
||||||
|
* because we’re reading off the end of an array.
|
||||||
|
*
|
||||||
|
* We don’t *think* this can cause any bugs due to compiler optimisations,
|
||||||
|
* because glibc does exactly the same thing in its string handling code, and
|
||||||
|
* that code has been extensively tested. For example:
|
||||||
|
* https://github.com/bminor/glibc/blob/2c1903cbbac0022153a67776f474c221250ad6ed/string/strchrnul.c
|
||||||
|
*
|
||||||
|
* However, both valgrind and asan warn about the read beyond the end of the
|
||||||
|
* array (a ‘heap buffer overflow read’). They’re right to do this (they can’t
|
||||||
|
* know the read is bounded to the word size minus one, and guaranteed to not
|
||||||
|
* cross a page boundary), but it’s annoying for any application which calls
|
||||||
|
* `g_utf8_validate()`.
|
||||||
|
*
|
||||||
|
* Use an [indirect function (`ifunc`)](https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-ifunc-function-attribute)
|
||||||
|
* to use a fallback implementation of `g_utf8_validate()` when running under
|
||||||
|
* valgrind. This is resolved at load time using `resolve_g_utf8_validate()`.
|
||||||
|
*
|
||||||
|
* Similarly, mark the real implementation so that it’s not instrumented by asan
|
||||||
|
* using `no_sanitize_address`.
|
||||||
|
*/
|
||||||
|
static gboolean
|
||||||
|
g_utf8_validate_valgrind (const char *str,
|
||||||
|
gssize max_len,
|
||||||
|
const char **end)
|
||||||
|
{
|
||||||
|
if (max_len < 0)
|
||||||
|
max_len = strlen (str);
|
||||||
|
|
||||||
|
return g_utf8_validate_len (str, max_len, end);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Resolver for the `ifunc` attribute on g_utf8_validate(): returns the
 * function that calls to g_utf8_validate() should be bound to.
 *
 * The valgrind variant is chosen when RUNNING_ON_VALGRIND is true, and the
 * native variant otherwise.
 */
static gboolean (*resolve_g_utf8_validate (void)) (const char *, gssize, const char **)
{
  return RUNNING_ON_VALGRIND ? g_utf8_validate_valgrind
                             : g_utf8_validate_native;
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* g_utf8_validate:
|
* g_utf8_validate:
|
||||||
* @str: (array length=max_len) (element-type guint8): a pointer to character data
|
* @str: (array length=max_len) (element-type guint8): a pointer to character data
|
||||||
@ -1850,22 +1917,20 @@ out:
|
|||||||
*
|
*
|
||||||
* Returns: `TRUE` if the text was valid UTF-8
|
* Returns: `TRUE` if the text was valid UTF-8
|
||||||
*/
|
*/
|
||||||
|
#if g_macro__has_attribute(no_sanitize_address)
|
||||||
|
__attribute__((no_sanitize_address))
|
||||||
|
#endif
|
||||||
gboolean
|
gboolean
|
||||||
g_utf8_validate (const char *str,
|
g_utf8_validate (const char *str,
|
||||||
gssize max_len,
|
gssize max_len,
|
||||||
const gchar **end)
|
const gchar **end)
|
||||||
|
#if g_macro__has_attribute(ifunc) && !defined(G_OS_WIN32)
|
||||||
|
__attribute__((ifunc ("resolve_g_utf8_validate")));
|
||||||
|
#else
|
||||||
{
|
{
|
||||||
if (max_len >= 0)
|
return g_utf8_validate_native (str, max_len, end);
|
||||||
return g_utf8_validate_len (str, max_len, end);
|
|
||||||
|
|
||||||
utf8_verify (&str, NULL);
|
|
||||||
|
|
||||||
if (end != NULL)
|
|
||||||
*end = str;
|
|
||||||
|
|
||||||
return *str == 0;
|
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* g_utf8_validate_len:
|
* g_utf8_validate_len:
|
||||||
|
Loading…
Reference in New Issue
Block a user