From 05fb05b49b62df87b76f35de9f1de1beaddb0515 Mon Sep 17 00:00:00 2001 From: Philip Withnall Date: Tue, 22 Oct 2024 12:42:41 +0100 Subject: [PATCH 1/3] gutf8: Factor out ifunc attribute checks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It looks like these might get more complex in future, as compilers claim to support the attribute (`__has_attribute(ifunc)` is true) but then raise errors at compile time if the target architecture doesn’t support ifuncs. For example, see #3511. This doesn’t fix #3511 (I don’t have time to test on musl right now), but it should make it easier to update the platform preprocessor conditions in future. Signed-off-by: Philip Withnall Helps: #3511 --- glib/gutf8.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/glib/gutf8.c b/glib/gutf8.c index ba0a0e731..45a2801aa 100644 --- a/glib/gutf8.c +++ b/glib/gutf8.c @@ -42,6 +42,10 @@ #include "glibintl.h" #include "gvalgrind.h" +#if g_macro__has_attribute(ifunc) && !defined(G_OS_WIN32) +#define HAVE_WORKING_IFUNC_ATTRIBUTE 1 +#endif + #define UTF8_COMPUTE(Char, Mask, Len) \ if (Char < 128) \ { \ @@ -1841,7 +1845,7 @@ g_utf8_validate_native (const char *str, return *str == 0; } -#if g_macro__has_attribute(ifunc) && !defined(G_OS_WIN32) +#ifdef HAVE_WORKING_IFUNC_ATTRIBUTE /* The fast implementation of UTF-8 validation in `utf8_verify()` technically * uses undefined behaviour when the string length is not provided (i.e. when * it’s looking for a trailing nul terminator): when doing word-sized reads of @@ -1889,7 +1893,7 @@ static gboolean (*resolve_g_utf8_validate (void)) (const char *, gssize, const c else return g_utf8_validate_native; } -#endif +#endif /* HAVE_WORKING_IFUNC_ATTRIBUTE */ /** * g_utf8_validate: @@ -1924,7 +1928,7 @@ gboolean g_utf8_validate (const char *str, gssize max_len, const gchar **end) -#if g_macro__has_attribute(ifunc) && !defined(G_OS_WIN32) +#ifdef HAVE_WORKING_IFUNC_ATTRIBUTE __attribute__((ifunc ("resolve_g_utf8_validate"))); #else { From 1755024caf640f9d4f806c19f640fc8fe561c003 Mon Sep 17 00:00:00 2001 From: Philip Withnall Date: Tue, 22 Oct 2024 12:45:20 +0100 Subject: [PATCH 2/3] gutf8: Factor out complex type signature into a typedef As suggested by Michael Catanzaro, this should make the return type of the resolve function a bit easier for people to parse. This introduces no functional changes. Signed-off-by: Philip Withnall --- glib/gutf8.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/glib/gutf8.c b/glib/gutf8.c index 45a2801aa..b245d5ffd 100644 --- a/glib/gutf8.c +++ b/glib/gutf8.c @@ -1886,7 +1886,12 @@ g_utf8_validate_valgrind (const char *str, return g_utf8_validate_len (str, max_len, end); } -static gboolean (*resolve_g_utf8_validate (void)) (const char *, gssize, const char **) +typedef gboolean (*GUtf8ValidateFunc) (const char *str, + gssize max_len, + const char **end); + +static GUtf8ValidateFunc +resolve_g_utf8_validate (void) { if (RUNNING_ON_VALGRIND) return g_utf8_validate_valgrind; From cf982177ddc7fbeb9a958812b23685bdfbd5c939 Mon Sep 17 00:00:00 2001 From: Philip Withnall Date: Tue, 22 Oct 2024 12:46:16 +0100 Subject: [PATCH 3/3] gutf8: Add ifunc resolver for g_str_is_ascii() too Just like how commit ad572e77802c3c383619fe63a4832b5c75dbea82 added an ifunc resolver for `g_utf8_validate()`, we also need to add one for `g_str_is_ascii()`, as it also calls into the c-utf8 SIMD validation code which causes false-positive buffer read overflow warnings from valgrind and asan. I thought about just adding the `strlen()` call into `g_str_is_ascii()` unconditionally, as a simpler fix, but from a quick codesearch.debian.net, it appears `g_str_is_ascii()` is used quite widely, so this would have an unacceptable performance impact. This should fix the valgrind failures on the `search-utils` test seen here: https://gitlab.gnome.org/GNOME/glib/-/jobs/4423753. Signed-off-by: Philip Withnall --- glib/gutf8.c | 43 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/glib/gutf8.c b/glib/gutf8.c index b245d5ffd..bca3358c2 100644 --- a/glib/gutf8.c +++ b/glib/gutf8.c @@ -1969,6 +1969,38 @@ g_utf8_validate_len (const char *str, return max_len == 0; } +static gboolean +g_str_is_ascii_native (const char *str) +{ + utf8_verify_ascii (&str, NULL); + + return *str == 0; +} + +#ifdef HAVE_WORKING_IFUNC_ATTRIBUTE +/* See above comment about `ifunc` use for g_utf8_validate(). */ +static gboolean +g_str_is_ascii_valgrind (const char *str) +{ + size_t len = strlen (str); + + utf8_verify_ascii (&str, &len); + + return *str == 0; +} + +typedef gboolean (*GStrIsAsciiFunc) (const char *str); + +static GStrIsAsciiFunc +resolve_g_str_is_ascii (void) +{ + if (RUNNING_ON_VALGRIND) + return g_str_is_ascii_valgrind; + else + return g_str_is_ascii_native; +} +#endif /* HAVE_WORKING_IFUNC_ATTRIBUTE */ + /** * g_str_is_ascii: * @str: a string @@ -1980,13 +2012,18 @@ g_utf8_validate_len (const char *str, * * Since: 2.40 */ +#if g_macro__has_attribute(no_sanitize_address) + __attribute__((no_sanitize_address)) +#endif gboolean g_str_is_ascii (const gchar *str) +#ifdef HAVE_WORKING_IFUNC_ATTRIBUTE + __attribute__((ifunc ("resolve_g_str_is_ascii"))); +#else { - utf8_verify_ascii (&str, NULL); - - return *str == 0; + return g_str_is_ascii_native (str); } +#endif /** * g_unichar_validate: