From aec5785e7b35cea46c54170157da6ecd96392129 Mon Sep 17 00:00:00 2001 From: Philip Withnall Date: Sun, 25 Aug 2024 21:56:14 +0100 Subject: [PATCH] gthread: Move thread _impl functions to static inlines for speed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The changes made in commit bc59e28bf6b0f70ff345aef80356d0076f44a0e7 (issue #3399) fixed introspection of the GThread API. However, they introduced a trampoline in every threading function. So with those changes applied, the disassembly of `g_mutex_lock()` (for example) was: ``` 0x7ffff7f038b0 jmp 0x7ffff7f2f440 0x7ffff7f038b5 data16 cs nopw 0x0(%rax,%rax,1) ``` i.e. It jumps straight to the `_impl` function, even with an optimised build. Since `g_mutex_lock()` (and various other GThread functions) are frequently run hot paths, this additional `jmp` to a function which has ended up in a different code page is a slowdown which we’d rather avoid. So, this commit reworks things to define all the `_impl` functions as `G_ALWAYS_INLINE static inline` (which typically expands to `__attribute__((__always_inline__)) static inline`), and to move them into the same compilation unit as `gthread.c` so that they can be inlined without the need for link-time optimisation to be enabled. It makes the code a little less readable, but not much worse than what commit bc59e28bf6b0f70ff345aef80356d0076f44a0e7 already did. And perhaps the addition of the `inline` decorations to all the `_impl` functions will make it a bit clearer what their intended purpose is (platform-specific implementations). After applying this commit, the disassembly of `g_mutex_lock()` successfully contains the inlining for me: ``` => 0x00007ffff7f03d80 <+0>: xor %eax,%eax 0x00007ffff7f03d82 <+2>: mov $0x1,%edx 0x00007ffff7f03d87 <+7>: lock cmpxchg %edx,(%rdi) 0x00007ffff7f03d8b <+11>: jne 0x7ffff7f03d8e 0x00007ffff7f03d8d <+13>: ret 0x00007ffff7f03d8e <+14>: jmp 0x7ffff7f03610 ``` I considered making a similar change to the other APIs touched in #3399 (GContentType, GAppInfo, GSpawn), but they are all much less performance critical, so it’s probably not worth making their code more complex for that sake. Signed-off-by: Philip Withnall Fixes: #3417 --- glib/gthread-posix.c | 58 +++++++++++++++++++++---------------------- glib/gthread-win32.c | 56 ++++++++++++++++++++--------------------- glib/gthread.c | 57 ++++++++++++++++++++++++++++++++++++++++++ glib/gthreadprivate.h | 39 ----------------------------- glib/meson.build | 2 -- 5 files changed, 114 insertions(+), 98 deletions(-) diff --git a/glib/gthread-posix.c b/glib/gthread-posix.c index ee13d113e..4646ec8e5 100644 --- a/glib/gthread-posix.c +++ b/glib/gthread-posix.c @@ -146,19 +146,19 @@ g_mutex_get_impl (GMutex *mutex) } -void +G_ALWAYS_INLINE static inline void g_mutex_init_impl (GMutex *mutex) { mutex->p = g_mutex_impl_new (); } -void +G_ALWAYS_INLINE static inline void g_mutex_clear_impl (GMutex *mutex) { g_mutex_impl_free (mutex->p); } -void +G_ALWAYS_INLINE static inline void g_mutex_lock_impl (GMutex *mutex) { gint status; @@ -167,7 +167,7 @@ g_mutex_lock_impl (GMutex *mutex) g_thread_abort (status, "pthread_mutex_lock"); } -void +G_ALWAYS_INLINE static inline void g_mutex_unlock_impl (GMutex *mutex) { gint status; @@ -176,7 +176,7 @@ g_mutex_unlock_impl (GMutex *mutex) g_thread_abort (status, "pthread_mutex_unlock"); } -gboolean +G_ALWAYS_INLINE static inline gboolean g_mutex_trylock_impl (GMutex *mutex) { gint status; @@ -235,31 +235,31 @@ g_rec_mutex_get_impl (GRecMutex *rec_mutex) return impl; } -void +G_ALWAYS_INLINE static inline void g_rec_mutex_init_impl (GRecMutex *rec_mutex) { rec_mutex->p = g_rec_mutex_impl_new (); } -void +G_ALWAYS_INLINE static inline void g_rec_mutex_clear_impl (GRecMutex *rec_mutex) { g_rec_mutex_impl_free (rec_mutex->p); } -void +G_ALWAYS_INLINE static inline void g_rec_mutex_lock_impl (GRecMutex *mutex) { pthread_mutex_lock (g_rec_mutex_get_impl (mutex)); } -void +G_ALWAYS_INLINE static inline void g_rec_mutex_unlock_impl (GRecMutex *rec_mutex) { pthread_mutex_unlock (rec_mutex->p); } -gboolean +G_ALWAYS_INLINE static inline gboolean g_rec_mutex_trylock_impl (GRecMutex *rec_mutex) { if (pthread_mutex_trylock (g_rec_mutex_get_impl (rec_mutex)) != 0) @@ -309,19 +309,19 @@ g_rw_lock_get_impl (GRWLock *lock) return impl; } -void +G_ALWAYS_INLINE static inline void g_rw_lock_init_impl (GRWLock *rw_lock) { rw_lock->p = g_rw_lock_impl_new (); } -void +G_ALWAYS_INLINE static inline void g_rw_lock_clear_impl (GRWLock *rw_lock) { g_rw_lock_impl_free (rw_lock->p); } -void +G_ALWAYS_INLINE static inline void g_rw_lock_writer_lock_impl (GRWLock *rw_lock) { int retval = pthread_rwlock_wrlock (g_rw_lock_get_impl (rw_lock)); @@ -330,7 +330,7 @@ g_rw_lock_writer_lock_impl (GRWLock *rw_lock) g_critical ("Failed to get RW lock %p: %s", rw_lock, g_strerror (retval)); } -gboolean +G_ALWAYS_INLINE static inline gboolean g_rw_lock_writer_trylock_impl (GRWLock *rw_lock) { if (pthread_rwlock_trywrlock (g_rw_lock_get_impl (rw_lock)) != 0) @@ -339,13 +339,13 @@ g_rw_lock_writer_trylock_impl (GRWLock *rw_lock) return TRUE; } -void +G_ALWAYS_INLINE static inline void g_rw_lock_writer_unlock_impl (GRWLock *rw_lock) { pthread_rwlock_unlock (g_rw_lock_get_impl (rw_lock)); } -void +G_ALWAYS_INLINE static inline void g_rw_lock_reader_lock_impl (GRWLock *rw_lock) { int retval = pthread_rwlock_rdlock (g_rw_lock_get_impl (rw_lock)); @@ -354,7 +354,7 @@ g_rw_lock_reader_lock_impl (GRWLock *rw_lock) g_critical ("Failed to get RW lock %p: %s", rw_lock, g_strerror (retval)); } -gboolean +G_ALWAYS_INLINE static inline gboolean g_rw_lock_reader_trylock_impl (GRWLock *rw_lock) { if (pthread_rwlock_tryrdlock (g_rw_lock_get_impl (rw_lock)) != 0) @@ -363,7 +363,7 @@ g_rw_lock_reader_trylock_impl (GRWLock *rw_lock) return TRUE; } -void +G_ALWAYS_INLINE static inline void g_rw_lock_reader_unlock_impl (GRWLock *rw_lock) { pthread_rwlock_unlock (g_rw_lock_get_impl (rw_lock)); @@ -425,19 +425,19 @@ g_cond_get_impl (GCond *cond) return impl; } -void +G_ALWAYS_INLINE static inline void g_cond_init_impl (GCond *cond) { cond->p = g_cond_impl_new (); } -void +G_ALWAYS_INLINE static inline void g_cond_clear_impl (GCond *cond) { g_cond_impl_free (cond->p); } -void +G_ALWAYS_INLINE static inline void g_cond_wait_impl (GCond *cond, GMutex *mutex) { @@ -447,7 +447,7 @@ g_cond_wait_impl (GCond *cond, g_thread_abort (status, "pthread_cond_wait"); } -void +G_ALWAYS_INLINE static inline void g_cond_signal_impl (GCond *cond) { gint status; @@ -456,7 +456,7 @@ g_cond_signal_impl (GCond *cond) g_thread_abort (status, "pthread_cond_signal"); } -void +G_ALWAYS_INLINE static inline void g_cond_broadcast_impl (GCond *cond) { gint status; @@ -465,7 +465,7 @@ g_cond_broadcast_impl (GCond *cond) g_thread_abort (status, "pthread_cond_broadcast"); } -gboolean +G_ALWAYS_INLINE static inline gboolean g_cond_wait_until_impl (GCond *cond, GMutex *mutex, gint64 end_time) @@ -642,14 +642,14 @@ _g_private_get_impl (GPrivate *key) } } -gpointer +G_ALWAYS_INLINE static inline gpointer g_private_get_impl (GPrivate *key) { /* quote POSIX: No errors are returned from pthread_getspecific(). */ return pthread_getspecific (_g_private_get_impl (key)); } -void +G_ALWAYS_INLINE static inline void g_private_set_impl (GPrivate *key, gpointer value) { @@ -659,7 +659,7 @@ g_private_set_impl (GPrivate *key, g_thread_abort (status, "pthread_setspecific"); } -void +G_ALWAYS_INLINE static inline void g_private_replace_impl (GPrivate *key, gpointer value) { @@ -778,7 +778,7 @@ g_system_thread_new (GThreadFunc proxy, return (GRealThread *) thread; } -void +G_ALWAYS_INLINE static inline void g_thread_yield_impl (void) { sched_yield (); @@ -939,7 +939,7 @@ g_mutex_unlock_slowpath (GMutex *mutex, g_futex_simple (&mutex->i[0], (gsize) FUTEX_WAKE_PRIVATE, (gsize) 1, NULL); } -void +inline void g_mutex_lock_impl (GMutex *mutex) { /* empty -> owned and we're done. Anything else, and we need to wait... */ diff --git a/glib/gthread-win32.c b/glib/gthread-win32.c index 519e9b628..93084b961 100644 --- a/glib/gthread-win32.c +++ b/glib/gthread-win32.c @@ -78,30 +78,30 @@ g_thread_abort (gint status, */ /* {{{1 GMutex */ -void +G_ALWAYS_INLINE static inline void g_mutex_init_impl (GMutex *mutex) { InitializeSRWLock ((gpointer) mutex); } -void +G_ALWAYS_INLINE static inline void g_mutex_clear_impl (GMutex *mutex) { } -void +G_ALWAYS_INLINE static inline void g_mutex_lock_impl (GMutex *mutex) { AcquireSRWLockExclusive ((gpointer) mutex); } -gboolean +G_ALWAYS_INLINE static inline gboolean g_mutex_trylock_impl (GMutex *mutex) { return TryAcquireSRWLockExclusive ((gpointer) mutex); } -void +G_ALWAYS_INLINE static inline void g_mutex_unlock_impl (GMutex *mutex) { ReleaseSRWLockExclusive ((gpointer) mutex); @@ -143,31 +143,31 @@ g_rec_mutex_get_impl (GRecMutex *mutex) return impl; } -void +G_ALWAYS_INLINE static inline void g_rec_mutex_init_impl (GRecMutex *mutex) { mutex->p = g_rec_mutex_impl_new (); } -void +G_ALWAYS_INLINE static inline void g_rec_mutex_clear_impl (GRecMutex *mutex) { g_rec_mutex_impl_free (mutex->p); } -void +G_ALWAYS_INLINE static inline void g_rec_mutex_lock_impl (GRecMutex *mutex) { EnterCriticalSection (g_rec_mutex_get_impl (mutex)); } -void +G_ALWAYS_INLINE static inline void g_rec_mutex_unlock_impl (GRecMutex *mutex) { LeaveCriticalSection (mutex->p); } -gboolean +G_ALWAYS_INLINE static inline gboolean g_rec_mutex_trylock_impl (GRecMutex *mutex) { return TryEnterCriticalSection (g_rec_mutex_get_impl (mutex)); @@ -175,85 +175,85 @@ g_rec_mutex_trylock_impl (GRecMutex *mutex) /* {{{1 GRWLock */ -void +G_ALWAYS_INLINE static inline void g_rw_lock_init_impl (GRWLock *lock) { InitializeSRWLock ((gpointer) lock); } -void +G_ALWAYS_INLINE static inline void g_rw_lock_clear_impl (GRWLock *lock) { } -void +G_ALWAYS_INLINE static inline void g_rw_lock_writer_lock_impl (GRWLock *lock) { AcquireSRWLockExclusive ((gpointer) lock); } -gboolean +G_ALWAYS_INLINE static inline gboolean g_rw_lock_writer_trylock_impl (GRWLock *lock) { return TryAcquireSRWLockExclusive ((gpointer) lock); } -void +G_ALWAYS_INLINE static inline void g_rw_lock_writer_unlock_impl (GRWLock *lock) { ReleaseSRWLockExclusive ((gpointer) lock); } -void +G_ALWAYS_INLINE static inline void g_rw_lock_reader_lock_impl (GRWLock *lock) { AcquireSRWLockShared ((gpointer) lock); } -gboolean +G_ALWAYS_INLINE static inline gboolean g_rw_lock_reader_trylock_impl (GRWLock *lock) { return TryAcquireSRWLockShared ((gpointer) lock); } -void +G_ALWAYS_INLINE static inline void g_rw_lock_reader_unlock_impl (GRWLock *lock) { ReleaseSRWLockShared ((gpointer) lock); } /* {{{1 GCond */ -void +G_ALWAYS_INLINE static inline void g_cond_init_impl (GCond *cond) { InitializeConditionVariable ((gpointer) cond); } -void +G_ALWAYS_INLINE static inline void g_cond_clear_impl (GCond *cond) { } -void +G_ALWAYS_INLINE static inline void g_cond_signal_impl (GCond *cond) { WakeConditionVariable ((gpointer) cond); } -void +G_ALWAYS_INLINE static inline void g_cond_broadcast_impl (GCond *cond) { WakeAllConditionVariable ((gpointer) cond); } -void +G_ALWAYS_INLINE static inline void g_cond_wait_impl (GCond *cond, GMutex *entered_mutex) { SleepConditionVariableSRW ((gpointer) cond, (gpointer) entered_mutex, INFINITE, 0); } -gboolean +G_ALWAYS_INLINE static inline gboolean g_cond_wait_until_impl (GCond *cond, GMutex *entered_mutex, gint64 end_time) @@ -365,20 +365,20 @@ _g_private_get_impl (GPrivate *key) return impl; } -gpointer +G_ALWAYS_INLINE static inline gpointer g_private_get_impl (GPrivate *key) { return TlsGetValue (_g_private_get_impl (key)); } -void +G_ALWAYS_INLINE static inline void g_private_set_impl (GPrivate *key, gpointer value) { TlsSetValue (_g_private_get_impl (key), value); } -void +G_ALWAYS_INLINE static inline void g_private_replace_impl (GPrivate *key, gpointer value) { @@ -521,7 +521,7 @@ error: } } -void +G_ALWAYS_INLINE static inline void g_thread_yield_impl (void) { Sleep(0); diff --git a/glib/gthread.c b/glib/gthread.c index 568672aae..1861caaaa 100644 --- a/glib/gthread.c +++ b/glib/gthread.c @@ -66,6 +66,63 @@ #include "glib_trace.h" #include "gtrace-private.h" +/* In order that the API can be defined in one place (this file), the platform + * specific code is moved out into separate files so this one doesn’t turn into + * a massive #ifdef tangle. + * + * To avoid the functions in this file becoming tiny trampolines (`jmp` to the + * relevant `_impl` function only), which would be a performance hit on some + * hot paths, #include the platform specific implementations. They are marked as + * `inline` so should be inlined correctly by the compiler without the need for + * link time optimisation or any fancy tricks. + */ +static inline void g_mutex_init_impl (GMutex *mutex); +static inline void g_mutex_clear_impl (GMutex *mutex); +static inline void g_mutex_lock_impl (GMutex *mutex); +static inline void g_mutex_unlock_impl (GMutex *mutex); +static inline gboolean g_mutex_trylock_impl (GMutex *mutex); + +static inline void g_rec_mutex_init_impl (GRecMutex *rec_mutex); +static inline void g_rec_mutex_clear_impl (GRecMutex *rec_mutex); +static inline void g_rec_mutex_lock_impl (GRecMutex *mutex); +static inline void g_rec_mutex_unlock_impl (GRecMutex *rec_mutex); +static inline gboolean g_rec_mutex_trylock_impl (GRecMutex *rec_mutex); + +static inline void g_rw_lock_init_impl (GRWLock *rw_lock); +static inline void g_rw_lock_clear_impl (GRWLock *rw_lock); +static inline void g_rw_lock_writer_lock_impl (GRWLock *rw_lock); +static inline gboolean g_rw_lock_writer_trylock_impl (GRWLock *rw_lock); +static inline void g_rw_lock_writer_unlock_impl (GRWLock *rw_lock); +static inline void g_rw_lock_reader_lock_impl (GRWLock *rw_lock); +static inline gboolean g_rw_lock_reader_trylock_impl (GRWLock *rw_lock); +static inline void g_rw_lock_reader_unlock_impl (GRWLock *rw_lock); + +static inline void g_cond_init_impl (GCond *cond); +static inline void g_cond_clear_impl (GCond *cond); +static inline void g_cond_wait_impl (GCond *cond, + GMutex *mutex); +static inline void g_cond_signal_impl (GCond *cond); +static inline void g_cond_broadcast_impl (GCond *cond); +static inline gboolean g_cond_wait_until_impl (GCond *cond, + GMutex *mutex, + gint64 end_time); + +static inline gpointer g_private_get_impl (GPrivate *key); +static inline void g_private_set_impl (GPrivate *key, + gpointer value); +static inline void g_private_replace_impl (GPrivate *key, + gpointer value); + +static inline void g_thread_yield_impl (void); + +#if defined(THREADS_POSIX) +#include "gthread-posix.c" +#elif defined(THREADS_WIN32) +#include "gthread-win32.c" +#else +#error "No threads implementation" +#endif + /* G_LOCK Documentation {{{1 ---------------------------------------------- */ /** diff --git a/glib/gthreadprivate.h b/glib/gthreadprivate.h index 0fd1598c9..28d190c1a 100644 --- a/glib/gthreadprivate.h +++ b/glib/gthreadprivate.h @@ -175,43 +175,4 @@ guint g_thread_n_created (void); gpointer g_private_set_alloc0 (GPrivate *key, gsize size); -void g_mutex_init_impl (GMutex *mutex); -void g_mutex_clear_impl (GMutex *mutex); -void g_mutex_lock_impl (GMutex *mutex); -void g_mutex_unlock_impl (GMutex *mutex); -gboolean g_mutex_trylock_impl (GMutex *mutex); - -void g_rec_mutex_init_impl (GRecMutex *rec_mutex); -void g_rec_mutex_clear_impl (GRecMutex *rec_mutex); -void g_rec_mutex_lock_impl (GRecMutex *mutex); -void g_rec_mutex_unlock_impl (GRecMutex *rec_mutex); -gboolean g_rec_mutex_trylock_impl (GRecMutex *rec_mutex); - -void g_rw_lock_init_impl (GRWLock *rw_lock); -void g_rw_lock_clear_impl (GRWLock *rw_lock); -void g_rw_lock_writer_lock_impl (GRWLock *rw_lock); -gboolean g_rw_lock_writer_trylock_impl (GRWLock *rw_lock); -void g_rw_lock_writer_unlock_impl (GRWLock *rw_lock); -void g_rw_lock_reader_lock_impl (GRWLock *rw_lock); -gboolean g_rw_lock_reader_trylock_impl (GRWLock *rw_lock); -void g_rw_lock_reader_unlock_impl (GRWLock *rw_lock); - -void g_cond_init_impl (GCond *cond); -void g_cond_clear_impl (GCond *cond); -void g_cond_wait_impl (GCond *cond, - GMutex *mutex); -void g_cond_signal_impl (GCond *cond); -void g_cond_broadcast_impl (GCond *cond); -gboolean g_cond_wait_until_impl (GCond *cond, - GMutex *mutex, - gint64 end_time); - -gpointer g_private_get_impl (GPrivate *key); -void g_private_set_impl (GPrivate *key, - gpointer value); -void g_private_replace_impl (GPrivate *key, - gpointer value); - -void g_thread_yield_impl (void); - #endif /* __G_THREADPRIVATE_H__ */ diff --git a/glib/meson.build b/glib/meson.build index 1ed46f0cb..9f1515b82 100644 --- a/glib/meson.build +++ b/glib/meson.build @@ -391,8 +391,6 @@ if glib_have_carbon platform_deps += [framework_dep] endif -glib_sources += files('gthread-@0@.c'.format(threads_implementation)) - if enable_dtrace glib_dtrace_obj = dtrace_obj_gen.process('glib_probes.d') glib_dtrace_hdr = dtrace_hdr_gen.process('glib_probes.d')