Merge branch '1323-aarch64-mem-barrier' into 'master'

gthread: Use C11-style memory consistency to speed up g_once()

Closes #1323

See merge request GNOME/glib!1364
This commit is contained in:
Sebastian Dröge 2020-05-19 17:22:41 +00:00
commit 1d61c97761
6 changed files with 113 additions and 29 deletions

View File

@@ -147,7 +147,6 @@ typedef unsigned @glib_intptr_type_define@ guintptr;
#define G_THREADS_ENABLED
#define G_THREADS_IMPL_@g_threads_impl_def@
#mesondefine G_ATOMIC_OP_MEMORY_BARRIER_NEEDED
#mesondefine G_ATOMIC_LOCK_FREE
#define GINT16_TO_@g_bs_native@(val) ((gint16) (val))

View File

@@ -630,13 +630,25 @@ g_once_impl (GOnce *once,
if (once->status != G_ONCE_STATUS_READY)
{
gpointer retval;
once->status = G_ONCE_STATUS_PROGRESS;
g_mutex_unlock (&g_once_mutex);
once->retval = func (arg);
retval = func (arg);
g_mutex_lock (&g_once_mutex);
/* We prefer the new C11-style atomic extension of GCC if available. If not,
* fall back to always locking. */
#if defined(G_ATOMIC_LOCK_FREE) && defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) && defined(__ATOMIC_SEQ_CST)
/* Only the second store needs to be atomic, as the two writes are related
* by a happens-before relationship here. */
once->retval = retval;
__atomic_store_n (&once->status, G_ONCE_STATUS_READY, __ATOMIC_RELEASE);
#else
once->retval = retval;
once->status = G_ONCE_STATUS_READY;
#endif
g_cond_broadcast (&g_once_cond);
}

View File

@@ -234,14 +234,23 @@ GLIB_AVAILABLE_IN_ALL
void g_once_init_leave (volatile void *location,
gsize result);
#ifdef G_ATOMIC_OP_MEMORY_BARRIER_NEEDED
# define g_once(once, func, arg) g_once_impl ((once), (func), (arg))
#else /* !G_ATOMIC_OP_MEMORY_BARRIER_NEEDED*/
/* Use C11-style atomic extensions to check the fast path for status=ready. If
* they are not available, fall back to using a mutex and condition variable in
* g_once_impl().
*
* On the C11-style codepath, only the load of once->status needs to be atomic,
* as the writes to it and once->retval in g_once_impl() are related by a
* happens-before relation. Release-acquire semantics are defined such that any
* atomic/non-atomic write which happens-before a store/release is guaranteed to
* be seen by the load/acquire of the same atomic variable. */
#if defined(G_ATOMIC_LOCK_FREE) && defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) && defined(__ATOMIC_SEQ_CST)
# define g_once(once, func, arg) \
(((once)->status == G_ONCE_STATUS_READY) ? \
((__atomic_load_n (&(once)->status, __ATOMIC_ACQUIRE) == G_ONCE_STATUS_READY) ? \
(once)->retval : \
g_once_impl ((once), (func), (arg)))
#endif /* G_ATOMIC_OP_MEMORY_BARRIER_NEEDED */
#else
# define g_once(once, func, arg) g_once_impl ((once), (func), (arg))
#endif
#ifdef __GNUC__
# define g_once_init_enter(location) \

View File

@@ -34,11 +34,13 @@ do_once (gpointer data)
}
static void
test_once1 (void)
test_once_single_threaded (void)
{
GOnce once = G_ONCE_INIT;
gpointer res;
g_test_summary ("Test g_once() usage from a single thread");
g_assert (once.status == G_ONCE_STATUS_NOTCALLED);
res = g_once (&once, do_once, NULL);
@@ -50,11 +52,80 @@ test_once1 (void)
g_assert_cmpint (GPOINTER_TO_INT (res), ==, 1);
}
static GOnce once_multi_threaded = G_ONCE_INIT;
static gint once_multi_threaded_counter = 0;
static GCond once_multi_threaded_cond;
static GMutex once_multi_threaded_mutex;
static guint once_multi_threaded_n_threads_waiting = 0;
/* The once-function for the multi-threaded g_once() test. It should run
 * exactly once, by one thread; atomics are used on the counter so that,
 * should a bug ever let several threads in here, none of their increments
 * to `once_multi_threaded_counter` could be lost and make the test pass
 * by accident. */
static gpointer
do_once_multi_threaded (gpointer data)
{
  gint previous = g_atomic_int_add (&once_multi_threaded_counter, 1);

  return GINT_TO_POINTER (previous + 1);
}
/* Worker for the multi-threaded g_once() test: waits until all peer threads
 * have started, then races them all to call g_once() on the shared
 * once_multi_threaded structure. Returns NULL; the check is the assertion
 * on the g_once() result below. `data` carries the expected thread count. */
static gpointer
once_thread_func (gpointer data)
{
  gpointer res;
  guint n_threads_expected = GPOINTER_TO_UINT (data);

  /* Don't immediately call g_once(), otherwise the first thread to be created
   * will end up calling the once-function, and there will be very little
   * contention. */
  g_mutex_lock (&once_multi_threaded_mutex);

  once_multi_threaded_n_threads_waiting++;
  g_cond_broadcast (&once_multi_threaded_cond);

  /* Block until every expected thread has checked in, so they all hit
   * g_once() at (roughly) the same moment. */
  while (once_multi_threaded_n_threads_waiting < n_threads_expected)
    g_cond_wait (&once_multi_threaded_cond, &once_multi_threaded_mutex);
  g_mutex_unlock (&once_multi_threaded_mutex);

  /* Actually run the test: whichever thread executed the once-function,
   * every thread must observe its single result (1). */
  res = g_once (&once_multi_threaded, do_once_multi_threaded, NULL);
  g_assert_cmpint (GPOINTER_TO_INT (res), ==, 1);

  return NULL;
}
static void
test_once2 (void)
test_once_multi_threaded (void)
{
guint i;
GThread *threads[1000];
g_test_summary ("Test g_once() usage from multiple threads");
for (i = 0; i < G_N_ELEMENTS (threads); i++)
threads[i] = g_thread_new ("once-multi-threaded",
once_thread_func,
GUINT_TO_POINTER (G_N_ELEMENTS (threads)));
/* All threads have started up, so start the test. */
g_cond_broadcast (&once_multi_threaded_cond);
for (i = 0; i < G_N_ELEMENTS (threads); i++)
g_thread_join (threads[i]);
g_assert_cmpint (g_atomic_int_get (&once_multi_threaded_counter), ==, 1);
}
static void
test_once_init_single_threaded (void)
{
static gsize init = 0;
g_test_summary ("Test g_once_init_{enter,leave}() usage from a single thread");
if (g_once_init_enter (&init))
{
g_assert (TRUE);
@@ -96,15 +167,17 @@ thread_func (gpointer data)
}
static void
test_once3 (void)
test_once_init_multi_threaded (void)
{
gint i;
GThread *threads[THREADS];
g_test_summary ("Test g_once_init_{enter,leave}() usage from multiple threads");
shared = 0;
for (i = 0; i < THREADS; i++)
threads[i] = g_thread_new ("once3", thread_func, NULL);
threads[i] = g_thread_new ("once-init-multi-threaded", thread_func, NULL);
for (i = 0; i < THREADS; i++)
g_thread_join (threads[i]);
@@ -113,10 +186,12 @@ test_once3 (void)
}
static void
test_once4 (void)
test_once_init_string (void)
{
static const gchar *val;
g_test_summary ("Test g_once_init_{enter,leave}() usage with a string");
if (g_once_init_enter (&val))
g_once_init_leave (&val, "foo");
@@ -128,10 +203,11 @@ main (int argc, char *argv[])
{
g_test_init (&argc, &argv, NULL);
g_test_add_func ("/thread/once1", test_once1);
g_test_add_func ("/thread/once2", test_once2);
g_test_add_func ("/thread/once3", test_once3);
g_test_add_func ("/thread/once4", test_once4);
g_test_add_func ("/once/single-threaded", test_once_single_threaded);
g_test_add_func ("/once/multi-threaded", test_once_multi_threaded);
g_test_add_func ("/once-init/single-threaded", test_once_init_single_threaded);
g_test_add_func ("/once-init/multi-threaded", test_once_init_multi_threaded);
g_test_add_func ("/once-init/string", test_once_init_string);
return g_test_run ();
}

View File

@@ -105,7 +105,7 @@ foreach test_name, extra_args : gobject_tests
# FIXME: https://gitlab.gnome.org/GNOME/glib/issues/1316
# aka https://bugs.debian.org/880883
if test_name == 'closure-refcount' and ['arm', 'aarch64'].contains(host_cpu_family)
if test_name == 'closure-refcount' and ['arm', 'aarch64'].contains(host_machine.cpu_family())
timeout = timeout * 10
endif

View File

@@ -1676,18 +1676,6 @@ foreach d : inet_defines
glibconfig_conf.set(d[1], val)
endforeach
# We need a more robust approach here...
host_cpu_family = host_machine.cpu_family()
if host_cpu_family == 'x86' or host_cpu_family == 'x86_64' or host_cpu_family == 's390' or host_cpu_family == 's390x' or host_cpu_family.startswith('arm') or host_cpu_family == 'aarch64' or host_cpu_family.startswith('crisv32') or host_cpu_family.startswith('etrax')
glib_memory_barrier_needed = false
elif host_cpu_family.startswith('sparc') or host_cpu_family.startswith('alpha') or host_cpu_family.startswith('powerpc') or host_cpu_family == 'ia64'
glib_memory_barrier_needed = true
else
warning('Unknown host cpu: ' + host_cpu_family)
glib_memory_barrier_needed = true
endif
glibconfig_conf.set('G_ATOMIC_OP_MEMORY_BARRIER_NEEDED', glib_memory_barrier_needed)
# We need to decide at configure time if GLib will use real atomic
# operations ("lock free") or emulated ones with a mutex. This is
# because we must put this information in glibconfig.h so we know if