From 9a16f2653b8240f617f30e4bcb8e913e5846d42f Mon Sep 17 00:00:00 2001
From: Philip Withnall
Date: Sat, 21 Sep 2019 16:09:41 +0200
Subject: [PATCH] gatomic: Reorder memory barriers in fallback atomic
 operations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If the compiler doesn’t provide modern (C++11) atomic builtins (which is
now quite unlikely), we implement our own using the
`__sync_synchronize()` memory barrier.

As Behdad and others have pointed out, though, the implementation didn’t
follow the same semantics as we use with the C++11 builtins —
`__ATOMIC_SEQ_CST`.

Fix the use of memory barriers to provide `__ATOMIC_SEQ_CST` semantics.
In particular, this fixes the following common pattern:

```
GObject *obj = my_object_new ();
g_atomic_pointer_set (&shared_ptr, obj);
```

Previously this would have expanded to:

```
GObject *obj = my_object_new ();
*shared_ptr = obj;
__sync_synchronize ();
```

While the compiler would not have reordered the stores to `obj` and
`shared_ptr` within the code on one thread (due to the dependency
between them), the memory system might have made the write to
`shared_ptr` visible to other threads before the write to `obj` — if
they then dereferenced `shared_ptr` before seeing the write to `obj`,
that would be a bug.

Instead, the expansion is now:

```
GObject *obj = my_object_new ();
__sync_synchronize ();
*shared_ptr = obj;
```

This ensures that the write to `obj` is visible to all threads before
any write to `shared_ptr` is visible to any threads.

For completeness, `__sync_synchronize()` is augmented with a compiler
barrier to ensure that no loads/stores can be reordered locally before
or after it.

Tested by disabling the C++11 atomic implementation and running:

```
meson test --repeat 1000 atomic atomic-test
```

Signed-off-by: Philip Withnall
Fixes: #1449
---
 glib/gatomic.h | 44 ++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 40 insertions(+), 4 deletions(-)

diff --git a/glib/gatomic.h b/glib/gatomic.h
index 8af201814..61a99adf2 100644
--- a/glib/gatomic.h
+++ b/glib/gatomic.h
@@ -120,32 +120,68 @@ G_END_DECLS
 
 #else /* defined(__ATOMIC_SEQ_CST) */
 
+/* We want to achieve __ATOMIC_SEQ_CST semantics here. See
+ * https://en.cppreference.com/w/c/atomic/memory_order#Constants. For load
+ * operations, that means performing an *acquire*:
+ * > A load operation with this memory order performs the acquire operation on
+ * > the affected memory location: no reads or writes in the current thread can
+ * > be reordered before this load. All writes in other threads that release
+ * > the same atomic variable are visible in the current thread.
+ *
+ * “no reads or writes in the current thread can be reordered before this load”
+ * is implemented using a compiler barrier (a no-op `__asm__` section) to
+ * prevent instruction reordering. Writes in other threads are synchronised
+ * using `__sync_synchronize()`. It’s unclear from the GCC documentation whether
+ * `__sync_synchronize()` acts as a compiler barrier, hence our explicit use of
+ * one.
+ *
+ * For store operations, `__ATOMIC_SEQ_CST` means performing a *release*:
+ * > A store operation with this memory order performs the release operation:
+ * > no reads or writes in the current thread can be reordered after this store.
+ * > All writes in the current thread are visible in other threads that acquire
+ * > the same atomic variable (see Release-Acquire ordering below) and writes
+ * > that carry a dependency into the atomic variable become visible in other
+ * > threads that consume the same atomic (see Release-Consume ordering below).
+ *
+ * “no reads or writes in the current thread can be reordered after this store”
+ * is implemented using a compiler barrier to prevent instruction reordering.
+ * “All writes in the current thread are visible in other threads” is implemented
+ * using `__sync_synchronize()`; similarly for “writes that carry a dependency”.
+ */
 #define g_atomic_int_get(atomic) \
   (G_GNUC_EXTENSION ({                                                       \
+    gint gaig_result;                                                        \
     G_STATIC_ASSERT (sizeof *(atomic) == sizeof (gint));                     \
     (void) (0 ? *(atomic) ^ *(atomic) : 1);                                  \
+    gaig_result = (gint) *(atomic);                                          \
     __sync_synchronize ();                                                   \
-    (gint) *(atomic);                                                        \
+    __asm__ __volatile__ ("" : : : "memory");                                \
+    gaig_result;                                                             \
   }))
 #define g_atomic_int_set(atomic, newval) \
   (G_GNUC_EXTENSION ({                                                       \
     G_STATIC_ASSERT (sizeof *(atomic) == sizeof (gint));                     \
     (void) (0 ? *(atomic) ^ (newval) : 1);                                   \
-    *(atomic) = (newval);                                                    \
     __sync_synchronize ();                                                   \
+    __asm__ __volatile__ ("" : : : "memory");                                \
+    *(atomic) = (newval);                                                    \
   }))
 #define g_atomic_pointer_get(atomic) \
   (G_GNUC_EXTENSION ({                                                       \
+    gpointer gapg_result;                                                    \
     G_STATIC_ASSERT (sizeof *(atomic) == sizeof (gpointer));                 \
+    gapg_result = (gpointer) *(atomic);                                      \
     __sync_synchronize ();                                                   \
-    (gpointer) *(atomic);                                                    \
+    __asm__ __volatile__ ("" : : : "memory");                                \
+    gapg_result;                                                             \
   }))
 #define g_atomic_pointer_set(atomic, newval) \
   (G_GNUC_EXTENSION ({                                                       \
     G_STATIC_ASSERT (sizeof *(atomic) == sizeof (gpointer));                 \
     (void) (0 ? (gpointer) *(atomic) : NULL);                                \
-    *(atomic) = (__typeof__ (*(atomic))) (gsize) (newval);                   \
     __sync_synchronize ();                                                   \
+    __asm__ __volatile__ ("" : : : "memory");                                \
+    *(atomic) = (__typeof__ (*(atomic))) (gsize) (newval);                   \
   }))
 
 #endif /* !defined(__ATOMIC_SEQ_CST) */
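
As a review aid (not part of the patch), here is a minimal producer/consumer
sketch of the publication pattern the commit message describes. The names
(`MyData`, `shared_ptr`, `producer`, `consumer`) are illustrative, and it
assumes GLib is installed (build with
`cc demo.c $(pkg-config --cflags --libs glib-2.0)`):

```
/* demo.c: illustrative sketch, not part of the patch. With the old fallback
 * barrier placement, the consumer could observe the pointer before the
 * pointee’s contents; with the reordered barriers, seeing the pointer
 * implies seeing the fully initialised struct. */
#include <glib.h>

typedef struct { gint ready_value; } MyData;

static gpointer shared_ptr = NULL;  /* accessed only via g_atomic_pointer_*() */

static gpointer
producer (gpointer user_data)
{
  MyData *data = g_new0 (MyData, 1);
  data->ready_value = 42;

  /* The barrier inside g_atomic_pointer_set() now precedes the store, so the
   * write to data->ready_value is visible to other threads before the write
   * to shared_ptr is. */
  g_atomic_pointer_set (&shared_ptr, data);
  return NULL;
}

static gpointer
consumer (gpointer user_data)
{
  MyData *data;

  /* Spin until the pointer is published; once g_atomic_pointer_get() returns
   * non-NULL, the producer’s earlier writes must be visible too. */
  while ((data = g_atomic_pointer_get (&shared_ptr)) == NULL)
    ;

  g_assert_cmpint (data->ready_value, ==, 42);
  return NULL;
}

int
main (void)
{
  GThread *p = g_thread_new ("producer", producer, NULL);
  GThread *c = g_thread_new ("consumer", consumer, NULL);

  g_thread_join (c);
  g_thread_join (p);
  g_free (shared_ptr);

  return 0;
}
```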