From 9a16f2653b8240f617f30e4bcb8e913e5846d42f Mon Sep 17 00:00:00 2001
From: Philip Withnall
Date: Sat, 21 Sep 2019 16:09:41 +0200
Subject: [PATCH] gatomic: Reorder memory barriers in fallback atomic
 operations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If the compiler doesn’t provide modern (C++11) atomic builtins (which is
now quite unlikely), we implement our own using the
`__sync_synchronize()` memory barrier.

As Behdad and others have pointed out, though, the implementation didn’t
follow the same semantics as we use with the C++11 builtins —
`__ATOMIC_SEQ_CST`.

Fix the use of memory barriers to provide `__ATOMIC_SEQ_CST` semantics.
In particular, this fixes the following common pattern:

```
GObject *obj = my_object_new ();
g_atomic_pointer_set (&shared_ptr, obj);
```

Previously this would have expanded to:

```
GObject *obj = my_object_new ();
*shared_ptr = obj;
__sync_synchronize ();
```

While the compiler would not have reordered the stores to `obj` and
`shared_ptr` within the code on one thread (due to the dependency
between them), the memory system might have made the write to
`shared_ptr` visible to other threads before the write to `obj` — if
they then dereferenced `shared_ptr` before seeing the write to `obj`,
that would be a bug.

Instead, the expansion is now:

```
GObject *obj = my_object_new ();
__sync_synchronize ();
*shared_ptr = obj;
```

This ensures that the write to `obj` is visible to all threads before
any write to `shared_ptr` is visible to any threads.

For completeness, `__sync_synchronize()` is augmented with a compiler
barrier to ensure that no loads/stores can be reordered locally before
or after it.

Tested by disabling the C++11 atomic implementation and running:

```
meson test --repeat 1000 atomic atomic-test
```

Signed-off-by: Philip Withnall
Fixes: #1449
---
 glib/gatomic.h | 44 ++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 40 insertions(+), 4 deletions(-)

diff --git a/glib/gatomic.h b/glib/gatomic.h
index 8af201814..61a99adf2 100644
--- a/glib/gatomic.h
+++ b/glib/gatomic.h
@@ -120,32 +120,68 @@ G_END_DECLS
 
 #else /* defined(__ATOMIC_SEQ_CST) */
 
+/* We want to achieve __ATOMIC_SEQ_CST semantics here. See
+ * https://en.cppreference.com/w/c/atomic/memory_order#Constants. For load
+ * operations, that means performing an *acquire*:
+ * > A load operation with this memory order performs the acquire operation on
+ * > the affected memory location: no reads or writes in the current thread can
+ * > be reordered before this load. All writes in other threads that release
+ * > the same atomic variable are visible in the current thread.
+ *
+ * “no reads or writes in the current thread can be reordered before this load”
+ * is implemented using a compiler barrier (a no-op `__asm__` section) to
+ * prevent instruction reordering. Writes in other threads are synchronised
+ * using `__sync_synchronize()`. It’s unclear from the GCC documentation whether
+ * `__sync_synchronize()` acts as a compiler barrier, hence our explicit use of
+ * one.
+ *
+ * For store operations, `__ATOMIC_SEQ_CST` means performing a *release*:
+ * > A store operation with this memory order performs the release operation:
+ * > no reads or writes in the current thread can be reordered after this store.
+ * > All writes in the current thread are visible in other threads that acquire
+ * > the same atomic variable (see Release-Acquire ordering below) and writes
+ * > that carry a dependency into the atomic variable become visible in other
+ * > threads that consume the same atomic (see Release-Consume ordering below).
+ *
+ * “no reads or writes in the current thread can be reordered after this store”
+ * is implemented using a compiler barrier to prevent instruction reordering.
+ * “All writes in the current thread are visible in other threads” is implemented
+ * using `__sync_synchronize()`; similarly for “writes that carry a dependency”.
+ */
 #define g_atomic_int_get(atomic) \
   (G_GNUC_EXTENSION ({                                                       \
+    gint gaig_result;                                                        \
     G_STATIC_ASSERT (sizeof *(atomic) == sizeof (gint));                     \
     (void) (0 ? *(atomic) ^ *(atomic) : 1);                                  \
+    gaig_result = (gint) *(atomic);                                          \
     __sync_synchronize ();                                                   \
-    (gint) *(atomic);                                                        \
+    __asm__ __volatile__ ("" : : : "memory");                                \
+    gaig_result;                                                             \
   }))
 #define g_atomic_int_set(atomic, newval) \
   (G_GNUC_EXTENSION ({                                                       \
     G_STATIC_ASSERT (sizeof *(atomic) == sizeof (gint));                     \
     (void) (0 ? *(atomic) ^ (newval) : 1);                                   \
-    *(atomic) = (newval);                                                    \
     __sync_synchronize ();                                                   \
+    __asm__ __volatile__ ("" : : : "memory");                                \
+    *(atomic) = (newval);                                                    \
   }))
 #define g_atomic_pointer_get(atomic) \
   (G_GNUC_EXTENSION ({                                                       \
+    gpointer gapg_result;                                                    \
     G_STATIC_ASSERT (sizeof *(atomic) == sizeof (gpointer));                 \
+    gapg_result = (gpointer) *(atomic);                                      \
     __sync_synchronize ();                                                   \
-    (gpointer) *(atomic);                                                    \
+    __asm__ __volatile__ ("" : : : "memory");                                \
+    gapg_result;                                                             \
   }))
 #define g_atomic_pointer_set(atomic, newval) \
   (G_GNUC_EXTENSION ({                                                       \
     G_STATIC_ASSERT (sizeof *(atomic) == sizeof (gpointer));                 \
     (void) (0 ? (gpointer) *(atomic) : NULL);                                \
-    *(atomic) = (__typeof__ (*(atomic))) (gsize) (newval);                   \
     __sync_synchronize ();                                                   \
+    __asm__ __volatile__ ("" : : : "memory");                                \
+    *(atomic) = (__typeof__ (*(atomic))) (gsize) (newval);                   \
   }))
 
 #endif /* !defined(__ATOMIC_SEQ_CST) */
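
As a review aid (not part of the patch), here is a minimal producer/consumer
sketch of the publication pattern the commit message describes. The names
(`MyData`, `shared_ptr`, `producer`, `consumer`) are illustrative, and it
assumes GLib is installed (build with
`cc demo.c $(pkg-config --cflags --libs glib-2.0)`):

```
/* demo.c: illustrative sketch, not part of the patch. With the old fallback
 * barrier placement, the consumer could observe the pointer before the
 * pointee’s contents; with the reordered barriers, seeing the pointer
 * implies seeing the fully initialised struct. */
#include <glib.h>

typedef struct { gint ready_value; } MyData;

static gpointer shared_ptr = NULL;  /* accessed only via g_atomic_pointer_*() */

static gpointer
producer (gpointer user_data)
{
  MyData *data = g_new0 (MyData, 1);
  data->ready_value = 42;

  /* The barrier inside g_atomic_pointer_set() now precedes the store, so the
   * write to data->ready_value is visible to other threads before the write
   * to shared_ptr is. */
  g_atomic_pointer_set (&shared_ptr, data);
  return NULL;
}

static gpointer
consumer (gpointer user_data)
{
  MyData *data;

  /* Spin until the pointer is published; once g_atomic_pointer_get() returns
   * non-NULL, the producer’s earlier writes must be visible too. */
  while ((data = g_atomic_pointer_get (&shared_ptr)) == NULL)
    ;

  g_assert_cmpint (data->ready_value, ==, 42);
  return NULL;
}

int
main (void)
{
  GThread *p = g_thread_new ("producer", producer, NULL);
  GThread *c = g_thread_new ("consumer", consumer, NULL);

  g_thread_join (c);
  g_thread_join (p);
  g_free (shared_ptr);

  return 0;
}
```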