mirror of
https://gitlab.gnome.org/GNOME/glib.git
synced 2025-01-23 20:46:14 +01:00
bitlock: hand-code assembly version for x86
The __sync_fetch_and_or() operation on x86 is a bit suboptimal when the result isn't ignored. Normally we could use the 'lock or' assembly instruction to accomplish this, but this instruction discards the previous value.

In order to work around this issue, GCC is forced to emit a compare-and-exchange loop.

We can easily use the 'lock bts' instruction, though. It can't be used in the general case for __sync_fetch_and_or() but it works great for our case (test and set a single bit).

I filed a bug against GCC[1] to get this exposed as a new intrinsic (or have the optimiser detect the case) but until then we'll hand-code it on x86 and amd64.

The uncontended case sees a 31% improvement on my test machine.

[1] http://gcc.gnu.org/bugzilla/show_bug.cgi?id=49244

https://bugzilla.gnome.org/show_bug.cgi?id=651467
This commit is contained in:
parent
f2bd54d0c5
commit
1a80405a36
@ -205,6 +205,33 @@ void
|
||||
g_bit_lock (volatile gint *address,
|
||||
gint lock_bit)
|
||||
{
|
||||
#if defined (__GNUC__) && (defined (i386) || defined (__amd64__))
|
||||
retry:
|
||||
asm volatile goto ("lock bts %1, (%0)\n"
|
||||
"jc %l[contended]"
|
||||
: /* no output */
|
||||
: "r" (address), "r" (lock_bit)
|
||||
: "cc", "memory"
|
||||
: contended);
|
||||
return;
|
||||
|
||||
contended:
|
||||
{
|
||||
guint mask = 1u << lock_bit;
|
||||
guint v;
|
||||
|
||||
v = g_atomic_int_get (address);
|
||||
if (v & mask)
|
||||
{
|
||||
guint class = ((gsize) address) % G_N_ELEMENTS (g_bit_lock_contended);
|
||||
|
||||
g_atomic_int_add (&g_bit_lock_contended[class], +1);
|
||||
g_futex_wait (address, v);
|
||||
g_atomic_int_add (&g_bit_lock_contended[class], -1);
|
||||
}
|
||||
}
|
||||
goto retry;
|
||||
#else
|
||||
guint mask = 1u << lock_bit;
|
||||
guint v;
|
||||
|
||||
@ -221,6 +248,7 @@ g_bit_lock (volatile gint *address,
|
||||
|
||||
goto retry;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
@ -248,12 +276,25 @@ gboolean
|
||||
g_bit_trylock (volatile gint *address,
|
||||
gint lock_bit)
|
||||
{
|
||||
#if defined (__GNUC__) && (defined (i386) || defined (__amd64__))
|
||||
gboolean result;
|
||||
|
||||
asm volatile ("lock bts %2, (%1)\n"
|
||||
"setnc %%al\n"
|
||||
"movzx %%al, %0"
|
||||
: "=r" (result)
|
||||
: "r" (address), "r" (lock_bit)
|
||||
: "cc", "memory");
|
||||
|
||||
return result;
|
||||
#else
|
||||
guint mask = 1u << lock_bit;
|
||||
guint v;
|
||||
|
||||
v = g_atomic_int_or (address, mask);
|
||||
|
||||
return ~v & mask;
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
@ -275,11 +316,21 @@ void
|
||||
g_bit_unlock (volatile gint *address,
|
||||
gint lock_bit)
|
||||
{
|
||||
guint class = ((gsize) address) % G_N_ELEMENTS (g_bit_lock_contended);
|
||||
#if defined (__GNUC__) && (defined (i386) || defined (__amd64__))
|
||||
asm volatile ("lock btr %1, (%0)"
|
||||
: /* no output */
|
||||
: "r" (address), "r" (lock_bit)
|
||||
: "cc", "memory");
|
||||
#else
|
||||
guint mask = 1u << lock_bit;
|
||||
|
||||
g_atomic_int_and (address, ~mask);
|
||||
#endif
|
||||
|
||||
if (g_atomic_int_get (&g_bit_lock_contended[class]))
|
||||
g_futex_wake (address);
|
||||
{
|
||||
guint class = ((gsize) address) % G_N_ELEMENTS (g_bit_lock_contended);
|
||||
|
||||
if (g_atomic_int_get (&g_bit_lock_contended[class]))
|
||||
g_futex_wake (address);
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user