From: Torvald Riegel Date: Tue, 23 Jun 2015 13:22:25 +0000 (+0200) Subject: Clean up BUSY_WAIT_NOP and atomic_delay. X-Git-Tag: glibc-2.22~123 X-Git-Url: https://sourceware.org/git/?a=commitdiff_plain;h=4eb984d3ab5641ce7992204756ac15a61f5f7181;p=glibc.git Clean up BUSY_WAIT_NOP and atomic_delay. This patch combines BUSY_WAIT_NOP and atomic_delay into a new atomic_spin_nop function and adjusts all clients. The new function is put into atomic.h because what is best done in a spin loop is architecture-specific, and atomics must be used for spinning. The function name is meant to tell users that this has no effect on synchronization semantics but is a performance aid for spinning. --- diff --git a/ChangeLog b/ChangeLog index 41176b5fc8..0e22a702e9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,26 @@ +2015-06-30 Torvald Riegel + + * sysdeps/unix/sysv/linux/i386/lowlevellock.h (BUSY_WAIT_NOP): Remove. + * sysdeps/unix/sysv/linux/x86_64/lowlevellock.h (BUSY_WAIT_NOP): + Likewise. + * sysdeps/i386/i486/bits/atomic.h (atomic_delay): Rename to + atomic_spin_nop. + * sysdeps/x86_64/bits/atomic.h: Likewise. + * sysdeps/unix/sysv/linux/sparc/lowlevellock.h (BUSY_WAIT_NOP): Rename + to atomic_spin_nop and move ... + * sysdeps/sparc/sparc32/sparcv9/bits/atomic.h (atomic_spin_nop): + ... here and ... + * sysdeps/sparc/sparc64/bits/atomic.h: ... here. + * nptl/pthread_mutex_lock.c (__pthread_mutex_lock): Use + atomic_spin_nop instead of BUSY_WAIT_NOP. + * nptl/pthread_mutex_timedlock.c (__pthread_mutex_timedlock): + Likewise. + * sysdeps/nacl/lll_timedwait_tid.c (__lll_timedwait_tid): Likewise. + * sysdeps/nacl/lowlevellock.h (BUSY_WAIT_NOP): Remove. + (lll_wait_tid): Use atomic_spin_nop instead of BUSY_WAIT_NOP. + * nscd/nscd-client.h (__nscd_acquire_maplock): Use atomic_spin_nop + instead of atomic_delay. 
+ 2015-06-29 Joseph Myers [BZ #18613] diff --git a/include/atomic.h b/include/atomic.h index 7fd70c4c81..221bea0ad3 100644 --- a/include/atomic.h +++ b/include/atomic.h @@ -754,9 +754,10 @@ void __atomic_link_error (void); #endif /* !USE_ATOMIC_COMPILER_BUILTINS */ - -#ifndef atomic_delay -# define atomic_delay() do { /* nothing */ } while (0) +/* This operation does not affect synchronization semantics but can be used + in the body of a spin loop to potentially improve its efficiency. */ +#ifndef atomic_spin_nop +# define atomic_spin_nop() do { /* nothing */ } while (0) #endif #endif /* atomic.h */ diff --git a/nptl/pthread_mutex_lock.c b/nptl/pthread_mutex_lock.c index 96075129b8..9a3b46624d 100644 --- a/nptl/pthread_mutex_lock.c +++ b/nptl/pthread_mutex_lock.c @@ -23,6 +23,7 @@ #include #include #include "pthreadP.h" +#include #include #include @@ -135,10 +136,7 @@ __pthread_mutex_lock (mutex) LLL_MUTEX_LOCK (mutex); break; } - -#ifdef BUSY_WAIT_NOP - BUSY_WAIT_NOP; -#endif + atomic_spin_nop (); } while (LLL_MUTEX_TRYLOCK (mutex) != 0); diff --git a/nptl/pthread_mutex_timedlock.c b/nptl/pthread_mutex_timedlock.c index 109a46a84d..f0fb03e90b 100644 --- a/nptl/pthread_mutex_timedlock.c +++ b/nptl/pthread_mutex_timedlock.c @@ -22,6 +22,7 @@ #include #include #include "pthreadP.h" +#include #include #include @@ -125,10 +126,7 @@ pthread_mutex_timedlock (mutex, abstime) PTHREAD_MUTEX_PSHARED (mutex)); break; } - -#ifdef BUSY_WAIT_NOP - BUSY_WAIT_NOP; -#endif + atomic_spin_nop (); } while (lll_trylock (mutex->__data.__lock) != 0); diff --git a/nscd/nscd-client.h b/nscd/nscd-client.h index 43a8c61014..740e2f9e34 100644 --- a/nscd/nscd-client.h +++ b/nscd/nscd-client.h @@ -378,7 +378,7 @@ __nscd_acquire_maplock (volatile struct locked_map_ptr *mapptr) if (__glibc_unlikely (++cnt > 5)) return false; - atomic_delay (); + atomic_spin_nop (); } return true; diff --git a/sysdeps/i386/i486/bits/atomic.h b/sysdeps/i386/i486/bits/atomic.h index 59165bec94..59f3d34871 100644 --- 
a/sysdeps/i386/i486/bits/atomic.h +++ b/sysdeps/i386/i486/bits/atomic.h @@ -479,7 +479,7 @@ typedef uintmax_t uatomic_max_t; __result; }) -#define atomic_delay() asm ("rep; nop") +#define atomic_spin_nop() asm ("rep; nop") #define __arch_and_body(lock, mem, mask) \ diff --git a/sysdeps/nacl/lll_timedwait_tid.c b/sysdeps/nacl/lll_timedwait_tid.c index ecaf0b113a..ef544cf84f 100644 --- a/sysdeps/nacl/lll_timedwait_tid.c +++ b/sysdeps/nacl/lll_timedwait_tid.c @@ -40,7 +40,7 @@ __lll_timedwait_tid (int *tidp, const struct timespec *abstime) finish quick enough that the timeout doesn't matter. If any thread ever stays in this state for long, there is something catastrophically wrong. */ - BUSY_WAIT_NOP; + atomic_spin_nop (); else { assert (tid > 0); diff --git a/sysdeps/nacl/lowlevellock.h b/sysdeps/nacl/lowlevellock.h index 0b85d8d317..3634f1959a 100644 --- a/sysdeps/nacl/lowlevellock.h +++ b/sysdeps/nacl/lowlevellock.h @@ -21,10 +21,6 @@ /* Everything except the exit handling is the same as the generic code. */ # include -# ifndef BUSY_WAIT_NOP -# define BUSY_WAIT_NOP __sync_synchronize () -# endif - /* See exit-thread.h for details. 
*/ # define NACL_EXITING_TID 1 @@ -36,7 +32,7 @@ while ((__tid = atomic_load_relaxed (__tidp)) != 0) \ { \ if (__tid == NACL_EXITING_TID) \ - BUSY_WAIT_NOP; \ + atomic_spin_nop (); \ else \ lll_futex_wait (__tidp, __tid, LLL_PRIVATE); \ } \ diff --git a/sysdeps/sparc/sparc32/sparcv9/bits/atomic.h b/sysdeps/sparc/sparc32/sparcv9/bits/atomic.h index 317be62ccb..2122afbb09 100644 --- a/sysdeps/sparc/sparc32/sparcv9/bits/atomic.h +++ b/sysdeps/sparc/sparc32/sparcv9/bits/atomic.h @@ -100,3 +100,6 @@ typedef uintmax_t uatomic_max_t; __asm __volatile ("membar #LoadLoad | #LoadStore" : : : "memory") #define atomic_write_barrier() \ __asm __volatile ("membar #LoadStore | #StoreStore" : : : "memory") + +extern void __cpu_relax (void); +#define atomic_spin_nop() __cpu_relax () diff --git a/sysdeps/sparc/sparc64/bits/atomic.h b/sysdeps/sparc/sparc64/bits/atomic.h index 35804a8e14..48b7fd6216 100644 --- a/sysdeps/sparc/sparc64/bits/atomic.h +++ b/sysdeps/sparc/sparc64/bits/atomic.h @@ -121,3 +121,6 @@ typedef uintmax_t uatomic_max_t; __asm __volatile ("membar #LoadLoad | #LoadStore" : : : "memory") #define atomic_write_barrier() \ __asm __volatile ("membar #LoadStore | #StoreStore" : : : "memory") + +extern void __cpu_relax (void); +#define atomic_spin_nop() __cpu_relax () diff --git a/sysdeps/unix/sysv/linux/i386/lowlevellock.h b/sysdeps/unix/sysv/linux/i386/lowlevellock.h index f57afc6e2a..58f5638e37 100644 --- a/sysdeps/unix/sysv/linux/i386/lowlevellock.h +++ b/sysdeps/unix/sysv/linux/i386/lowlevellock.h @@ -58,10 +58,6 @@ #define LLL_LOCK_INITIALIZER_WAITERS (2) -/* Delay in spinlock loop. */ -#define BUSY_WAIT_NOP asm ("rep; nop") - - /* NB: in the lll_trylock macro we simply return the value in %eax after the cmpxchg instruction. In case the operation succeded this value is zero. 
In case the operation failed, the cmpxchg instruction diff --git a/sysdeps/unix/sysv/linux/sparc/lowlevellock.h b/sysdeps/unix/sysv/linux/sparc/lowlevellock.h index 9aefd9eb59..7608c01d17 100644 --- a/sysdeps/unix/sysv/linux/sparc/lowlevellock.h +++ b/sysdeps/unix/sysv/linux/sparc/lowlevellock.h @@ -25,12 +25,6 @@ #include #include -#ifndef __sparc32_atomic_do_lock -/* Delay in spinlock loop. */ -extern void __cpu_relax (void); -#define BUSY_WAIT_NOP __cpu_relax () -#endif - #include static inline int diff --git a/sysdeps/unix/sysv/linux/x86_64/lowlevellock.h b/sysdeps/unix/sysv/linux/x86_64/lowlevellock.h index 573b48c4fe..de525cd4c7 100644 --- a/sysdeps/unix/sysv/linux/x86_64/lowlevellock.h +++ b/sysdeps/unix/sysv/linux/x86_64/lowlevellock.h @@ -57,9 +57,6 @@ #define LLL_LOCK_INITIALIZER_LOCKED (1) #define LLL_LOCK_INITIALIZER_WAITERS (2) -/* Delay in spinlock loop. */ -#define BUSY_WAIT_NOP asm ("rep; nop") - /* NB: in the lll_trylock macro we simply return the value in %eax after the cmpxchg instruction. In case the operation succeded this diff --git a/sysdeps/x86_64/bits/atomic.h b/sysdeps/x86_64/bits/atomic.h index 203d92c20d..337b334db1 100644 --- a/sysdeps/x86_64/bits/atomic.h +++ b/sysdeps/x86_64/bits/atomic.h @@ -410,7 +410,7 @@ typedef uintmax_t uatomic_max_t; __result; }) -#define atomic_delay() asm ("rep; nop") +#define atomic_spin_nop() asm ("rep; nop") #define __arch_and_body(lock, mem, mask) \