PowerPC: libc single-thread lock optimization
- From: Adhemerval Zanella <azanella at linux dot vnet dot ibm dot com>
- To: "GNU C. Library" <libc-alpha at sourceware dot org>
- Date: Tue, 08 Apr 2014 10:26:09 -0300
- Subject: PowerPC: libc single-thread lock optimization
This patch adds a single-thread optimization for the locks used within
libc.so. Each lock operation checks whether the process has already
spawned a thread and, if not, uses non-atomic operations instead. Other
libraries (libpthread.so, for instance) are unaffected by this change.
This is similar to the x86_64 optimization on locks and atomics, which
uses the __libc_multiple_threads variable.
Tested on powerpc32, powerpc64, and powerpc64le.
Note: in the macro changes I tried to alter the existing syntax as
little as possible.
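
To illustrate the idea outside the patch, here is a minimal standalone
C sketch of the fast path, not the glibc code itself: when the process
is known to be single-threaded, a plain load/compare/store replaces the
lwarx/stwcx. reservation loop. The single_threaded flag below is a
stand-in for glibc's THREAD_GETMEM (THREAD_SELF,
header.multiple_threads) check; cas_val is a hypothetical helper.

#include <stdio.h>

static int single_threaded = 1;   /* stand-in: no thread spawned yet */

/* Compare-and-swap returning the old value, mirroring the _val_
   variants in the patch.  */
static int
cas_val (int *mem, int oldval, int newval)
{
  int tmp;
  if (!single_threaded)
    /* Multi-threaded path: a real atomic operation (here a GCC
       builtin standing in for the lwarx/stwcx. loop).  */
    tmp = __sync_val_compare_and_swap (mem, oldval, newval);
  else
    {
      /* Single-thread fast path: same semantics, no atomics.  */
      tmp = *mem;
      if (tmp == oldval)
        *mem = newval;
    }
  return tmp;
}

int
main (void)
{
  int lock = 0;
  int old = cas_val (&lock, 0, 42);
  printf ("old=%d lock=%d\n", old, lock);   /* prints: old=0 lock=42 */
  return 0;
}

The non-atomic branch is safe precisely because no second thread exists
to race with it; once pthread_create runs, the multiple_threads flag is
set and every subsequent operation takes the atomic path.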
--
* nptl/sysdeps/unix/sysv/linux/powerpc/lowlevellock.h
(__lll_robust_trylock): Add single-thread lock optimization for calls
within libc.so.
* sysdeps/powerpc/bits/atomic.h
(__arch_compare_and_exchange_val_32_acq): Likewise.
(__arch_compare_and_exchange_val_32_rel): Likewise.
(__arch_atomic_exchange_32_acq): Likewise.
(__arch_atomic_exchange_32_rel): Likewise.
(__arch_atomic_exchange_and_add_32): Likewise.
(__arch_atomic_increment_val_32): Likewise.
(__arch_atomic_decrement_val_32): Likewise.
(__arch_atomic_decrement_if_positive_32): Likewise.
* sysdeps/powerpc/powerpc32/bits/atomic.h
(__arch_compare_and_exchange_bool_32_acq): Likewise.
(__arch_compare_and_exchange_bool_32_rel): Likewise.
* sysdeps/powerpc/powerpc64/bits/atomic.h
(__arch_compare_and_exchange_bool_32_acq): Likewise.
(__arch_compare_and_exchange_bool_32_rel): Likewise.
(__arch_compare_and_exchange_bool_64_acq): Likewise.
(__arch_compare_and_exchange_bool_64_rel): Likewise.
(__arch_compare_and_exchange_val_64_acq): Likewise.
(__arch_compare_and_exchange_val_64_rel): Likewise.
(__arch_atomic_exchange_64_acq): Likewise.
(__arch_atomic_exchange_64_rel): Likewise.
(__arch_atomic_exchange_and_add_64): Likewise.
(__arch_atomic_increment_val_64): Likewise.
(__arch_atomic_decrement_val_64): Likewise.
(__arch_atomic_decrement_if_positive_64): Likewise.
---
diff --git a/nptl/sysdeps/unix/sysv/linux/powerpc/lowlevellock.h b/nptl/sysdeps/unix/sysv/linux/powerpc/lowlevellock.h
index ab92c3f..419ee2f 100644
--- a/nptl/sysdeps/unix/sysv/linux/powerpc/lowlevellock.h
+++ b/nptl/sysdeps/unix/sysv/linux/powerpc/lowlevellock.h
@@ -205,7 +205,9 @@
/* Set *futex to ID if it is 0, atomically. Returns the old value */
#define __lll_robust_trylock(futex, id) \
({ int __val; \
- __asm __volatile ("1: lwarx %0,0,%2" MUTEX_HINT_ACQ "\n" \
+ if (!SINGLE_THREAD_P) \
+ __asm __volatile ( \
+ "1: lwarx %0,0,%2" MUTEX_HINT_ACQ "\n" \
" cmpwi 0,%0,0\n" \
" bne 2f\n" \
" stwcx. %3,0,%2\n" \
@@ -214,6 +216,12 @@
: "=&r" (__val), "=m" (*futex) \
: "r" (futex), "r" (id), "m" (*futex) \
: "cr0", "memory"); \
+ else \
+ { \
+ __val = *futex; \
+ if (__val == 0) \
+ *futex = id; \
+ } \
__val; \
})
diff --git a/sysdeps/powerpc/bits/atomic.h b/sysdeps/powerpc/bits/atomic.h
index 2ffba48..2d31411 100644
--- a/sysdeps/powerpc/bits/atomic.h
+++ b/sysdeps/powerpc/bits/atomic.h
@@ -76,6 +76,10 @@ typedef uintmax_t uatomic_max_t;
# define MUTEX_HINT_REL
#endif
+/* Note: SINGLE_THREAD_P is defined either in
+ sysdeps/powerpc/powerpc64/bits/atomic.h or
+ sysdeps/powerpc/powerpc32/bits/atomic.h */
+
#define atomic_full_barrier() __asm ("sync" ::: "memory")
#define atomic_write_barrier() __asm ("eieio" ::: "memory")
@@ -83,7 +87,8 @@ typedef uintmax_t uatomic_max_t;
({ \
__typeof (*(mem)) __tmp; \
__typeof (mem) __memp = (mem); \
- __asm __volatile ( \
+ if (!SINGLE_THREAD_P) \
+ __asm __volatile ( \
"1: lwarx %0,0,%1" MUTEX_HINT_ACQ "\n" \
" cmpw %0,%2\n" \
" bne 2f\n" \
@@ -93,6 +98,12 @@ typedef uintmax_t uatomic_max_t;
: "=&r" (__tmp) \
: "b" (__memp), "r" (oldval), "r" (newval) \
: "cr0", "memory"); \
+ else \
+ { \
+ __tmp = *__memp; \
+ if (__tmp == oldval) \
+ *__memp = newval; \
+ } \
__tmp; \
})
@@ -100,7 +111,8 @@ typedef uintmax_t uatomic_max_t;
({ \
__typeof (*(mem)) __tmp; \
__typeof (mem) __memp = (mem); \
- __asm __volatile (__ARCH_REL_INSTR "\n" \
+ if (!SINGLE_THREAD_P) \
+ __asm __volatile (__ARCH_REL_INSTR "\n" \
"1: lwarx %0,0,%1" MUTEX_HINT_REL "\n" \
" cmpw %0,%2\n" \
" bne 2f\n" \
@@ -110,13 +122,20 @@ typedef uintmax_t uatomic_max_t;
: "=&r" (__tmp) \
: "b" (__memp), "r" (oldval), "r" (newval) \
: "cr0", "memory"); \
+ else \
+ { \
+ __tmp = *__memp; \
+ if (__tmp == oldval) \
+ *__memp = newval; \
+ } \
__tmp; \
})
#define __arch_atomic_exchange_32_acq(mem, value) \
({ \
__typeof (*mem) __val; \
- __asm __volatile ( \
+ if (!SINGLE_THREAD_P) \
+ __asm __volatile ( \
"1: lwarx %0,0,%2" MUTEX_HINT_ACQ "\n" \
" stwcx. %3,0,%2\n" \
" bne- 1b\n" \
@@ -124,64 +143,92 @@ typedef uintmax_t uatomic_max_t;
: "=&r" (__val), "=m" (*mem) \
: "b" (mem), "r" (value), "m" (*mem) \
: "cr0", "memory"); \
+ else \
+ { \
+ __val = *mem; \
+ *mem = value; \
+ } \
__val; \
})
#define __arch_atomic_exchange_32_rel(mem, value) \
({ \
__typeof (*mem) __val; \
- __asm __volatile (__ARCH_REL_INSTR "\n" \
+ if (!SINGLE_THREAD_P) \
+ __asm __volatile (__ARCH_REL_INSTR "\n" \
"1: lwarx %0,0,%2" MUTEX_HINT_REL "\n" \
" stwcx. %3,0,%2\n" \
" bne- 1b" \
: "=&r" (__val), "=m" (*mem) \
: "b" (mem), "r" (value), "m" (*mem) \
: "cr0", "memory"); \
+ else \
+ { \
+ __val = *mem; \
+ *mem = value; \
+ } \
__val; \
})
#define __arch_atomic_exchange_and_add_32(mem, value) \
({ \
__typeof (*mem) __val, __tmp; \
- __asm __volatile ("1: lwarx %0,0,%3\n" \
+ if (!SINGLE_THREAD_P) \
+ __asm __volatile ( \
+ "1: lwarx %0,0,%3\n" \
" add %1,%0,%4\n" \
" stwcx. %1,0,%3\n" \
" bne- 1b" \
: "=&b" (__val), "=&r" (__tmp), "=m" (*mem) \
: "b" (mem), "r" (value), "m" (*mem) \
: "cr0", "memory"); \
+ else \
+ { \
+ __val = *mem; \
+ *mem += value; \
+ } \
__val; \
})
#define __arch_atomic_increment_val_32(mem) \
({ \
__typeof (*(mem)) __val; \
- __asm __volatile ("1: lwarx %0,0,%2\n" \
+ if (!SINGLE_THREAD_P) \
+ __asm __volatile ( \
+ "1: lwarx %0,0,%2\n" \
" addi %0,%0,1\n" \
" stwcx. %0,0,%2\n" \
" bne- 1b" \
: "=&b" (__val), "=m" (*mem) \
: "b" (mem), "m" (*mem) \
: "cr0", "memory"); \
+ else \
+ __val = ++(*mem); \
__val; \
})
#define __arch_atomic_decrement_val_32(mem) \
({ \
__typeof (*(mem)) __val; \
- __asm __volatile ("1: lwarx %0,0,%2\n" \
+ if (!SINGLE_THREAD_P) \
+ __asm __volatile ( \
+ "1: lwarx %0,0,%2\n" \
" subi %0,%0,1\n" \
" stwcx. %0,0,%2\n" \
" bne- 1b" \
: "=&b" (__val), "=m" (*mem) \
: "b" (mem), "m" (*mem) \
: "cr0", "memory"); \
+ else \
+ __val = --(*mem); \
__val; \
})
#define __arch_atomic_decrement_if_positive_32(mem) \
({ int __val, __tmp; \
- __asm __volatile ("1: lwarx %0,0,%3\n" \
+ if (!SINGLE_THREAD_P) \
+ __asm __volatile ( \
+ "1: lwarx %0,0,%3\n" \
" cmpwi 0,%0,0\n" \
" addi %1,%0,-1\n" \
" ble 2f\n" \
@@ -191,6 +238,12 @@ typedef uintmax_t uatomic_max_t;
: "=&b" (__val), "=&r" (__tmp), "=m" (*mem) \
: "b" (mem), "m" (*mem) \
: "cr0", "memory"); \
+ else \
+ { \
+ __val = (*mem); \
+ if (__val > 0) \
+ --(*mem); \
+ } \
__val; \
})
diff --git a/sysdeps/powerpc/powerpc32/bits/atomic.h b/sysdeps/powerpc/powerpc32/bits/atomic.h
index 7613bdc..08043a7 100644
--- a/sysdeps/powerpc/powerpc32/bits/atomic.h
+++ b/sysdeps/powerpc/powerpc32/bits/atomic.h
@@ -17,6 +17,8 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
+#include <tls.h>
+
/* POWER6 adds a "Mutex Hint" to the Load and Reserve instruction.
This is a hint to the hardware to expect additional updates adjacent
to the lock word or not. If we are acquiring a Mutex, the hint
@@ -33,6 +35,14 @@
# define MUTEX_HINT_REL
#endif
+/* Check if the process has created a thread. */
+#ifndef NOT_IN_libc
+# define SINGLE_THREAD_P \
+ (THREAD_GETMEM (THREAD_SELF, header.multiple_threads) == 0)
+#else
+# define SINGLE_THREAD_P 0
+#endif
+
/*
* The 32-bit exchange_bool is different on powerpc64 because the subf
* does signed 64-bit arithmetic while the lwarx is 32-bit unsigned
@@ -42,7 +52,8 @@
#define __arch_compare_and_exchange_bool_32_acq(mem, newval, oldval) \
({ \
unsigned int __tmp; \
- __asm __volatile ( \
+ if (!SINGLE_THREAD_P) \
+ __asm __volatile ( \
"1: lwarx %0,0,%1" MUTEX_HINT_ACQ "\n" \
" subf. %0,%2,%0\n" \
" bne 2f\n" \
@@ -52,13 +63,20 @@
: "=&r" (__tmp) \
: "b" (mem), "r" (oldval), "r" (newval) \
: "cr0", "memory"); \
+ else \
+ { \
+ __tmp = !(*mem == oldval); \
+ if (!__tmp) \
+ *mem = newval; \
+ } \
__tmp != 0; \
})
#define __arch_compare_and_exchange_bool_32_rel(mem, newval, oldval) \
({ \
unsigned int __tmp; \
- __asm __volatile (__ARCH_REL_INSTR "\n" \
+ if (!SINGLE_THREAD_P) \
+ __asm __volatile (__ARCH_REL_INSTR "\n" \
"1: lwarx %0,0,%1" MUTEX_HINT_REL "\n" \
" subf. %0,%2,%0\n" \
" bne 2f\n" \
@@ -68,6 +86,12 @@
: "=&r" (__tmp) \
: "b" (mem), "r" (oldval), "r" (newval) \
: "cr0", "memory"); \
+ else \
+ { \
+ __tmp = !(*mem == oldval); \
+ if (!__tmp) \
+ *mem = newval; \
+ } \
__tmp != 0; \
})
diff --git a/sysdeps/powerpc/powerpc64/bits/atomic.h b/sysdeps/powerpc/powerpc64/bits/atomic.h
index 527fe7c..0e2fe98 100644
--- a/sysdeps/powerpc/powerpc64/bits/atomic.h
+++ b/sysdeps/powerpc/powerpc64/bits/atomic.h
@@ -17,6 +17,8 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
+#include <tls.h>
+
/* POWER6 adds a "Mutex Hint" to the Load and Reserve instruction.
This is a hint to the hardware to expect additional updates adjacent
to the lock word or not. If we are acquiring a Mutex, the hint
@@ -33,6 +35,15 @@
# define MUTEX_HINT_REL
#endif
+/* Check if the process has created a thread. The lock optimization is only
+ for locks within libc.so. */
+#ifndef NOT_IN_libc
+# define SINGLE_THREAD_P \
+ (THREAD_GETMEM (THREAD_SELF, header.multiple_threads) == 0)
+#else
+# define SINGLE_THREAD_P 0
+#endif
+
/* The 32-bit exchange_bool is different on powerpc64 because the subf
does signed 64-bit arithmetic while the lwarx is 32-bit unsigned
(a load word and zero (high 32) form) load.
@@ -42,7 +53,8 @@
#define __arch_compare_and_exchange_bool_32_acq(mem, newval, oldval) \
({ \
unsigned int __tmp, __tmp2; \
- __asm __volatile (" clrldi %1,%1,32\n" \
+ if (!SINGLE_THREAD_P) \
+ __asm __volatile (" clrldi %1,%1,32\n" \
"1: lwarx %0,0,%2" MUTEX_HINT_ACQ "\n" \
" subf. %0,%1,%0\n" \
" bne 2f\n" \
@@ -52,13 +64,20 @@
: "=&r" (__tmp), "=r" (__tmp2) \
: "b" (mem), "1" (oldval), "r" (newval) \
: "cr0", "memory"); \
+ else \
+ { \
+ __tmp = !(*mem == oldval); \
+ if (!__tmp) \
+ *mem = newval; \
+ } \
__tmp != 0; \
})
#define __arch_compare_and_exchange_bool_32_rel(mem, newval, oldval) \
({ \
unsigned int __tmp, __tmp2; \
- __asm __volatile (__ARCH_REL_INSTR "\n" \
+ if (!SINGLE_THREAD_P) \
+ __asm __volatile (__ARCH_REL_INSTR "\n" \
" clrldi %1,%1,32\n" \
"1: lwarx %0,0,%2" MUTEX_HINT_REL "\n" \
" subf. %0,%1,%0\n" \
@@ -69,6 +88,12 @@
: "=&r" (__tmp), "=r" (__tmp2) \
: "b" (mem), "1" (oldval), "r" (newval) \
: "cr0", "memory"); \
+ else \
+ { \
+ __tmp = !(*mem == oldval); \
+ if (!__tmp) \
+ *mem = newval; \
+ } \
__tmp != 0; \
})
@@ -80,7 +105,8 @@
#define __arch_compare_and_exchange_bool_64_acq(mem, newval, oldval) \
({ \
unsigned long __tmp; \
- __asm __volatile ( \
+ if (!SINGLE_THREAD_P) \
+ __asm __volatile ( \
"1: ldarx %0,0,%1" MUTEX_HINT_ACQ "\n" \
" subf. %0,%2,%0\n" \
" bne 2f\n" \
@@ -90,13 +116,20 @@
: "=&r" (__tmp) \
: "b" (mem), "r" (oldval), "r" (newval) \
: "cr0", "memory"); \
+ else \
+ { \
+ __tmp = !(*mem == oldval); \
+ if (!__tmp) \
+ *mem = newval; \
+ } \
__tmp != 0; \
})
#define __arch_compare_and_exchange_bool_64_rel(mem, newval, oldval) \
({ \
unsigned long __tmp; \
- __asm __volatile (__ARCH_REL_INSTR "\n" \
+ if (!SINGLE_THREAD_P) \
+ __asm __volatile (__ARCH_REL_INSTR "\n" \
"1: ldarx %0,0,%2" MUTEX_HINT_REL "\n" \
" subf. %0,%2,%0\n" \
" bne 2f\n" \
@@ -106,6 +139,12 @@
: "=&r" (__tmp) \
: "b" (mem), "r" (oldval), "r" (newval) \
: "cr0", "memory"); \
+ else \
+ { \
+ __tmp = !(*mem == oldval); \
+ if (!__tmp) \
+ *mem = newval; \
+ } \
__tmp != 0; \
})
@@ -113,7 +152,8 @@
({ \
__typeof (*(mem)) __tmp; \
__typeof (mem) __memp = (mem); \
- __asm __volatile ( \
+ if (!SINGLE_THREAD_P) \
+ __asm __volatile ( \
"1: ldarx %0,0,%1" MUTEX_HINT_ACQ "\n" \
" cmpd %0,%2\n" \
" bne 2f\n" \
@@ -123,6 +163,12 @@
: "=&r" (__tmp) \
: "b" (__memp), "r" (oldval), "r" (newval) \
: "cr0", "memory"); \
+ else \
+ { \
+ __tmp = *__memp; \
+ if (__tmp == oldval) \
+ *__memp = newval; \
+ } \
__tmp; \
})
@@ -130,7 +176,8 @@
({ \
__typeof (*(mem)) __tmp; \
__typeof (mem) __memp = (mem); \
- __asm __volatile (__ARCH_REL_INSTR "\n" \
+ if (!SINGLE_THREAD_P) \
+ __asm __volatile (__ARCH_REL_INSTR "\n" \
"1: ldarx %0,0,%1" MUTEX_HINT_REL "\n" \
" cmpd %0,%2\n" \
" bne 2f\n" \
@@ -140,13 +187,20 @@
: "=&r" (__tmp) \
: "b" (__memp), "r" (oldval), "r" (newval) \
: "cr0", "memory"); \
+ else \
+ { \
+ __tmp = *__memp; \
+ if (__tmp == oldval) \
+ *__memp = newval; \
+ } \
__tmp; \
})
#define __arch_atomic_exchange_64_acq(mem, value) \
({ \
__typeof (*mem) __val; \
- __asm __volatile (__ARCH_REL_INSTR "\n" \
+ if (!SINGLE_THREAD_P) \
+ __asm __volatile (__ARCH_REL_INSTR "\n" \
"1: ldarx %0,0,%2" MUTEX_HINT_ACQ "\n" \
" stdcx. %3,0,%2\n" \
" bne- 1b\n" \
@@ -154,64 +208,88 @@
: "=&r" (__val), "=m" (*mem) \
: "b" (mem), "r" (value), "m" (*mem) \
: "cr0", "memory"); \
+ else \
+ { \
+ __val = *mem; \
+ *mem = value; \
+ } \
__val; \
})
#define __arch_atomic_exchange_64_rel(mem, value) \
({ \
__typeof (*mem) __val; \
- __asm __volatile (__ARCH_REL_INSTR "\n" \
+ if (!SINGLE_THREAD_P) \
+ __asm __volatile (__ARCH_REL_INSTR "\n" \
"1: ldarx %0,0,%2" MUTEX_HINT_REL "\n" \
" stdcx. %3,0,%2\n" \
" bne- 1b" \
: "=&r" (__val), "=m" (*mem) \
: "b" (mem), "r" (value), "m" (*mem) \
: "cr0", "memory"); \
+ else \
+ { \
+ __val = *mem; \
+ *mem = value; \
+ } \
__val; \
})
#define __arch_atomic_exchange_and_add_64(mem, value) \
({ \
__typeof (*mem) __val, __tmp; \
- __asm __volatile ("1: ldarx %0,0,%3\n" \
+ if (!SINGLE_THREAD_P) \
+ __asm __volatile ("1: ldarx %0,0,%3\n" \
" add %1,%0,%4\n" \
" stdcx. %1,0,%3\n" \
" bne- 1b" \
: "=&b" (__val), "=&r" (__tmp), "=m" (*mem) \
: "b" (mem), "r" (value), "m" (*mem) \
: "cr0", "memory"); \
+ else \
+ { \
+ __val = *mem; \
+ *mem += value; \
+ } \
__val; \
})
#define __arch_atomic_increment_val_64(mem) \
({ \
__typeof (*(mem)) __val; \
- __asm __volatile ("1: ldarx %0,0,%2\n" \
+ if (!SINGLE_THREAD_P) \
+ __asm __volatile ("1: ldarx %0,0,%2\n" \
" addi %0,%0,1\n" \
" stdcx. %0,0,%2\n" \
" bne- 1b" \
: "=&b" (__val), "=m" (*mem) \
: "b" (mem), "m" (*mem) \
: "cr0", "memory"); \
+ else \
+ __val = ++(*mem); \
__val; \
})
#define __arch_atomic_decrement_val_64(mem) \
({ \
__typeof (*(mem)) __val; \
- __asm __volatile ("1: ldarx %0,0,%2\n" \
+ if (!SINGLE_THREAD_P) \
+ __asm __volatile ("1: ldarx %0,0,%2\n" \
" subi %0,%0,1\n" \
" stdcx. %0,0,%2\n" \
" bne- 1b" \
: "=&b" (__val), "=m" (*mem) \
: "b" (mem), "m" (*mem) \
: "cr0", "memory"); \
+ else \
+ __val = --(*mem); \
__val; \
})
#define __arch_atomic_decrement_if_positive_64(mem) \
({ int __val, __tmp; \
- __asm __volatile ("1: ldarx %0,0,%3\n" \
+ if (!SINGLE_THREAD_P) \
+ __asm __volatile ("1: ldarx %0,0,%3\n" \
" cmpdi 0,%0,0\n" \
" addi %1,%0,-1\n" \
" ble 2f\n" \
@@ -221,6 +299,12 @@
: "=&b" (__val), "=&r" (__tmp), "=m" (*mem) \
: "b" (mem), "m" (*mem) \
: "cr0", "memory"); \
+ else \
+ { \
+ __val = (*mem); \
+ if (__val > 0) \
+ --(*mem); \
+ } \
__val; \
})