This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.



Future atomic operation cleanup


On 08/22/2014 06:50 AM, Adhemerval Zanella wrote:
> Hi,
> 
> Following comments on my first patch to optimize single-thread internal
> glibc locking/atomics [1], I have changed the implementation to now use
> relaxed atomics instead.  Addressing the concerns raised in the last
> discussion, the primitives are still signal-safe (although not
> thread-safe): the larx/stcx. loop itself remains atomic with respect to
> signals, only the ordering fences are dropped.  So if a future malloc
> implementation is changed to be async-safe, it won't require adjusting
> the powerpc atomics.
> 
> For catomic_and and catomic_or I followed the definition in
> 'include/atomic.h' (which powerpc currently uses) and implemented the
> atomics with acquire semantics.  The new implementation is also simpler.
> 
> On synthetic benchmarks it shows an improvement of 5-10% for malloc
> calls and a performance increase of 7-8% in 483.xalancbmk from
> SPEC CPU2006 (numbers from a POWER8 machine).
> 
> Checked on powerpc64, powerpc32 and powerpc64le.

Wow, that's a lot of boilerplate.

When development opens again, can we simplify all of these atomic operations by
assuming compiler primitives?  That is, use the __atomic builtins for gcc 4.8
and later, fall back to the __sync builtins for earlier gcc, and completely
drop support for truly ancient compilers that support neither.

As a bonus we'd get to unify the implementations across all of the targets.
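
For concreteness, here is a minimal sketch of what the unified dispatch
could look like.  This is hypothetical code, not the actual glibc macros;
the version check and the exact macro shape are assumptions:

  #if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)
  /* gcc 4.8 and later: __atomic builtins with an explicit memory order.
     On failure, __atomic_compare_exchange_n writes the observed value
     into __old, which matches the _val_ return convention.  */
  # define atomic_compare_and_exchange_val_acq(mem, newval, oldval)	      \
    ({ __typeof (*(mem)) __old = (oldval);				      \
       __atomic_compare_exchange_n ((mem), &__old, (newval), 0,	      \
				    __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);      \
       __old; })
  #else
  /* Earlier gcc: fall back to the __sync builtins.  These imply a full
     barrier, stronger than acquire but still correct.  */
  # define atomic_compare_and_exchange_val_acq(mem, newval, oldval)	      \
    __sync_val_compare_and_swap ((mem), (oldval), (newval))
  #endif

One generic include/atomic.h written this way would replace the sizeof
dispatch currently repeated in every sysdeps/*/bits/atomic.h.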


r~

> 
> [1] https://sourceware.org/ml/libc-alpha/2014-05/msg00118.html
> 
> --
> 
> 	* sysdeps/powerpc/bits/atomic.h
> 	(__arch_compare_and_exchange_val_32_relaxed): New macro: atomic compare
> 	and exchange with relaxed semantics.
> 	(atomic_compare_and_exchange_val_relaxed): Likewise.
> 	(__arch_atomic_exchange_32_relaxed): New macro: atomic exchange with
> 	relaxed semantics.
> 	(atomic_exchange_relaxed): Likewise.
> 	(__arch_atomic_and_32): New macro: atomic bitwise and with acquire
> 	semantics.
> 	(__arch_atomic_and_32_relaxed): New macro: atomic bitwise and with
> 	relaxed semantics.
> 	(atomic_and_relaxed): Likewise.
> 	(__arch_atomic_or_32): New macro: atomic bitwise or with acquire
> 	semantics.
> 	(__arch_atomic_or_32_relaxed): New macro: atomic bitwise or with
> 	relaxed semantics.
> 	(atomic_or_relaxed): Likewise.
> 	(__atomic_is_single_thread): New macro: check if program is
> 	single-threaded.
> 	(atomic_compare_and_exchange_val_acq): Add relaxed operation for
> 	the single-thread case.
> 	(atomic_compare_and_exchange_val_rel): Likewise.
> 	(atomic_exchange_rel): Likewise.
> 	(catomic_and): Likewise.
> 	(catomic_or): Likewise.
> 	* sysdeps/powerpc/powerpc32/bits/atomic.h
> 	(__arch_compare_and_exchange_val_64_relaxed): New macro: empty
> 	implementation.
> 	(__arch_atomic_exchange_64_relaxed): Likewise.
> 	* sysdeps/powerpc/powerpc64/bits/atomic.h
> 	(__arch_compare_and_exchange_val_64_relaxed): New macro: atomic compare
> 	and exchange with relaxed semantics.
> 	(__arch_atomic_exchange_64_relaxed): New macro: atomic exchange with
> 	relaxed semantics.
> 	(__arch_atomic_and_64_relaxed): New macro: atomic bitwise and with
> 	relaxed semantics.
> 	(__arch_atomic_and_64): New macro: atomic bitwise and with acquire
> 	semantics.
> 	(__arch_atomic_or_64_relaxed): New macro: atomic bitwise or with
> 	relaxed semantics.
> 	(__arch_atomic_or_64): New macro: atomic bitwise or with acquire
> 	semantics.
> 
> ---
> 
> diff --git a/sysdeps/powerpc/bits/atomic.h b/sysdeps/powerpc/bits/atomic.h
> index 2ffba48..be590c7 100644
> --- a/sysdeps/powerpc/bits/atomic.h
> +++ b/sysdeps/powerpc/bits/atomic.h
> @@ -27,6 +27,7 @@
>   */
>  
>  #include <stdint.h>
> +#include <tls.h>
>  
>  typedef int32_t atomic32_t;
>  typedef uint32_t uatomic32_t;
> @@ -113,6 +114,23 @@ typedef uintmax_t uatomic_max_t;
>        __tmp;								      \
>    })
>  
> +#define __arch_compare_and_exchange_val_32_relaxed(mem, newval, oldval)	      \
> +  ({									      \
> +      __typeof (*(mem)) __tmp;						      \
> +      __typeof (mem)  __memp = (mem);					      \
> +      __asm __volatile (						      \
> +		        "1:	lwarx	%0,0,%1\n"			      \
> +		        "	cmpw	%0,%2\n"			      \
> +		        "	bne	2f\n"				      \
> +		        "	stwcx.	%3,0,%1\n"			      \
> +		        "	bne-	1b\n"				      \
> +		        "2:	"					      \
> +		        : "=&r" (__tmp)					      \
> +		        : "b" (__memp), "r" (oldval), "r" (newval)	      \
> +		        : "cr0", "memory");				      \
> +      __tmp;								      \
> +  })
> +
>  #define __arch_atomic_exchange_32_acq(mem, value)			      \
>    ({									      \
>      __typeof (*mem) __val;						      \
> @@ -127,6 +145,18 @@ typedef uintmax_t uatomic_max_t;
>      __val;								      \
>    })
>  
> +#define __arch_atomic_exchange_32_relaxed(mem, value)			      \
> +  ({									      \
> +    __typeof (*mem) __val;						      \
> +    __asm __volatile ("1:	lwarx	%0,0,%2\n"			      \
> +		      "		stwcx.	%3,0,%2\n"			      \
> +		      "		bne-	1b\n"				      \
> +		      : "=&r" (__val), "=m" (*mem)			      \
> +		      : "b" (mem), "r" (value), "m" (*mem)		      \
> +		      : "cr0", "memory");				      \
> +    __val;								      \
> +  })
> +
>  #define __arch_atomic_exchange_32_rel(mem, value) \
>    ({									      \
>      __typeof (*mem) __val;						      \
> @@ -140,6 +170,7 @@ typedef uintmax_t uatomic_max_t;
>      __val;								      \
>    })
>  
> +
>  #define __arch_atomic_exchange_and_add_32(mem, value) \
>    ({									      \
>      __typeof (*mem) __val, __tmp;					      \
> @@ -153,6 +184,62 @@ typedef uintmax_t uatomic_max_t;
>      __val;								      \
>    })
>  
> +#define __arch_atomic_and_32(mem, value) \
> +  ({									      \
> +    __typeof (*mem) __val, __tmp;					      \
> +    __asm __volatile ("1:	lwarx	%0,0,%3\n"			      \
> +		      "		and	%1,%0,%4\n"			      \
> +		      "		stwcx.	%1,0,%3\n"			      \
> +		      "		bne-	1b\n"				      \
> +		      "         " __ARCH_ACQ_INSTR			      \
> +		      : "=&b" (__val), "=&r" (__tmp), "=m" (*mem)	      \
> +		      : "b" (mem), "r" (value), "m" (*mem)		      \
> +		      : "cr0", "memory");				      \
> +    __val;								      \
> +  })
> +
> +#define __arch_atomic_and_32_relaxed(mem, value) \
> +  ({									      \
> +    __typeof (*mem) __val, __tmp;					      \
> +    __asm __volatile ("		\n"					      \
> +		      "1:	lwarx	%0,0,%3\n"			      \
> +		      "		and	%1,%0,%4\n"			      \
> +		      "		stwcx.	%1,0,%3\n"			      \
> +		      "		bne-	1b\n"				      \
> +		      : "=&b" (__val), "=&r" (__tmp), "=m" (*mem)	      \
> +		      : "b" (mem), "r" (value), "m" (*mem)		      \
> +		      : "cr0", "memory");				      \
> +    __val;								      \
> +  })
> +
> +#define __arch_atomic_or_32(mem, value) \
> +  ({									      \
> +    __typeof (*mem) __val, __tmp;					      \
> +    __asm __volatile ("1:	lwarx	%0,0,%3\n"			      \
> +		      "		or	%1,%0,%4\n"			      \
> +		      "		stwcx.	%1,0,%3\n"			      \
> +		      "		bne-	1b\n"				      \
> +		      "         " __ARCH_ACQ_INSTR	 		      \
> +		      : "=&b" (__val), "=&r" (__tmp), "=m" (*mem)	      \
> +		      : "b" (mem), "r" (value), "m" (*mem)		      \
> +		      : "cr0", "memory");				      \
> +    __val;								      \
> +  })
> +
> +#define __arch_atomic_or_32_relaxed(mem, value) \
> +  ({									      \
> +    __typeof (*mem) __val, __tmp;					      \
> +    __asm __volatile ("		\n"					      \
> +		      "1:	lwarx	%0,0,%3\n"			      \
> +		      "		or	%1,%0,%4\n"			      \
> +		      "		stwcx.	%1,0,%3\n"			      \
> +		      "		bne-	1b\n"				      \
> +		      : "=&b" (__val), "=&r" (__tmp), "=m" (*mem)	      \
> +		      : "b" (mem), "r" (value), "m" (*mem)		      \
> +		      : "cr0", "memory");				      \
> +    __val;								      \
> +  })
> +
>  #define __arch_atomic_increment_val_32(mem) \
>    ({									      \
>      __typeof (*(mem)) __val;						      \
> @@ -194,10 +281,27 @@ typedef uintmax_t uatomic_max_t;
>       __val;								      \
>    })
>  
> -#define atomic_compare_and_exchange_val_acq(mem, newval, oldval) \
> +#define __atomic_is_single_thread                                             \
> +  (THREAD_GETMEM (THREAD_SELF, header.multiple_threads) == 0)
> +
> +#define atomic_compare_and_exchange_val_relaxed(mem, newval, oldval) \
>    ({									      \
>      __typeof (*(mem)) __result;						      \
>      if (sizeof (*mem) == 4)						      \
> +      __result = __arch_compare_and_exchange_val_32_relaxed(mem, newval, oldval); \
> +    else if (sizeof (*mem) == 8)					      \
> +      __result = __arch_compare_and_exchange_val_64_relaxed(mem, newval, oldval); \
> +    else 								      \
> +       abort ();							      \
> +    __result;								      \
> +  })
> +
> +#define atomic_compare_and_exchange_val_acq(mem, newval, oldval) \
> +  ({									      \
> +    __typeof (*(mem)) __result;						      \
> +    if (__atomic_is_single_thread)					      \
> +      __result = atomic_compare_and_exchange_val_relaxed (mem, newval, oldval); \
> +    else if (sizeof (*mem) == 4)					      \
>        __result = __arch_compare_and_exchange_val_32_acq(mem, newval, oldval); \
>      else if (sizeof (*mem) == 8)					      \
>        __result = __arch_compare_and_exchange_val_64_acq(mem, newval, oldval); \
> @@ -209,7 +313,9 @@ typedef uintmax_t uatomic_max_t;
>  #define atomic_compare_and_exchange_val_rel(mem, newval, oldval) \
>    ({									      \
>      __typeof (*(mem)) __result;						      \
> -    if (sizeof (*mem) == 4)						      \
> +    if (__atomic_is_single_thread)					      \
> +      __result = atomic_compare_and_exchange_val_relaxed (mem, newval, oldval); \
> +    else if (sizeof (*mem) == 4)					      \
>        __result = __arch_compare_and_exchange_val_32_rel(mem, newval, oldval); \
>      else if (sizeof (*mem) == 8)					      \
>        __result = __arch_compare_and_exchange_val_64_rel(mem, newval, oldval); \
> @@ -218,10 +324,24 @@ typedef uintmax_t uatomic_max_t;
>      __result;								      \
>    })
>  
> -#define atomic_exchange_acq(mem, value) \
> +#define atomic_exchange_relaxed(mem, value) \
>    ({									      \
>      __typeof (*(mem)) __result;						      \
>      if (sizeof (*mem) == 4)						      \
> +      __result = __arch_atomic_exchange_32_relaxed (mem, value);	      \
> +    else if (sizeof (*mem) == 8)					      \
> +      __result = __arch_atomic_exchange_64_relaxed (mem, value);	      \
> +    else 								      \
> +       abort ();							      \
> +    __result;								      \
> +  })
> +
> +#define atomic_exchange_acq(mem, value)					      \
> +  ({									      \
> +    __typeof (*(mem)) __result;						      \
> +    if (__atomic_is_single_thread)					      \
> +      __result = atomic_exchange_relaxed (mem, value);			      \
> +    else if (sizeof (*mem) == 4)					      \
>        __result = __arch_atomic_exchange_32_acq (mem, value);		      \
>      else if (sizeof (*mem) == 8)					      \
>        __result = __arch_atomic_exchange_64_acq (mem, value);		      \
> @@ -233,7 +353,9 @@ typedef uintmax_t uatomic_max_t;
>  #define atomic_exchange_rel(mem, value) \
>    ({									      \
>      __typeof (*(mem)) __result;						      \
> -    if (sizeof (*mem) == 4)						      \
> +    if (__atomic_is_single_thread)					      \
> +      __result = atomic_exchange_relaxed (mem, value);			      \
> +    else if (sizeof (*mem) == 4)					      \
>        __result = __arch_atomic_exchange_32_rel (mem, value);		      \
>      else if (sizeof (*mem) == 8)					      \
>        __result = __arch_atomic_exchange_64_rel (mem, value);		      \
> @@ -294,3 +416,55 @@ typedef uintmax_t uatomic_max_t;
>         abort ();							      \
>      __result;								      \
>    })
> +
> +#define atomic_and_relaxed(mem, arg) \
> +  ({									      \
> +    __typeof (*(mem)) __result;						      \
> +    if (sizeof (*mem) == 4)						      \
> +      __result = __arch_atomic_and_32_relaxed(mem, arg);		      \
> +    else if (sizeof (*mem) == 8)					      \
> +      __result = __arch_atomic_and_64_relaxed(mem, arg);		      \
> +    else 								      \
> +       abort ();							      \
> +    __result;								      \
> +  })
> +
> +#define catomic_and(mem, arg) \
> +  ({									      \
> +    __typeof (*(mem)) __result;						      \
> +    if (__atomic_is_single_thread)					      \
> +      __result = atomic_and_relaxed (mem, arg); \
> +    else if (sizeof (*mem) == 4)					      \
> +      __result = __arch_atomic_and_32(mem, arg); \
> +    else if (sizeof (*mem) == 8)					      \
> +      __result = __arch_atomic_and_64(mem, arg); \
> +    else 								      \
> +       abort ();							      \
> +    __result;								      \
> +  })
> +
> +#define atomic_or_relaxed(mem, arg) \
> +  ({									      \
> +    __typeof (*(mem)) __result;						      \
> +    if (sizeof (*mem) == 4)						      \
> +      __result = __arch_atomic_or_32_relaxed(mem, arg);			      \
> +    else if (sizeof (*mem) == 8)					      \
> +      __result = __arch_atomic_or_64_relaxed(mem, arg);			      \
> +    else 								      \
> +       abort ();							      \
> +    __result;								      \
> +  })
> +
> +#define catomic_or(mem, arg) \
> +  ({									      \
> +    __typeof (*(mem)) __result;						      \
> +    if (__atomic_is_single_thread)					      \
> +      __result = atomic_or_relaxed (mem, arg); \
> +    else if (sizeof (*mem) == 4)					      \
> +      __result = __arch_atomic_or_32(mem, arg); \
> +    else if (sizeof (*mem) == 8)					      \
> +      __result = __arch_atomic_or_64(mem, arg); \
> +    else 								      \
> +       abort ();							      \
> +    __result;								      \
> +  })
> diff --git a/sysdeps/powerpc/powerpc32/bits/atomic.h b/sysdeps/powerpc/powerpc32/bits/atomic.h
> index 7613bdc..1b9f82a 100644
> --- a/sysdeps/powerpc/powerpc32/bits/atomic.h
> +++ b/sysdeps/powerpc/powerpc32/bits/atomic.h
> @@ -83,6 +83,9 @@
>  #define __arch_compare_and_exchange_bool_64_rel(mem, newval, oldval) \
>    (abort (), 0)
>  
> +#define __arch_compare_and_exchange_val_64_relaxed(mem, newval, oldval) \
> +  (abort (), (__typeof (*mem)) 0)
> +
>  #define __arch_compare_and_exchange_val_64_rel(mem, newval, oldval) \
>    (abort (), (__typeof (*mem)) 0)
>  
> @@ -92,6 +95,9 @@
>  #define __arch_atomic_exchange_64_rel(mem, value) \
>      ({ abort (); (*mem) = (value); })
>  
> +#define __arch_atomic_exchange_64_relaxed(mem, value) \
> +    ({ abort (); (*mem) = (value); })
> +
>  #define __arch_atomic_exchange_and_add_64(mem, value) \
>      ({ abort (); (*mem) = (value); })
>  
> diff --git a/sysdeps/powerpc/powerpc64/bits/atomic.h b/sysdeps/powerpc/powerpc64/bits/atomic.h
> index 527fe7c..b8a1035 100644
> --- a/sysdeps/powerpc/powerpc64/bits/atomic.h
> +++ b/sysdeps/powerpc/powerpc64/bits/atomic.h
> @@ -143,6 +143,23 @@
>        __tmp;								      \
>    })
>  
> +#define __arch_compare_and_exchange_val_64_relaxed(mem, newval, oldval) \
> +  ({									      \
> +      __typeof (*(mem)) __tmp;						      \
> +      __typeof (mem)  __memp = (mem);					      \
> +      __asm __volatile ("\n"				      \
> +		        "1:	ldarx	%0,0,%1\n"	      \
> +		        "	cmpd	%0,%2\n"			      \
> +		        "	bne	2f\n"				      \
> +		        "	stdcx.	%3,0,%1\n"			      \
> +		        "	bne-	1b\n"				      \
> +		        "2:	"					      \
> +		        : "=&r" (__tmp)					      \
> +		        : "b" (__memp), "r" (oldval), "r" (newval)	      \
> +		        : "cr0", "memory");				      \
> +      __tmp;								      \
> +  })
> +
>  #define __arch_atomic_exchange_64_acq(mem, value) \
>      ({									      \
>        __typeof (*mem) __val;						      \
> @@ -170,6 +187,18 @@
>        __val;								      \
>      })
>  
> +#define __arch_atomic_exchange_64_relaxed(mem, value) \
> +    ({									      \
> +      __typeof (*mem) __val;						      \
> +      __asm __volatile ("1:	ldarx	%0,0,%2\n"			      \
> +			"	stdcx.	%3,0,%2\n"			      \
> +			"	bne-	1b"				      \
> +			: "=&r" (__val), "=m" (*mem)			      \
> +			: "b" (mem), "r" (value), "m" (*mem)		      \
> +			: "cr0", "memory");				      \
> +      __val;								      \
> +    })
> +
>  #define __arch_atomic_exchange_and_add_64(mem, value) \
>      ({									      \
>        __typeof (*mem) __val, __tmp;					      \
> @@ -224,6 +253,60 @@
>       __val;								      \
>    })
>  
> +#define __arch_atomic_and_64_relaxed(mem, value) \
> +  ({									      \
> +    __typeof (*mem) __val, __tmp;					      \
> +    __asm __volatile ("1:	ldarx	%0,0,%3\n"			      \
> +		      "		and	%1,%0,%4\n"			      \
> +		      "		stdcx.	%1,0,%3\n"			      \
> +		      "		bne-	1b\n"				      \
> +		      : "=&b" (__val), "=&r" (__tmp), "=m" (*mem)	      \
> +		      : "b" (mem), "r" (value), "m" (*mem)		      \
> +		      : "cr0", "memory");				      \
> +    __val;								      \
> +  })
> +
> +#define __arch_atomic_and_64(mem, value) \
> +  ({									      \
> +    __typeof (*mem) __val, __tmp;					      \
> +    __asm __volatile ("1:	ldarx	%0,0,%3\n"			      \
> +		      "		and	%1,%0,%4\n"			      \
> +		      "		stdcx.	%1,0,%3\n"			      \
> +		      "		bne-	1b\n"				      \
> +		      "       " __ARCH_ACQ_INSTR			      \
> +		      : "=&b" (__val), "=&r" (__tmp), "=m" (*mem)	      \
> +		      : "b" (mem), "r" (value), "m" (*mem)		      \
> +		      : "cr0", "memory");				      \
> +    __val;								      \
> +  })
> +
> +#define __arch_atomic_or_64_relaxed(mem, value)				      \
> +  ({									      \
> +    __typeof (*mem) __val, __tmp;					      \
> +    __asm __volatile ("1:	ldarx	%0,0,%3\n"			      \
> +		      "		or	%1,%0,%4\n"			      \
> +		      "		stdcx.	%1,0,%3\n"			      \
> +		      "		bne-	1b\n"				      \
> +		      : "=&b" (__val), "=&r" (__tmp), "=m" (*mem)	      \
> +		      : "b" (mem), "r" (value), "m" (*mem)		      \
> +		      : "cr0", "memory");				      \
> +    __val;								      \
> +  })
> +
> +#define __arch_atomic_or_64(mem, value)					      \
> +  ({									      \
> +    __typeof (*mem) __val, __tmp;					      \
> +    __asm __volatile ("1:	ldarx	%0,0,%3\n"			      \
> +		      "		or	%1,%0,%4\n"			      \
> +		      "		stdcx.	%1,0,%3\n"			      \
> +		      "		bne-	1b\n"				      \
> +		      "       " __ARCH_ACQ_INSTR			      \
> +		      : "=&b" (__val), "=&r" (__tmp), "=m" (*mem)	      \
> +		      : "b" (mem), "r" (value), "m" (*mem)		      \
> +		      : "cr0", "memory");				      \
> +    __val;								      \
> +  })
> +
>  /*
>   * All powerpc64 processors support the new "light weight"  sync (lwsync).
>   */
> 

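In portable C, the fast path the patch introduces amounts to the following.
This is a hypothetical, standalone sketch: the single_threaded flag stands
in for THREAD_GETMEM (THREAD_SELF, header.multiple_threads) == 0, and the
compiler builtins stand in for the powerpc inline asm:

  #include <stdio.h>

  /* Assumption: kept up to date by the runtime, e.g. at thread creation.  */
  static int single_threaded = 1;

  static unsigned int
  catomic_or_sketch (unsigned int *mem, unsigned int mask)
  {
    if (single_threaded)
      /* Relaxed: still atomic with respect to signals on this thread,
	 but no sync/isync ordering fences are emitted.  */
      return __atomic_fetch_or (mem, mask, __ATOMIC_RELAXED);
    /* Multi-threaded: acquire ordering, as in include/atomic.h.  */
    return __atomic_fetch_or (mem, mask, __ATOMIC_ACQUIRE);
  }

  int
  main (void)
  {
    unsigned int flags = 0x1;
    unsigned int old = catomic_or_sketch (&flags, 0x4);
    printf ("old=%#x new=%#x\n", old, flags);	/* old=0x1 new=0x5 */
    return 0;
  }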
