This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
Re: [PATCH 3/3] Add adaptive elision to rwlocks v2
- From: "Carlos O'Donell" <carlos at redhat dot com>
- To: Andi Kleen <andi at firstfloor dot org>, libc-alpha at sourceware dot org
- Cc: Andi Kleen <ak at linux dot intel dot com>
- Date: Mon, 09 Jun 2014 16:10:53 -0400
- Subject: Re: [PATCH 3/3] Add adaptive elision to rwlocks v2
- Authentication-results: sourceware.org; auth=none
- References: <1399412209-28245-1-git-send-email-andi at firstfloor dot org> <1399412209-28245-4-git-send-email-andi at firstfloor dot org>
On 05/06/2014 05:36 PM, Andi Kleen wrote:
> From: Andi Kleen <ak@linux.intel.com>
>
> This patch relies on the C version of the rwlocks posted earlier.
> With C rwlocks it is very straight forward to do adaptive elision
> using TSX. It is based on the infrastructure added earlier
> for mutexes, but uses its own elision macros. The macros
> are fairly general purpose and could be used for other
> elision purposes too.
>
> This version is much cleaner than the earlier assembler based
> version, and in particular implements adaptation which makes
> it safer.
>
> I changed the behavior slightly to not require any changes
> in the test suite and fully conform to all expected
> behaviors (generally at the cost of not eliding in
> various situations). In particular this means the timedlock
> variants are not elided. Nested trylock aborts.
>
> v2: Address Roland's feedback. Add ACCESS_ONCE usage.
> I didn't use the nested function inlines, it
> seemed too fragile. Also it still uses macros
> to be generic.
Almost OK. Uses an instance of "centralized defaults" pattern
which we want to avoid in "macro apis" because of our usage
of -Wundef and subsequent hardening of the APIs.
> nptl/:
>
> 2014-05-06 Andi Kleen <ak@linux.intel.com>
>
> * pthread_rwlock_rdlock.c: Include elide.h.
> (pthread_rwlock_rdlock): Add elision.
> * pthread_rwlock_wrlock.c: Include elide.h.
> (pthread_rwlock_wrlock): Add elision.
> * pthread_rwlock_trywrlock.c: Include elide.h.
> (pthread_rwlock_trywrlock): Add elision.
> * pthread_rwlock_tryrdlock.c: Include elide.h.
> (pthread_rwlock_tryrdlock): Add elision.
> * pthread_rwlock_unlock.c: Include elide.h.
> (pthread_rwlock_tryrdlock): Add elision unlock.
> * sysdeps/pthread/pthread.h:
> (__PTHREAD_RWLOCK_ELISION_EXTRA): Handle new define
> (PTHREAD_RWLOCK_INITIALIZER,
> PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP):
> Handle new elision field.
> * sysdeps/unix/sysv/linux/x86/bits/pthreadtypes.h:
> (pthread_rwlock_t): Change __pad1 to __rwelision.
> (__PTHREAD_RWLOCK_ELISION_EXTRA): Add.
> * sysdeps/unix/sysv/linux/x86/elide.h:
> New file. Add generic elision macros.
> * sysdeps/unix/sysv/linux/x86/elision-conf.c:
> (elision_init): Set try_xbegin to zero when no RTM.
> ---
> nptl/pthread_rwlock_rdlock.c | 7 ++
> nptl/pthread_rwlock_tryrdlock.c | 7 ++
> nptl/pthread_rwlock_trywrlock.c | 7 ++
> nptl/pthread_rwlock_unlock.c | 6 ++
> nptl/pthread_rwlock_wrlock.c | 7 ++
> nptl/sysdeps/pthread/pthread.h | 10 +-
> .../unix/sysv/linux/x86/bits/pthreadtypes.h | 6 +-
> nptl/sysdeps/unix/sysv/linux/x86/elide.h | 109 +++++++++++++++++++++
> nptl/sysdeps/unix/sysv/linux/x86/elision-conf.c | 2 +
> 9 files changed, 156 insertions(+), 5 deletions(-)
> create mode 100644 nptl/sysdeps/unix/sysv/linux/x86/elide.h
>
> diff --git a/nptl/pthread_rwlock_rdlock.c b/nptl/pthread_rwlock_rdlock.c
> index 1df0327..6eb9e09 100644
> --- a/nptl/pthread_rwlock_rdlock.c
> +++ b/nptl/pthread_rwlock_rdlock.c
> @@ -22,6 +22,7 @@
> #include <pthread.h>
> #include <pthreadP.h>
> #include <stap-probe.h>
> +#include <elide.h>
OK.
>
>
> /* Acquire read lock for RWLOCK. Slow path. */
> @@ -102,6 +103,12 @@ __pthread_rwlock_rdlock (pthread_rwlock_t *rwlock)
>
> LIBC_PROBE (rdlock_entry, 1, rwlock);
>
> + if (ELIDE_LOCK (rwlock->__data.__rwelision,
> + rwlock->__data.__lock == 0
> + && rwlock->__data.__writer == 0
> + && rwlock->__data.__nr_readers == 0))
> + return 0;
OK.
> +
> /* Make sure we are alone. */
> lll_lock (rwlock->__data.__lock, rwlock->__data.__shared);
>
> diff --git a/nptl/pthread_rwlock_tryrdlock.c b/nptl/pthread_rwlock_tryrdlock.c
> index f7b1e6b..cf8d68e 100644
> --- a/nptl/pthread_rwlock_tryrdlock.c
> +++ b/nptl/pthread_rwlock_tryrdlock.c
> @@ -19,6 +19,7 @@
> #include <errno.h>
> #include "pthreadP.h"
> #include <lowlevellock.h>
> +#include <elide.h>
OK.
>
>
> int
> @@ -26,6 +27,12 @@ __pthread_rwlock_tryrdlock (pthread_rwlock_t *rwlock)
> {
> int result = EBUSY;
>
> + if (ELIDE_TRYLOCK (rwlock->__data.__rwelision,
> + rwlock->__data.__lock == 0
> + && rwlock->__data.__nr_readers == 0
> + && rwlock->__data.__writer, 0))
> + return 0;
OK.
> +
> lll_lock (rwlock->__data.__lock, rwlock->__data.__shared);
>
> if (rwlock->__data.__writer == 0
> diff --git a/nptl/pthread_rwlock_trywrlock.c b/nptl/pthread_rwlock_trywrlock.c
> index 106f157..0291fc9 100644
> --- a/nptl/pthread_rwlock_trywrlock.c
> +++ b/nptl/pthread_rwlock_trywrlock.c
> @@ -19,6 +19,7 @@
> #include <errno.h>
> #include "pthreadP.h"
> #include <lowlevellock.h>
> +#include <elide.h>
OK.
>
>
> int
> @@ -26,6 +27,12 @@ __pthread_rwlock_trywrlock (pthread_rwlock_t *rwlock)
> {
> int result = EBUSY;
>
> + if (ELIDE_TRYLOCK (rwlock->__data.__rwelision,
> + rwlock->__data.__lock == 0
> + && rwlock->__data.__nr_readers == 0
> + && rwlock->__data.__writer, 1))
> + return 0;
OK.
> +
> lll_lock (rwlock->__data.__lock, rwlock->__data.__shared);
>
> if (rwlock->__data.__writer == 0 && rwlock->__data.__nr_readers == 0)
> diff --git a/nptl/pthread_rwlock_unlock.c b/nptl/pthread_rwlock_unlock.c
> index d492383..3ebddeb 100644
> --- a/nptl/pthread_rwlock_unlock.c
> +++ b/nptl/pthread_rwlock_unlock.c
> @@ -22,6 +22,8 @@
> #include <pthread.h>
> #include <pthreadP.h>
> #include <stap-probe.h>
> +#include <elide.h>
OK.
> +
>
> /* Unlock RWLOCK. */
> int
> @@ -29,6 +31,10 @@ __pthread_rwlock_unlock (pthread_rwlock_t *rwlock)
> {
> LIBC_PROBE (rwlock_unlock, 1, rwlock);
>
> + if (ELIDE_UNLOCK (rwlock->__data.__writer == 0
> + && rwlock->__data.__nr_readers == 0))
> + return 0;
OK.
> +
> lll_lock (rwlock->__data.__lock, rwlock->__data.__shared);
> if (rwlock->__data.__writer)
> rwlock->__data.__writer = 0;
> diff --git a/nptl/pthread_rwlock_wrlock.c b/nptl/pthread_rwlock_wrlock.c
> index de54e51..91ad82a 100644
> --- a/nptl/pthread_rwlock_wrlock.c
> +++ b/nptl/pthread_rwlock_wrlock.c
> @@ -22,6 +22,7 @@
> #include <pthread.h>
> #include <pthreadP.h>
> #include <stap-probe.h>
> +#include <elide.h>
>
>
> /* Acquire write lock for RWLOCK. */
> @@ -91,6 +92,12 @@ __pthread_rwlock_wrlock (pthread_rwlock_t *rwlock)
> {
> LIBC_PROBE (wrlock_entry, 1, rwlock);
>
> + if (ELIDE_LOCK (rwlock->__data.__rwelision,
> + rwlock->__data.__lock == 0
> + && rwlock->__data.__writer == 0
> + && rwlock->__data.__nr_readers == 0))
> + return 0;
OK.
> +
> /* Make sure we are alone. */
> lll_lock (rwlock->__data.__lock, rwlock->__data.__shared);
>
> diff --git a/nptl/sysdeps/pthread/pthread.h b/nptl/sysdeps/pthread/pthread.h
> index 40a3e21..7926145 100644
> --- a/nptl/sysdeps/pthread/pthread.h
> +++ b/nptl/sysdeps/pthread/pthread.h
> @@ -130,19 +130,23 @@ enum
> # endif
> #endif
>
> +#ifndef __PTHREAD_RWLOCK_ELISION_EXTRA
> +# define __PTHREAD_RWLOCK_ELISION_EXTRA 0
This is the "centralized defaults" pattern we are trying to avoid.
No macro API should use this and we need something that is -Wundef safe.
Please see my writeup here:
https://sourceware.org/glibc/wiki/Wundef
> +#endif
> +
> /* Read-write lock initializers. */
> # define PTHREAD_RWLOCK_INITIALIZER \
> - { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }
> + { { 0, 0, 0, 0, 0, 0, 0, 0, __PTHREAD_RWLOCK_ELISION_EXTRA, 0, 0 } }
> # ifdef __USE_GNU
> # ifdef __PTHREAD_RWLOCK_INT_FLAGS_SHARED
> # define PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP \
> - { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
> + { { 0, 0, 0, 0, 0, 0, 0, 0, __PTHREAD_RWLOCK_ELISION_EXTRA, 0, \
> PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP } }
> # else
> # if __BYTE_ORDER == __LITTLE_ENDIAN
> # define PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP \
> { { 0, 0, 0, 0, 0, 0, PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP, \
> - 0, 0, 0, 0 } }
> + 0, __PTHREAD_RWLOCK_ELISION_EXTRA, 0, 0 } }
> # else
> # define PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP \
> { { 0, 0, 0, 0, 0, 0, 0, 0, 0, PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP,\
> diff --git a/nptl/sysdeps/unix/sysv/linux/x86/bits/pthreadtypes.h b/nptl/sysdeps/unix/sysv/linux/x86/bits/pthreadtypes.h
> index b4329f6..bb1329c 100644
> --- a/nptl/sysdeps/unix/sysv/linux/x86/bits/pthreadtypes.h
> +++ b/nptl/sysdeps/unix/sysv/linux/x86/bits/pthreadtypes.h
> @@ -184,11 +184,13 @@ typedef union
> unsigned int __nr_writers_queued;
> int __writer;
> int __shared;
> - unsigned long int __pad1;
> + signed char __rwelision;
> + unsigned char __pad1[7];
OK.
> unsigned long int __pad2;
> /* FLAGS must stay at this position in the structure to maintain
> binary compatibility. */
> unsigned int __flags;
> +# define __PTHREAD_RWLOCK_ELISION_EXTRA 0, {0, 0, 0, 0, 0, 0, 0 }
OK.
> # define __PTHREAD_RWLOCK_INT_FLAGS_SHARED 1
> } __data;
> # else
> @@ -204,7 +206,7 @@ typedef union
> binary compatibility. */
> unsigned char __flags;
> unsigned char __shared;
> - unsigned char __pad1;
> + signed char __rwelision;
OK.
> unsigned char __pad2;
> int __writer;
> } __data;
> diff --git a/nptl/sysdeps/unix/sysv/linux/x86/elide.h b/nptl/sysdeps/unix/sysv/linux/x86/elide.h
> new file mode 100644
> index 0000000..19f27e5
> --- /dev/null
> +++ b/nptl/sysdeps/unix/sysv/linux/x86/elide.h
> @@ -0,0 +1,109 @@
> +/* elide.h: Generic lock elision support.
> + Copyright (C) 2014 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <http://www.gnu.org/licenses/>. */
> +#ifndef ELIDE_H
> +#define ELIDE_H 1
> +
> +#include <hle.h>
> +#include <elision-conf.h>
> +
> +#define ACCESS_ONCE(x) (* (volatile typeof(x) *) &(x))
OK.
> +
> +/* Adapt elision with ADAPT_COUNT and STATUS and decide retries. */
> +
> +static inline bool
> +elision_adapt(uint8_t *adapt_count, unsigned int status)
> +{
> + if (status & _XABORT_RETRY)
> + return false;
> + if ((status & _XABORT_EXPLICIT)
> + && _XABORT_CODE (status) == _ABORT_LOCK_BUSY)
> + {
> + /* Right now we skip here. Better would be to wait a bit
> + and retry. This likely needs some spinning. Be careful
> + to avoid writing the lock. */
> + if (*adapt_count != __elision_aconf.skip_lock_busy)
> + ACCESS_ONCE (*adapt_count) = __elision_aconf.skip_lock_busy;
> + }
> + /* Internal abort. There is no chance for retry.
> + Use the normal locking and next time use lock.
> + Be careful to avoid writing to the lock. */
> + else if (*adapt_count != __elision_aconf.skip_lock_internal_abort)
> + ACCESS_ONCE (*adapt_count) = __elision_aconf.skip_lock_internal_abort;
> + return true;
> +}
OK.
> +
> +/* is_lock_free must be executed inside the transaction */
> +
> +/* Returns true if lock defined by IS_LOCK_FREE was elided.
> + ADAPT_COUNT is a pointer to per-lock state variable. */
> +
> +#define ELIDE_LOCK(adapt_count, is_lock_free) \
> + ({ \
> + int ret = 0; \
> + \
> + if ((adapt_count) <= 0) \
> + { \
> + for (int i = __elision_aconf.retry_try_xbegin; i > 0; i--) \
OK.
> + { \
> + unsigned int status; \
> + if ((status = _xbegin ()) == _XBEGIN_STARTED) \
> + { \
> + if (is_lock_free) \
> + { \
> + ret = 1; \
> + break; \
> + } \
> + _xabort (_ABORT_LOCK_BUSY); \
> + } \
> + if (!elision_adapt (&(adapt_count), status)) \
> + break; \
> + } \
> + } \
> + else \
> + (adapt_count)--; /* missing updates ok */ \
> + ret; \
> + })
OK.
> +
> +/* Returns true if lock defined by IS_LOCK_FREE was try-elided.
> + ADAPT_COUNT is a pointer to per-lock state variable. */
> +
> +#define ELIDE_TRYLOCK(adapt_count, is_lock_free, write) ({ \
> + int ret = 0; \
> + if (__elision_aconf.retry_try_xbegin > 0) \
> + { \
> + if (write) \
> + _xabort (_ABORT_NESTED_TRYLOCK); \
> + ret = ELIDE_LOCK (adapt_count, is_lock_free); \
> + } \
> + ret; \
> + })
OK.
> +
> +/* Returns true if lock defined by IS_LOCK_FREE was elided. */
> +
> +#define ELIDE_UNLOCK(is_lock_free) \
> + ({ \
> + int ret = 0; \
> + if (is_lock_free) \
> + { \
> + _xend (); \
> + ret = 1; \
> + } \
> + ret; \
> + })
OK.
> +
> +#endif
> diff --git a/nptl/sysdeps/unix/sysv/linux/x86/elision-conf.c b/nptl/sysdeps/unix/sysv/linux/x86/elision-conf.c
> index e6f5d6d..28e48d9 100644
> --- a/nptl/sysdeps/unix/sysv/linux/x86/elision-conf.c
> +++ b/nptl/sysdeps/unix/sysv/linux/x86/elision-conf.c
> @@ -66,6 +66,8 @@ elision_init (int argc __attribute__ ((unused)),
> #ifdef ENABLE_LOCK_ELISION
> __pthread_force_elision = __libc_enable_secure ? 0 : __elision_available;
> #endif
> + if (!HAS_RTM)
> + __elision_aconf.retry_try_xbegin = 0; /* Disable elision on rwlocks */
OK.
> }
>
> #ifdef SHARED
>
Cheers,
Carlos.