[PATCH 1/3] nptl: Extract <bits/atomic_wide_counter.h> from pthread_cond_common.c
Adhemerval Zanella
adhemerval.zanella@linaro.org
Mon Nov 15 19:24:39 GMT 2021
On 03/11/2021 13:27, Florian Weimer via Libc-alpha wrote:
> And make it an installed header. This addresses a few aliasing
> violations (which do not seem to result in miscompilation due to
> the use of atomics), and also enables use of wide counters in other
> parts of the library.
>
> The debug output in nptl/tst-cond22 has been adjusted to print
> the 32-bit values instead because it avoids a big-endian/little-endian
> difference.
LGTM, thanks.
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
> ---
> bits/atomic_wide_counter.h | 35 ++++
> include/atomic_wide_counter.h | 89 +++++++++++
> include/bits/atomic_wide_counter.h | 1 +
> misc/Makefile | 3 +-
> misc/atomic_wide_counter.c | 127 +++++++++++++++
> nptl/Makefile | 13 +-
> nptl/pthread_cond_common.c | 204 ++++--------------------
> nptl/tst-cond22.c | 14 +-
> sysdeps/nptl/bits/thread-shared-types.h | 22 +--
> 9 files changed, 310 insertions(+), 198 deletions(-)
> create mode 100644 bits/atomic_wide_counter.h
> create mode 100644 include/atomic_wide_counter.h
> create mode 100644 include/bits/atomic_wide_counter.h
> create mode 100644 misc/atomic_wide_counter.c
>
> diff --git a/bits/atomic_wide_counter.h b/bits/atomic_wide_counter.h
> new file mode 100644
> index 0000000000..0687eb554e
> --- /dev/null
> +++ b/bits/atomic_wide_counter.h
> @@ -0,0 +1,35 @@
> +/* Monotonically increasing wide counters (at least 62 bits).
> + Copyright (C) 2016-2021 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#ifndef _BITS_ATOMIC_WIDE_COUNTER_H
> +#define _BITS_ATOMIC_WIDE_COUNTER_H
> +
> +/* Counter that is monotonically increasing (by less than 2**31 per
> + increment), with a single writer, and an arbitrary number of
> + readers. */
> +typedef union
> +{
> + __extension__ unsigned long long int __value64;
> + struct
> + {
> + unsigned int __low;
> + unsigned int __high;
> + } __value32;
> +} __atomic_wide_counter;
> +
> +#endif /* _BITS_ATOMIC_WIDE_COUNTER_H */
Ok, it will be included in multiple places, so we can't tie it to a specific
header.
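Just to spell out the aliasing point from the commit message as I read it,
here is a small, purely illustrative comparison (my own names, not patch
code): with the shared union type both halves are members of the same
object, whereas the old code reinterpreted the storage through an unrelated
struct type (_condvar_lohi), which is the cast-based access the patch
removes.

  #include <stdio.h>

  /* Same shape as the installed __atomic_wide_counter above.  */
  typedef union
  {
    unsigned long long int value64;
    struct
    {
      unsigned int low;
      unsigned int high;
    } value32;
  } wide_counter;

  /* The old scheme's separate struct type, like _condvar_lohi.  */
  typedef struct
  {
    unsigned int low;
    unsigned int high;
  } lohi;

  int
  main (void)
  {
    wide_counter c = { .value32 = { .low = 1, .high = 2 } };

    /* New scheme: both halves are members of the same union object.  */
    printf ("%u %u\n", c.value32.low, c.value32.high);

    /* Old scheme: reinterpret the storage through a distinct struct type;
       this is the access pattern the patch gets rid of.  */
    lohi *p = (lohi *) &c;
    printf ("%u %u\n", p->low, p->high);
    return 0;
  }
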
> diff --git a/include/atomic_wide_counter.h b/include/atomic_wide_counter.h
> new file mode 100644
> index 0000000000..31f009d5e6
> --- /dev/null
> +++ b/include/atomic_wide_counter.h
> @@ -0,0 +1,89 @@
> +/* Monotonically increasing wide counters (at least 62 bits).
> + Copyright (C) 2016-2021 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#ifndef _ATOMIC_WIDE_COUNTER_H
> +#define _ATOMIC_WIDE_COUNTER_H
> +
> +#include <atomic.h>
> +#include <bits/atomic_wide_counter.h>
> +
> +#if __HAVE_64B_ATOMICS
> +
> +static inline uint64_t
> +__atomic_wide_counter_load_relaxed (__atomic_wide_counter *c)
> +{
> + return atomic_load_relaxed (&c->__value64);
> +}
> +
> +static inline uint64_t
> +__atomic_wide_counter_fetch_add_relaxed (__atomic_wide_counter *c,
> + unsigned int val)
> +{
> + return atomic_fetch_add_relaxed (&c->__value64, val);
> +}
> +
> +static inline uint64_t
> +__atomic_wide_counter_fetch_add_acquire (__atomic_wide_counter *c,
> + unsigned int val)
> +{
> + return atomic_fetch_add_acquire (&c->__value64, val);
> +}
> +
> +static inline void
> +__atomic_wide_counter_add_relaxed (__atomic_wide_counter *c,
> + unsigned int val)
> +{
> + atomic_store_relaxed (&c->__value64,
> + atomic_load_relaxed (&c->__value64) + val);
> +}
> +
> +static uint64_t __attribute__ ((unused))
> +__atomic_wide_counter_fetch_xor_release (__atomic_wide_counter *c,
> + unsigned int val)
> +{
> + return atomic_fetch_xor_release (&c->__value64, val);
> +}
> +
> +#else /* !__HAVE_64B_ATOMICS */
> +
> +uint64_t __atomic_wide_counter_load_relaxed (__atomic_wide_counter *c)
> + attribute_hidden;
> +
> +uint64_t __atomic_wide_counter_fetch_add_relaxed (__atomic_wide_counter *c,
> + unsigned int op)
> + attribute_hidden;
> +
> +static inline uint64_t
> +__atomic_wide_counter_fetch_add_acquire (__atomic_wide_counter *c,
> + unsigned int val)
> +{
> + uint64_t r = __atomic_wide_counter_fetch_add_relaxed (c, val);
> + atomic_thread_fence_acquire ();
> + return r;
> +}
> +
> +static inline void
> +__atomic_wide_counter_add_relaxed (__atomic_wide_counter *c,
> + unsigned int val)
> +{
> + __atomic_wide_counter_fetch_add_relaxed (c, val);
> +}
> +
> +#endif /* !__HAVE_64B_ATOMICS */
> +
> +#endif /* _ATOMIC_WIDE_COUNTER_H */
Ok.
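A quick sketch of how another part of the library might use this
(hypothetical caller and names, assuming it is compiled inside glibc so
<atomic_wide_counter.h> and the internal atomics are available): a single
writer bumps the counter and any number of readers take 62-bit snapshots,
and the same code works on both __HAVE_64B_ATOMICS configurations.

  #include <atomic_wide_counter.h>

  static __atomic_wide_counter counter;   /* zero-initialized */

  /* Single writer: returns the value before the increment.  */
  static uint64_t
  counter_bump (void)
  {
    return __atomic_wide_counter_fetch_add_acquire (&counter, 1);
  }

  /* Readers: 62-bit snapshot, valid for both implementations.  */
  static uint64_t
  counter_read (void)
  {
    return __atomic_wide_counter_load_relaxed (&counter);
  }
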
> diff --git a/include/bits/atomic_wide_counter.h b/include/bits/atomic_wide_counter.h
> new file mode 100644
> index 0000000000..8fb09a5291
> --- /dev/null
> +++ b/include/bits/atomic_wide_counter.h
> @@ -0,0 +1 @@
> +#include_next <bits/atomic_wide_counter.h>
Ok.
> diff --git a/misc/Makefile b/misc/Makefile
> index 1083ba3bfc..3b66cb9f6a 100644
> --- a/misc/Makefile
> +++ b/misc/Makefile
> @@ -73,7 +73,8 @@ routines := brk sbrk sstk ioctl \
> fgetxattr flistxattr fremovexattr fsetxattr getxattr \
> listxattr lgetxattr llistxattr lremovexattr lsetxattr \
> removexattr setxattr getauxval ifunc-impl-list makedev \
> - allocate_once fd_to_filename single_threaded unwind-link
> + allocate_once fd_to_filename single_threaded unwind-link \
> + atomic_wide_counter
>
> generated += tst-error1.mtrace tst-error1-mem.out \
> tst-allocate_once.mtrace tst-allocate_once-mem.out
Ok.
> diff --git a/misc/atomic_wide_counter.c b/misc/atomic_wide_counter.c
> new file mode 100644
> index 0000000000..56d8981925
> --- /dev/null
> +++ b/misc/atomic_wide_counter.c
> @@ -0,0 +1,127 @@
> +/* Monotonically increasing wide counters (at least 62 bits).
> + Copyright (C) 2016-2021 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#include <atomic_wide_counter.h>
> +
> +#if !__HAVE_64B_ATOMICS
> +
> +/* Values we add or xor are less than or equal to 1<<31, so we only
> + have to make overflow-and-addition atomic wrt. to concurrent load
> + operations and xor operations. To do that, we split each counter
> + into two 32b values of which we reserve the MSB of each to
> + represent an overflow from the lower-order half to the higher-order
> + half.
> +
> + In the common case, the state is (higher-order / lower-order half, and . is
> + basically concatenation of the bits):
> + 0.h / 0.l = h.l
> +
> + When we add a value of x that overflows (i.e., 0.l + x == 1.L), we run the
> + following steps S1-S4 (the values these represent are on the right-hand
> + side):
> + S1: 0.h / 1.L == (h+1).L
> + S2: 1.(h+1) / 1.L == (h+1).L
> + S3: 1.(h+1) / 0.L == (h+1).L
> + S4: 0.(h+1) / 0.L == (h+1).L
> + If the LSB of the higher-order half is set, readers will ignore the
> + overflow bit in the lower-order half.
> +
> + To get an atomic snapshot in load operations, we exploit that the
> + higher-order half is monotonically increasing; if we load a value V from
> + it, then read the lower-order half, and then read the higher-order half
> + again and see the same value V, we know that both halves have existed in
> + the sequence of values the full counter had. This is similar to the
> + validated reads in the time-based STMs in GCC's libitm (e.g.,
> + method_ml_wt).
> +
> + One benefit of this scheme is that this makes load operations
> + obstruction-free because unlike if we would just lock the counter, readers
> + can almost always interpret a snapshot of each halves. Readers can be
> + forced to read a new snapshot when the read is concurrent with an overflow.
> + However, overflows will happen infrequently, so load operations are
> + practically lock-free. */
> +
> +uint64_t
> +__atomic_wide_counter_fetch_add_relaxed (__atomic_wide_counter *c,
> + unsigned int op)
> +{
> + /* S1. Note that this is an atomic read-modify-write so it extends the
> + release sequence of release MO store at S3. */
> + unsigned int l = atomic_fetch_add_relaxed (&c->__value32.__low, op);
> + unsigned int h = atomic_load_relaxed (&c->__value32.__high);
> + uint64_t result = ((uint64_t) h << 31) | l;
> + l += op;
> + if ((l >> 31) > 0)
> + {
> + /* Overflow. Need to increment higher-order half. Note that all
> + add operations are ordered in happens-before. */
> + h++;
> + /* S2. Release MO to synchronize with the loads of the higher-order half
> + in the load operation. See __condvar_load_64_relaxed. */
> + atomic_store_release (&c->__value32.__high,
> + h | ((unsigned int) 1 << 31));
> + l ^= (unsigned int) 1 << 31;
> + /* S3. See __condvar_load_64_relaxed. */
> + atomic_store_release (&c->__value32.__low, l);
> + /* S4. Likewise. */
> + atomic_store_release (&c->__value32.__high, h);
> + }
> + return result;
> +}
> +
> +uint64_t
> +__atomic_wide_counter_load_relaxed (__atomic_wide_counter *c)
> +{
> + unsigned int h, l, h2;
> + do
> + {
> + /* This load and the second one below to the same location read from the
> + stores in the overflow handling of the add operation or the
> + initializing stores (which is a simple special case because
> + initialization always completely happens before further use).
> + Because no two stores to the higher-order half write the same value,
> + the loop ensures that if we continue to use the snapshot, this load
> + and the second one read from the same store operation. All candidate
> + store operations have release MO.
> + If we read from S2 in the first load, then we will see the value of
> + S1 on the next load (because we synchronize with S2), or a value
> + later in modification order. We correctly ignore the lower-half's
> + overflow bit in this case. If we read from S4, then we will see the
> + value of S3 in the next load (or a later value), which does not have
> + the overflow bit set anymore.
> + */
> + h = atomic_load_acquire (&c->__value32.__high);
> + /* This will read from the release sequence of S3 (i.e, either the S3
> + store or the read-modify-writes at S1 following S3 in modification
> + order). Thus, the read synchronizes with S3, and the following load
> + of the higher-order half will read from the matching S2 (or a later
> + value).
> + Thus, if we read a lower-half value here that already overflowed and
> + belongs to an increased higher-order half value, we will see the
> + latter and h and h2 will not be equal. */
> + l = atomic_load_acquire (&c->__value32.__low);
> + /* See above. */
> + h2 = atomic_load_relaxed (&c->__value32.__high);
> + }
> + while (h != h2);
> + if (((l >> 31) > 0) && ((h >> 31) > 0))
> + l ^= (unsigned int) 1 << 31;
> + return ((uint64_t) (h & ~((unsigned int) 1 << 31)) << 31) + l;
> +}
> +
> +#endif /* !__HAVE_64B_ATOMICS */
Ok, it is basically the code moved out of pthread_cond_common.c.
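To double-check my reading of the S1-S4 table, here is a tiny illustrative
model (my own, single-threaded, no atomics, not the glibc code) that decodes
a (high, low) snapshot the way the reader does and shows that every
intermediate state of an overflowing add decodes to the same value:

  #include <stdint.h>
  #include <stdio.h>

  #define OVF ((uint32_t) 1 << 31)

  /* Decode a (high, low) snapshot as the reader does: if both MSBs are
     set, ignore the overflow bit in the lower half.  */
  static uint64_t
  decode (uint32_t high, uint32_t low)
  {
    if ((low >> 31) && (high >> 31))
      low ^= OVF;
    return ((uint64_t) (high & ~OVF) << 31) + low;
  }

  int
  main (void)
  {
    /* Start just below an overflow of the lower half.  */
    uint32_t h = 5, l = OVF - 2;
    printf ("start: %llu\n", (unsigned long long) decode (h, l));

    /* Add 3, which overflows the lower half, then walk through S1..S4;
       every intermediate state decodes to the same new value.  */
    l += 3;                       /* S1: 0.h / 1.L */
    printf ("S1: %llu\n", (unsigned long long) decode (h, l));
    h = (h + 1) | OVF;            /* S2: 1.(h+1) / 1.L */
    printf ("S2: %llu\n", (unsigned long long) decode (h, l));
    l ^= OVF;                     /* S3: 1.(h+1) / 0.L */
    printf ("S3: %llu\n", (unsigned long long) decode (h, l));
    h &= ~OVF;                    /* S4: 0.(h+1) / 0.L */
    printf ("S4: %llu\n", (unsigned long long) decode (h, l));
    return 0;
  }

That is why readers only have to retry the snapshot loop when they race
with an overflow, which keeps loads practically lock-free.
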
> diff --git a/nptl/Makefile b/nptl/Makefile
> index ff4d590f11..6310aecaad 100644
> --- a/nptl/Makefile
> +++ b/nptl/Makefile
> @@ -22,8 +22,14 @@ subdir := nptl
>
> include ../Makeconfig
>
> -headers := pthread.h semaphore.h bits/semaphore.h \
> - bits/struct_mutex.h bits/struct_rwlock.h
> +headers := \
> + bits/atomic_wide_counter.h \
> + bits/semaphore.h \
> + bits/struct_mutex.h \
> + bits/struct_rwlock.h \
> + pthread.h \
> + semaphore.h \
> + # headers
>
> extra-libs := libpthread
> extra-libs-others := $(extra-libs)
Ok.
> @@ -270,7 +276,7 @@ tests = tst-attr2 tst-attr3 tst-default-attr \
> tst-mutexpi1 tst-mutexpi2 tst-mutexpi3 tst-mutexpi4 \
> tst-mutexpi5 tst-mutexpi5a tst-mutexpi6 tst-mutexpi7 tst-mutexpi7a \
> tst-mutexpi9 tst-mutexpi10 \
> - tst-cond22 tst-cond26 \
> + tst-cond26 \
> tst-robustpi1 tst-robustpi2 tst-robustpi3 tst-robustpi4 tst-robustpi5 \
> tst-robustpi6 tst-robustpi7 tst-robustpi9 \
> tst-rwlock2 tst-rwlock2a tst-rwlock2b tst-rwlock3 \
> @@ -319,6 +325,7 @@ tests-internal := tst-robustpi8 tst-rwlock19 tst-rwlock20 \
> tst-barrier5 tst-signal7 tst-mutex8 tst-mutex8-static \
> tst-mutexpi8 tst-mutexpi8-static \
> tst-setgetname \
> + tst-cond22 \
>
> xtests = tst-setuid1 tst-setuid1-static tst-setuid2 \
> tst-mutexpp1 tst-mutexpp6 tst-mutexpp10 tst-setgroups \
Ok.
> diff --git a/nptl/pthread_cond_common.c b/nptl/pthread_cond_common.c
> index c35b9ef03a..fb56f93b6e 100644
> --- a/nptl/pthread_cond_common.c
> +++ b/nptl/pthread_cond_common.c
> @@ -17,79 +17,52 @@
> <https://www.gnu.org/licenses/>. */
>
> #include <atomic.h>
> +#include <atomic_wide_counter.h>
> #include <stdint.h>
> #include <pthread.h>
>
> -/* We need 3 least-significant bits on __wrefs for something else. */
> +/* We need 3 least-significant bits on __wrefs for something else.
> + This also matches __atomic_wide_counter requirements: The highest
> + value we add is __PTHREAD_COND_MAX_GROUP_SIZE << 2 to __g1_start
> + (the two extra bits are for the lock in the two LSBs of
> + __g1_start). */
> #define __PTHREAD_COND_MAX_GROUP_SIZE ((unsigned) 1 << 29)
>
> -#if __HAVE_64B_ATOMICS == 1
> -
> -static uint64_t __attribute__ ((unused))
> +static inline uint64_t
> __condvar_load_wseq_relaxed (pthread_cond_t *cond)
> {
> - return atomic_load_relaxed (&cond->__data.__wseq);
> + return __atomic_wide_counter_load_relaxed (&cond->__data.__wseq);
> }
>
> -static uint64_t __attribute__ ((unused))
> +static inline uint64_t
> __condvar_fetch_add_wseq_acquire (pthread_cond_t *cond, unsigned int val)
> {
> - return atomic_fetch_add_acquire (&cond->__data.__wseq, val);
> + return __atomic_wide_counter_fetch_add_acquire (&cond->__data.__wseq, val);
> }
>
> -static uint64_t __attribute__ ((unused))
> -__condvar_fetch_xor_wseq_release (pthread_cond_t *cond, unsigned int val)
> +static inline uint64_t
> +__condvar_load_g1_start_relaxed (pthread_cond_t *cond)
> {
> - return atomic_fetch_xor_release (&cond->__data.__wseq, val);
> + return __atomic_wide_counter_load_relaxed (&cond->__data.__g1_start);
> }
>
> -static uint64_t __attribute__ ((unused))
> -__condvar_load_g1_start_relaxed (pthread_cond_t *cond)
> +static inline void
> +__condvar_add_g1_start_relaxed (pthread_cond_t *cond, unsigned int val)
> {
> - return atomic_load_relaxed (&cond->__data.__g1_start);
> + __atomic_wide_counter_add_relaxed (&cond->__data.__g1_start, val);
> }
>
> -static void __attribute__ ((unused))
> -__condvar_add_g1_start_relaxed (pthread_cond_t *cond, unsigned int val)
> +#if __HAVE_64B_ATOMICS == 1
> +
> +static inline uint64_t
> +__condvar_fetch_xor_wseq_release (pthread_cond_t *cond, unsigned int val)
> {
> - atomic_store_relaxed (&cond->__data.__g1_start,
> - atomic_load_relaxed (&cond->__data.__g1_start) + val);
> + return atomic_fetch_xor_release (&cond->__data.__wseq.__value64, val);
> }
>
Ok, although the reorganization of the placement makes the review a bit
confusing.
> -#else
> -
> -/* We use two 64b counters: __wseq and __g1_start. They are monotonically
> - increasing and single-writer-multiple-readers counters, so we can implement
> - load, fetch-and-add, and fetch-and-xor operations even when we just have
> - 32b atomics. Values we add or xor are less than or equal to 1<<31 (*),
> - so we only have to make overflow-and-addition atomic wrt. to concurrent
> - load operations and xor operations. To do that, we split each counter into
> - two 32b values of which we reserve the MSB of each to represent an
> - overflow from the lower-order half to the higher-order half.
> -
> - In the common case, the state is (higher-order / lower-order half, and . is
> - basically concatenation of the bits):
> - 0.h / 0.l = h.l
> -
> - When we add a value of x that overflows (i.e., 0.l + x == 1.L), we run the
> - following steps S1-S4 (the values these represent are on the right-hand
> - side):
> - S1: 0.h / 1.L == (h+1).L
> - S2: 1.(h+1) / 1.L == (h+1).L
> - S3: 1.(h+1) / 0.L == (h+1).L
> - S4: 0.(h+1) / 0.L == (h+1).L
> - If the LSB of the higher-order half is set, readers will ignore the
> - overflow bit in the lower-order half.
> -
> - To get an atomic snapshot in load operations, we exploit that the
> - higher-order half is monotonically increasing; if we load a value V from
> - it, then read the lower-order half, and then read the higher-order half
> - again and see the same value V, we know that both halves have existed in
> - the sequence of values the full counter had. This is similar to the
> - validated reads in the time-based STMs in GCC's libitm (e.g.,
> - method_ml_wt).
> -
> - The xor operation needs to be an atomic read-modify-write. The write
> +#else /* !__HAVE_64B_ATOMICS */
> +
> +/* The xor operation needs to be an atomic read-modify-write. The write
> itself is not an issue as it affects just the lower-order half but not bits
> used in the add operation. To make the full fetch-and-xor atomic, we
> exploit that concurrently, the value can increase by at most 1<<31 (*): The
> @@ -97,117 +70,18 @@ __condvar_add_g1_start_relaxed (pthread_cond_t *cond, unsigned int val)
> than __PTHREAD_COND_MAX_GROUP_SIZE waiters can enter concurrently and thus
> increment __wseq. Therefore, if the xor operation observes a value of
> __wseq, then the value it applies the modification to later on can be
> - derived (see below).
> -
> - One benefit of this scheme is that this makes load operations
> - obstruction-free because unlike if we would just lock the counter, readers
> - can almost always interpret a snapshot of each halves. Readers can be
> - forced to read a new snapshot when the read is concurrent with an overflow.
> - However, overflows will happen infrequently, so load operations are
> - practically lock-free.
> -
> - (*) The highest value we add is __PTHREAD_COND_MAX_GROUP_SIZE << 2 to
> - __g1_start (the two extra bits are for the lock in the two LSBs of
> - __g1_start). */
> -
> -typedef struct
> -{
> - unsigned int low;
> - unsigned int high;
> -} _condvar_lohi;
> -
> -static uint64_t
> -__condvar_fetch_add_64_relaxed (_condvar_lohi *lh, unsigned int op)
> -{
> - /* S1. Note that this is an atomic read-modify-write so it extends the
> - release sequence of release MO store at S3. */
> - unsigned int l = atomic_fetch_add_relaxed (&lh->low, op);
> - unsigned int h = atomic_load_relaxed (&lh->high);
> - uint64_t result = ((uint64_t) h << 31) | l;
> - l += op;
> - if ((l >> 31) > 0)
> - {
> - /* Overflow. Need to increment higher-order half. Note that all
> - add operations are ordered in happens-before. */
> - h++;
> - /* S2. Release MO to synchronize with the loads of the higher-order half
> - in the load operation. See __condvar_load_64_relaxed. */
> - atomic_store_release (&lh->high, h | ((unsigned int) 1 << 31));
> - l ^= (unsigned int) 1 << 31;
> - /* S3. See __condvar_load_64_relaxed. */
> - atomic_store_release (&lh->low, l);
> - /* S4. Likewise. */
> - atomic_store_release (&lh->high, h);
> - }
> - return result;
> -}
> -
> -static uint64_t
> -__condvar_load_64_relaxed (_condvar_lohi *lh)
> -{
> - unsigned int h, l, h2;
> - do
> - {
> - /* This load and the second one below to the same location read from the
> - stores in the overflow handling of the add operation or the
> - initializing stores (which is a simple special case because
> - initialization always completely happens before further use).
> - Because no two stores to the higher-order half write the same value,
> - the loop ensures that if we continue to use the snapshot, this load
> - and the second one read from the same store operation. All candidate
> - store operations have release MO.
> - If we read from S2 in the first load, then we will see the value of
> - S1 on the next load (because we synchronize with S2), or a value
> - later in modification order. We correctly ignore the lower-half's
> - overflow bit in this case. If we read from S4, then we will see the
> - value of S3 in the next load (or a later value), which does not have
> - the overflow bit set anymore.
> - */
> - h = atomic_load_acquire (&lh->high);
> - /* This will read from the release sequence of S3 (i.e, either the S3
> - store or the read-modify-writes at S1 following S3 in modification
> - order). Thus, the read synchronizes with S3, and the following load
> - of the higher-order half will read from the matching S2 (or a later
> - value).
> - Thus, if we read a lower-half value here that already overflowed and
> - belongs to an increased higher-order half value, we will see the
> - latter and h and h2 will not be equal. */
> - l = atomic_load_acquire (&lh->low);
> - /* See above. */
> - h2 = atomic_load_relaxed (&lh->high);
> - }
> - while (h != h2);
> - if (((l >> 31) > 0) && ((h >> 31) > 0))
> - l ^= (unsigned int) 1 << 31;
> - return ((uint64_t) (h & ~((unsigned int) 1 << 31)) << 31) + l;
> -}
> -
> -static uint64_t __attribute__ ((unused))
> -__condvar_load_wseq_relaxed (pthread_cond_t *cond)
> -{
> - return __condvar_load_64_relaxed ((_condvar_lohi *) &cond->__data.__wseq32);
> -}
> -
> -static uint64_t __attribute__ ((unused))
> -__condvar_fetch_add_wseq_acquire (pthread_cond_t *cond, unsigned int val)
> -{
> - uint64_t r = __condvar_fetch_add_64_relaxed
> - ((_condvar_lohi *) &cond->__data.__wseq32, val);
> - atomic_thread_fence_acquire ();
> - return r;
> -}
> + derived. */
>
Ok.
> static uint64_t __attribute__ ((unused))
> __condvar_fetch_xor_wseq_release (pthread_cond_t *cond, unsigned int val)
> {
> - _condvar_lohi *lh = (_condvar_lohi *) &cond->__data.__wseq32;
> /* First, get the current value. See __condvar_load_64_relaxed. */
> unsigned int h, l, h2;
> do
> {
> - h = atomic_load_acquire (&lh->high);
> - l = atomic_load_acquire (&lh->low);
> - h2 = atomic_load_relaxed (&lh->high);
> + h = atomic_load_acquire (&cond->__data.__wseq.__value32.__high);
> + l = atomic_load_acquire (&cond->__data.__wseq.__value32.__low);
> + h2 = atomic_load_relaxed (&cond->__data.__wseq.__value32.__high);
> }
> while (h != h2);
> if (((l >> 31) > 0) && ((h >> 31) == 0))
Ok.
> @@ -219,8 +93,9 @@ __condvar_fetch_xor_wseq_release (pthread_cond_t *cond, unsigned int val)
> earlier in modification order than the following fetch-xor.
> This uses release MO to make the full operation have release semantics
> (all other operations access the lower-order half). */
> - unsigned int l2 = atomic_fetch_xor_release (&lh->low, val)
> - & ~((unsigned int) 1 << 31);
> + unsigned int l2
> + = (atomic_fetch_xor_release (&cond->__data.__wseq.__value32.__low, val)
> + & ~((unsigned int) 1 << 31));
> if (l2 < l)
> /* The lower-order half overflowed in the meantime. This happened exactly
> once due to the limit on concurrent waiters (see above). */
> @@ -228,22 +103,7 @@ __condvar_fetch_xor_wseq_release (pthread_cond_t *cond, unsigned int val)
> return ((uint64_t) h << 31) + l2;
> }
Ok.
>
> -static uint64_t __attribute__ ((unused))
> -__condvar_load_g1_start_relaxed (pthread_cond_t *cond)
> -{
> - return __condvar_load_64_relaxed
> - ((_condvar_lohi *) &cond->__data.__g1_start32);
> -}
> -
> -static void __attribute__ ((unused))
> -__condvar_add_g1_start_relaxed (pthread_cond_t *cond, unsigned int val)
> -{
> - ignore_value (__condvar_fetch_add_64_relaxed
> - ((_condvar_lohi *) &cond->__data.__g1_start32, val));
> -}
> -
> -#endif /* !__HAVE_64B_ATOMICS */
> -
> +#endif /* !__HAVE_64B_ATOMICS */
>
> /* The lock that signalers use. See pthread_cond_wait_common for uses.
> The lock is our normal three-state lock: not acquired (0) / acquired (1) /
Ok.
> diff --git a/nptl/tst-cond22.c b/nptl/tst-cond22.c
> index 64f19ea0a5..1336e9c79d 100644
> --- a/nptl/tst-cond22.c
> +++ b/nptl/tst-cond22.c
> @@ -106,8 +106,11 @@ do_test (void)
> status = 1;
> }
>
> - printf ("cond = { %llu, %llu, %u/%u/%u, %u/%u/%u, %u, %u }\n",
> - c.__data.__wseq, c.__data.__g1_start,
> + printf ("cond = { 0x%x:%x, 0x%x:%x, %u/%u/%u, %u/%u/%u, %u, %u }\n",
> + c.__data.__wseq.__value32.__high,
> + c.__data.__wseq.__value32.__low,
> + c.__data.__g1_start.__value32.__high,
> + c.__data.__g1_start.__value32.__low,
> c.__data.__g_signals[0], c.__data.__g_refs[0], c.__data.__g_size[0],
> c.__data.__g_signals[1], c.__data.__g_refs[1], c.__data.__g_size[1],
> c.__data.__g1_orig_size, c.__data.__wrefs);
> @@ -149,8 +152,11 @@ do_test (void)
> status = 1;
> }
>
> - printf ("cond = { %llu, %llu, %u/%u/%u, %u/%u/%u, %u, %u }\n",
> - c.__data.__wseq, c.__data.__g1_start,
> + printf ("cond = { 0x%x:%x, 0x%x:%x, %u/%u/%u, %u/%u/%u, %u, %u }\n",
> + c.__data.__wseq.__value32.__high,
> + c.__data.__wseq.__value32.__low,
> + c.__data.__g1_start.__value32.__high,
> + c.__data.__g1_start.__value32.__low,
> c.__data.__g_signals[0], c.__data.__g_refs[0], c.__data.__g_size[0],
> c.__data.__g_signals[1], c.__data.__g_refs[1], c.__data.__g_size[1],
> c.__data.__g1_orig_size, c.__data.__wrefs);
Ok.
> diff --git a/sysdeps/nptl/bits/thread-shared-types.h b/sysdeps/nptl/bits/thread-shared-types.h
> index 44bf1e358d..b82a79a43e 100644
> --- a/sysdeps/nptl/bits/thread-shared-types.h
> +++ b/sysdeps/nptl/bits/thread-shared-types.h
> @@ -43,6 +43,8 @@
>
> #include <bits/pthreadtypes-arch.h>
>
> +#include <bits/atomic_wide_counter.h>
> +
>
> /* Common definition of pthread_mutex_t. */
>
> @@ -91,24 +93,8 @@ typedef struct __pthread_internal_slist
>
> struct __pthread_cond_s
> {
> - __extension__ union
> - {
> - __extension__ unsigned long long int __wseq;
> - struct
> - {
> - unsigned int __low;
> - unsigned int __high;
> - } __wseq32;
> - };
> - __extension__ union
> - {
> - __extension__ unsigned long long int __g1_start;
> - struct
> - {
> - unsigned int __low;
> - unsigned int __high;
> - } __g1_start32;
> - };
> + __atomic_wide_counter __wseq;
> + __atomic_wide_counter __g1_start;
> unsigned int __g_refs[2] __LOCK_ALIGNMENT;
> unsigned int __g_size[2];
> unsigned int __g1_orig_size;
>
Ok.