[RFC PATCH 43/52] Y2038: add functions using futexes
This creates 64-bit time versions of the following APIs:
- pthread_rwlock_timedrdlock
- pthread_rwlock_timedwrlock
- pthread_mutex_timedlock
- pthread_cond_timedwait
- sem_timedwait
- aio_suspend
It also creates 64-bit time versions of the following
functions or macros:
- lll_timedlock_elision
- lll_timedlock
- __lll_timedlock_wait
- futex_reltimed_wait_cancelable
- lll_futex_timed_wait
- __pthread_cond_wait_common
- futex_abstimed_wait_cancelable
- lll_futex_timed_wait_bitset
- do_aio_misc_wait
- AIO_MISC_WAIT
- __new_sem_wait_slow
- do_futex_wait
- __pthread_rwlock_wrlock_full
- __pthread_rwlock_rdlock_full
- futex_abstimed_wait
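
As an illustration only (not part of this patch), a caller needing a
post-2038 deadline would eventually reach one of the new entry points with
a 64-bit timespec. The sketch below assumes direct access to the internal
__pthread_mutex_timedlock_t64 symbol and uses a simplified local definition
of struct __timespec64; the public redirection from the existing names to
the _t64 symbols is outside the scope of this patch.

    /* Hypothetical example; the layout of struct __timespec64 is simplified.  */
    #include <errno.h>
    #include <pthread.h>
    #include <stdio.h>

    struct __timespec64
    {
      long long tv_sec;		/* 64-bit seconds, even on 32-bit targets.  */
      long long tv_nsec;
    };

    extern int __pthread_mutex_timedlock_t64 (pthread_mutex_t *,
					      const struct __timespec64 *);

    static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;

    int
    main (void)
    {
      /* Absolute CLOCK_REALTIME deadline past 2038-01-19.  */
      struct __timespec64 abstime = { .tv_sec = 0x7fffffffLL + 60, .tv_nsec = 0 };
      int err = __pthread_mutex_timedlock_t64 (&m, &abstime);
      if (err != 0 && err != ETIMEDOUT)
	fprintf (stderr, "timedlock: %d\n", err);
      return 0;
    }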
Signed-off-by: Albert ARIBAUD (3ADEV) <albert.aribaud@3adev.fr>
---
nptl/Versions | 11 +
nptl/lll_timedlock_wait.c | 37 ++
nptl/pthread_cond_wait.c | 285 +++++++++++++
nptl/pthread_mutex_timedlock.c | 616 +++++++++++++++++++++++++++
nptl/pthread_rwlock_common.c | 591 +++++++++++++++++++++++++
nptl/pthread_rwlock_timedrdlock.c | 19 +
nptl/pthread_rwlock_timedwrlock.c | 19 +
nptl/sem_timedwait.c | 18 +
nptl/sem_wait.c | 24 ++
nptl/sem_waitcommon.c | 172 ++++++++
rt/Versions | 1 +
sysdeps/nptl/aio_misc.h | 39 ++
sysdeps/nptl/lowlevellock.h | 17 +
sysdeps/pthread/aio_suspend.c | 164 +++++++
sysdeps/unix/sysv/linux/futex-internal.h | 123 ++++++
sysdeps/unix/sysv/linux/lowlevellock-futex.h | 22 +
16 files changed, 2158 insertions(+)
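Most of the new *_t64 variants below share the same shape: validate
tv_nsec, convert the absolute 64-bit deadline into a relative timeout
against the current time, return ETIMEDOUT if the deadline has already
passed, and only then fall into the futex wait. A standalone sketch of
that conversion, with a hypothetical helper name and clock_gettime in
place of the internal __gettimeofday call used in the patch:

    #include <errno.h>
    #include <time.h>

    /* Turn an absolute CLOCK_REALTIME deadline into a relative timeout.
       Returns 0 and fills *rt on success, ETIMEDOUT if the deadline has
       already passed.  (Illustrative only; not part of the patch.)  */
    static int
    relative_timeout (const struct timespec *abstime, struct timespec *rt)
    {
      struct timespec now;

      if (clock_gettime (CLOCK_REALTIME, &now) != 0)
	return errno;

      rt->tv_sec = abstime->tv_sec - now.tv_sec;
      rt->tv_nsec = abstime->tv_nsec - now.tv_nsec;
      if (rt->tv_nsec < 0)
	{
	  rt->tv_nsec += 1000000000;
	  --rt->tv_sec;
	}

      return rt->tv_sec < 0 ? ETIMEDOUT : 0;
    }
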
diff --git a/nptl/Versions b/nptl/Versions
index 0ae5def464..9ed0872eab 100644
--- a/nptl/Versions
+++ b/nptl/Versions
@@ -272,4 +272,15 @@ libpthread {
__pthread_barrier_init; __pthread_barrier_wait;
__shm_directory;
}
+
+ # Y2038 symbols are given their own version until they can be put in
+ # the right place
+
+ GLIBC_Y2038 {
+ __pthread_rwlock_rdlock_t64;
+ __pthread_rwlock_wrlock_t64;
+ __pthread_mutex_timedlock_t64;
+ __sem_timedwait_t64;
+ __pthread_cond_timedwait_t64;
+ }
}
diff --git a/nptl/lll_timedlock_wait.c b/nptl/lll_timedlock_wait.c
index 604953c04c..d757d9d92e 100644
--- a/nptl/lll_timedlock_wait.c
+++ b/nptl/lll_timedlock_wait.c
@@ -57,3 +57,40 @@ __lll_timedlock_wait (int *futex, const struct timespec *abstime, int private)
return 0;
}
+
+/* 64-bit time version */
+
+int
+__lll_timedlock_wait_t64 (int *futex, const struct __timespec64 *abstime, int private)
+{
+ /* Reject invalid timeouts. */
+ if (abstime->tv_nsec < 0 || abstime->tv_nsec >= 1000000000)
+ return EINVAL;
+
+ /* Try locking. */
+ while (atomic_exchange_acq (futex, 2) != 0)
+ {
+ struct timeval tv;
+
+ /* Get the current time. */
+ (void) __gettimeofday (&tv, NULL);
+
+ /* Compute relative timeout. */
+ struct timespec rt;
+ rt.tv_sec = abstime->tv_sec - tv.tv_sec;
+ rt.tv_nsec = abstime->tv_nsec - tv.tv_usec * 1000;
+ if (rt.tv_nsec < 0)
+ {
+ rt.tv_nsec += 1000000000;
+ --rt.tv_sec;
+ }
+
+ if (rt.tv_sec < 0)
+ return ETIMEDOUT;
+
+ /* If *futex == 2, wait until woken or timeout. */
+ lll_futex_timed_wait (futex, 2, &rt, private);
+ }
+
+ return 0;
+}
diff --git a/nptl/pthread_cond_wait.c b/nptl/pthread_cond_wait.c
index 7812b94a3a..4246e25aeb 100644
--- a/nptl/pthread_cond_wait.c
+++ b/nptl/pthread_cond_wait.c
@@ -647,6 +647,280 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex,
return (err != 0) ? err : result;
}
+/* 64-bit time variant */
+
+static __always_inline int
+__pthread_cond_wait_common_t64 (pthread_cond_t *cond, pthread_mutex_t *mutex,
+ const struct __timespec64 *abstime)
+{
+ const int maxspin = 0;
+ int err;
+ int result = 0;
+
+ LIBC_PROBE (cond_wait, 2, cond, mutex);
+
+ /* Acquire a position (SEQ) in the waiter sequence (WSEQ). We use an
+ atomic operation because signals and broadcasts may update the group
+ switch without acquiring the mutex. We do not need release MO here
+ because we do not need to establish any happens-before relation with
+ signalers (see __pthread_cond_signal); modification order alone
+ establishes a total order of waiters/signals. We do need acquire MO
+ to synchronize with group reinitialization in
+ __condvar_quiesce_and_switch_g1. */
+ uint64_t wseq = __condvar_fetch_add_wseq_acquire (cond, 2);
+ /* Find our group's index. We always go into what was G2 when we acquired
+ our position. */
+ unsigned int g = wseq & 1;
+ uint64_t seq = wseq >> 1;
+
+ /* Increase the waiter reference count. Relaxed MO is sufficient because
+ we only need to synchronize when decrementing the reference count. */
+ unsigned int flags = atomic_fetch_add_relaxed (&cond->__data.__wrefs, 8);
+ int private = __condvar_get_private (flags);
+
+ /* Now that we are registered as a waiter, we can release the mutex.
+ Waiting on the condvar must be atomic with releasing the mutex, so if
+ the mutex is used to establish a happens-before relation with any
+ signaler, the waiter must be visible to the latter; thus, we release the
+ mutex after registering as waiter.
+ If releasing the mutex fails, we just cancel our registration as a
+ waiter and confirm that we have woken up. */
+ err = __pthread_mutex_unlock_usercnt (mutex, 0);
+ if (__glibc_unlikely (err != 0))
+ {
+ __condvar_cancel_waiting (cond, seq, g, private);
+ __condvar_confirm_wakeup (cond, private);
+ return err;
+ }
+
+ /* Now wait until a signal is available in our group or it is closed.
+ Acquire MO so that if we observe a value of zero written after group
+ switching in __condvar_quiesce_and_switch_g1, we synchronize with that
+ store and will see the prior update of __g1_start done while switching
+ groups too. */
+ unsigned int signals = atomic_load_acquire (cond->__data.__g_signals + g);
+
+ do
+ {
+ while (1)
+ {
+ /* Spin-wait first.
+ Note that spinning first without checking whether a timeout
+ passed might lead to what looks like a spurious wake-up even
+ though we should return ETIMEDOUT (e.g., if the caller provides
+ an absolute timeout that is clearly in the past). However,
+ (1) spurious wake-ups are allowed, (2) it seems unlikely that a
+ user will (ab)use pthread_cond_wait as a check for whether a
+ point in time is in the past, and (3) spinning first without
+ having to compare against the current time seems to be the right
+ choice from a performance perspective for most use cases. */
+ unsigned int spin = maxspin;
+ while (signals == 0 && spin > 0)
+ {
+ /* Check that we are not spinning on a group that's already
+ closed. */
+ if (seq < (__condvar_load_g1_start_relaxed (cond) >> 1))
+ goto done;
+
+ /* TODO Back off. */
+
+ /* Reload signals. See above for MO. */
+ signals = atomic_load_acquire (cond->__data.__g_signals + g);
+ spin--;
+ }
+
+ /* If our group will be closed as indicated by the flag on signals,
+ don't bother grabbing a signal. */
+ if (signals & 1)
+ goto done;
+
+ /* If there is an available signal, don't block. */
+ if (signals != 0)
+ break;
+
+ /* No signals available after spinning, so prepare to block.
+ We first acquire a group reference and use acquire MO for that so
+ that we synchronize with the dummy read-modify-write in
+ __condvar_quiesce_and_switch_g1 if we read from that. In turn,
+ in this case this will make us see the closed flag on __g_signals
+ that designates a concurrent attempt to reuse the group's slot.
+ We use acquire MO for the __g_signals check to make the
+ __g1_start check work (see spinning above).
+ Note that the group reference acquisition will not mask the
+ release MO when decrementing the reference count because we use
+ an atomic read-modify-write operation and thus extend the release
+ sequence. */
+ atomic_fetch_add_acquire (cond->__data.__g_refs + g, 2);
+ if (((atomic_load_acquire (cond->__data.__g_signals + g) & 1) != 0)
+ || (seq < (__condvar_load_g1_start_relaxed (cond) >> 1)))
+ {
+ /* Our group is closed. Wake up any signalers that might be
+ waiting. */
+ __condvar_dec_grefs (cond, g, private);
+ goto done;
+ }
+
+ // Now block.
+ struct _pthread_cleanup_buffer buffer;
+ struct _condvar_cleanup_buffer cbuffer;
+ cbuffer.wseq = wseq;
+ cbuffer.cond = cond;
+ cbuffer.mutex = mutex;
+ cbuffer.private = private;
+ __pthread_cleanup_push (&buffer, __condvar_cleanup_waiting, &cbuffer);
+
+ if (abstime == NULL)
+ {
+ /* Block without a timeout. */
+ err = futex_wait_cancelable (
+ cond->__data.__g_signals + g, 0, private);
+ }
+ else
+ {
+ /* Block, but with a timeout.
+ Work around the fact that the kernel rejects negative timeout
+ values despite them being valid. */
+ if (__glibc_unlikely (abstime->tv_sec < 0))
+ err = ETIMEDOUT;
+
+ else if ((flags & __PTHREAD_COND_CLOCK_MONOTONIC_MASK) != 0)
+ {
+ /* CLOCK_MONOTONIC is requested. */
+ struct timespec rt;
+ struct __timespec64 rt64;
+ if (__clock_gettime (CLOCK_MONOTONIC, &rt) != 0)
+ __libc_fatal ("clock_gettime does not support "
+ "CLOCK_MONOTONIC");
+ /* Convert the absolute timeout value to a relative
+ timeout. */
+ rt64.tv_sec = abstime->tv_sec - rt.tv_sec;
+ rt64.tv_nsec = abstime->tv_nsec - rt.tv_nsec;
+ if (rt64.tv_nsec < 0)
+ {
+ rt64.tv_nsec += 1000000000;
+ --rt64.tv_sec;
+ }
+ /* Did we already time out? */
+ if (__glibc_unlikely (rt64.tv_sec < 0))
+ err = ETIMEDOUT;
+ else
+ err = futex_reltimed_wait_cancelable_t64
+ (cond->__data.__g_signals + g, 0, &rt64, private);
+ }
+ else
+ {
+ /* Use CLOCK_REALTIME. */
+ err = futex_abstimed_wait_cancelable_t64
+ (cond->__data.__g_signals + g, 0, abstime, private);
+ }
+ }
+
+ __pthread_cleanup_pop (&buffer, 0);
+
+ if (__glibc_unlikely (err == ETIMEDOUT))
+ {
+ __condvar_dec_grefs (cond, g, private);
+ /* If we timed out, we effectively cancel waiting. Note that
+ we have decremented __g_refs before cancellation, so that a
+ deadlock between waiting for quiescence of our group in
+ __condvar_quiesce_and_switch_g1 and us trying to acquire
+ the lock during cancellation is not possible. */
+ __condvar_cancel_waiting (cond, seq, g, private);
+ result = ETIMEDOUT;
+ goto done;
+ }
+ else
+ __condvar_dec_grefs (cond, g, private);
+
+ /* Reload signals. See above for MO. */
+ signals = atomic_load_acquire (cond->__data.__g_signals + g);
+ }
+
+ }
+ /* Try to grab a signal. Use acquire MO so that we see an up-to-date value
+ of __g1_start below (see spinning above for a similar case). In
+ particular, if we steal from a more recent group, we will also see a
+ more recent __g1_start below. */
+ while (!atomic_compare_exchange_weak_acquire (cond->__data.__g_signals + g,
+ &signals, signals - 2));
+
+ /* We consumed a signal but we could have consumed from a more recent group
+ that aliased with ours due to being in the same group slot. If this
+ might be the case our group must be closed as visible through
+ __g1_start. */
+ uint64_t g1_start = __condvar_load_g1_start_relaxed (cond);
+ if (seq < (g1_start >> 1))
+ {
+ /* We potentially stole a signal from a more recent group but we do not
+ know which group we really consumed from.
+ We do not care about groups older than current G1 because they are
+ closed; we could have stolen from these, but then we just add a
+ spurious wake-up for the current groups.
+ We will never steal a signal from current G2 that was really intended
+ for G2 because G2 never receives signals (until it becomes G1). We
+ could have stolen a signal from G2 that was conservatively added by a
+ previous waiter that also thought it stole a signal -- but given that
+ that signal was added unnecessarily, it's not a problem if we steal
+ it.
+ Thus, the remaining case is that we could have stolen from the current
+ G1, where "current" means the __g1_start value we observed. However,
+ if the current G1 does not have the same slot index as we do, we did
+ not steal from it and do not need to undo that. This is the reason
+ for putting a bit with G2's index into __g1_start as well. */
+ if (((g1_start & 1) ^ 1) == g)
+ {
+ /* We have to conservatively undo our potential mistake of stealing
+ a signal. We can stop trying to do that when the current G1
+ changes because other spinning waiters will notice this too and
+ __condvar_quiesce_and_switch_g1 has checked that there are no
+ futex waiters anymore before switching G1.
+ Relaxed MO is fine for the __g1_start load because we need to
+ merely be able to observe this fact and not have to observe
+ something else as well.
+ ??? Would it help to spin for a little while to see whether the
+ current G1 gets closed? This might be worthwhile if the group is
+ small or close to being closed. */
+ unsigned int s = atomic_load_relaxed (cond->__data.__g_signals + g);
+ while (__condvar_load_g1_start_relaxed (cond) == g1_start)
+ {
+ /* Try to add a signal. We don't need to acquire the lock
+ because at worst we can cause a spurious wake-up. If the
+ group is in the process of being closed (LSB is true), this
+ has an effect similar to us adding a signal. */
+ if (((s & 1) != 0)
+ || atomic_compare_exchange_weak_relaxed
+ (cond->__data.__g_signals + g, &s, s + 2))
+ {
+ /* If we added a signal, we also need to add a wake-up on
+ the futex. We also need to do that if we skipped adding
+ a signal because the group is being closed because
+ while __condvar_quiesce_and_switch_g1 could have closed
+ the group, it might still be waiting for futex waiters to
+ leave (and one of those waiters might be the one we stole
+ the signal from, which would cause it to block using the
+ futex). */
+ futex_wake (cond->__data.__g_signals + g, 1, private);
+ break;
+ }
+ /* TODO Back off. */
+ }
+ }
+ }
+
+ done:
+
+ /* Confirm that we have been woken. We do that before acquiring the mutex
+ to allow for execution of pthread_cond_destroy while having acquired the
+ mutex. */
+ __condvar_confirm_wakeup (cond, private);
+
+ /* Woken up; now re-acquire the mutex. If this doesn't fail, return RESULT,
+ which is set to ETIMEDOUT if a timeout occurred, or zero otherwise. */
+ err = __pthread_mutex_cond_lock (mutex);
+ /* XXX Abort on errors that are disallowed by POSIX? */
+ return (err != 0) ? err : result;
+}
+
/* See __pthread_cond_wait_common. */
int
@@ -667,6 +941,17 @@ __pthread_cond_timedwait (pthread_cond_t *cond, pthread_mutex_t *mutex,
return __pthread_cond_wait_common (cond, mutex, abstime);
}
+int
+__pthread_cond_timedwait_t64 (pthread_cond_t *cond, pthread_mutex_t *mutex,
+ const struct __timespec64 *abstime)
+{
+ /* Check parameter validity. This should also tell the compiler that
+ it can assume that abstime is not NULL. */
+ if (abstime->tv_nsec < 0 || abstime->tv_nsec >= 1000000000)
+ return EINVAL;
+ return __pthread_cond_wait_common_t64 (cond, mutex, abstime);
+}
+
versioned_symbol (libpthread, __pthread_cond_wait, pthread_cond_wait,
GLIBC_2_3_2);
versioned_symbol (libpthread, __pthread_cond_timedwait, pthread_cond_timedwait,
diff --git a/nptl/pthread_mutex_timedlock.c b/nptl/pthread_mutex_timedlock.c
index d5ec3141f3..6cad951129 100644
--- a/nptl/pthread_mutex_timedlock.c
+++ b/nptl/pthread_mutex_timedlock.c
@@ -32,6 +32,10 @@
#define lll_timedlock_elision(a,dummy,b,c) lll_timedlock(a, b, c)
#endif
+#ifndef lll_timedlock_elision_t64
+#define lll_timedlock_elision_t64(a,dummy,b,c) lll_timedlock_t64(a, b, c)
+#endif
+
#ifndef lll_trylock_elision
#define lll_trylock_elision(a,t) lll_trylock(a)
#endif
@@ -638,3 +642,615 @@ __pthread_mutex_timedlock (pthread_mutex_t *mutex,
return result;
}
weak_alias (__pthread_mutex_timedlock, pthread_mutex_timedlock)
+
+/* 64-bit time version */
+
+int
+pthread_mutex_timedlock_t64 (pthread_mutex_t *mutex,
+ const struct __timespec64 *abstime)
+{
+ int oldval;
+ pid_t id = THREAD_GETMEM (THREAD_SELF, tid);
+ int result = 0;
+
+ LIBC_PROBE (mutex_timedlock_entry, 2, mutex, abstime);
+
+ /* We must not check ABSTIME here. If the thread does not block
+ abstime must not be checked for a valid value. */
+
+ switch (__builtin_expect (PTHREAD_MUTEX_TYPE_ELISION (mutex),
+ PTHREAD_MUTEX_TIMED_NP))
+ {
+ /* Recursive mutex. */
+ case PTHREAD_MUTEX_RECURSIVE_NP|PTHREAD_MUTEX_ELISION_NP:
+ case PTHREAD_MUTEX_RECURSIVE_NP:
+ /* Check whether we already hold the mutex. */
+ if (mutex->__data.__owner == id)
+ {
+ /* Just bump the counter. */
+ if (__glibc_unlikely (mutex->__data.__count + 1 == 0))
+ /* Overflow of the counter. */
+ return EAGAIN;
+
+ ++mutex->__data.__count;
+
+ goto out;
+ }
+
+ /* We have to get the mutex. */
+ result = lll_timedlock_t64 (mutex->__data.__lock, abstime,
+ PTHREAD_MUTEX_PSHARED (mutex));
+
+ if (result != 0)
+ goto out;
+
+ /* Only locked once so far. */
+ mutex->__data.__count = 1;
+ break;
+
+ /* Error checking mutex. */
+ case PTHREAD_MUTEX_ERRORCHECK_NP:
+ /* Check whether we already hold the mutex. */
+ if (__glibc_unlikely (mutex->__data.__owner == id))
+ return EDEADLK;
+
+ /* Don't do lock elision on an error checking mutex. */
+ goto simple;
+
+ case PTHREAD_MUTEX_TIMED_NP:
+ FORCE_ELISION (mutex, goto elision);
+ simple:
+ /* Normal mutex. */
+ result = lll_timedlock_t64 (mutex->__data.__lock, abstime,
+ PTHREAD_MUTEX_PSHARED (mutex));
+ break;
+
+ case PTHREAD_MUTEX_TIMED_ELISION_NP:
+ elision: __attribute__((unused))
+ /* Don't record ownership */
+ return lll_timedlock_elision_t64 (mutex->__data.__lock,
+ mutex->__data.__spins,
+ abstime,
+ PTHREAD_MUTEX_PSHARED (mutex));
+
+
+ case PTHREAD_MUTEX_ADAPTIVE_NP:
+ if (! __is_smp)
+ goto simple;
+
+ if (lll_trylock (mutex->__data.__lock) != 0)
+ {
+ int cnt = 0;
+ int max_cnt = MIN (MAX_ADAPTIVE_COUNT,
+ mutex->__data.__spins * 2 + 10);
+ do
+ {
+ if (cnt++ >= max_cnt)
+ {
+ result = lll_timedlock_t64 (mutex->__data.__lock, abstime,
+ PTHREAD_MUTEX_PSHARED (mutex));
+ break;
+ }
+ atomic_spin_nop ();
+ }
+ while (lll_trylock (mutex->__data.__lock) != 0);
+
+ mutex->__data.__spins += (cnt - mutex->__data.__spins) / 8;
+ }
+ break;
+
+ case PTHREAD_MUTEX_ROBUST_RECURSIVE_NP:
+ case PTHREAD_MUTEX_ROBUST_ERRORCHECK_NP:
+ case PTHREAD_MUTEX_ROBUST_NORMAL_NP:
+ case PTHREAD_MUTEX_ROBUST_ADAPTIVE_NP:
+ THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending,
+ &mutex->__data.__list.__next);
+ /* We need to set op_pending before starting the operation. Also
+ see comments at ENQUEUE_MUTEX. */
+ __asm ("" ::: "memory");
+
+ oldval = mutex->__data.__lock;
+ /* This is set to FUTEX_WAITERS iff we might have shared the
+ FUTEX_WAITERS flag with other threads, and therefore need to keep it
+ set to avoid lost wake-ups. We have the same requirement in the
+ simple mutex algorithm. */
+ unsigned int assume_other_futex_waiters = 0;
+ while (1)
+ {
+ /* Try to acquire the lock through a CAS from 0 (not acquired) to
+ our TID | assume_other_futex_waiters. */
+ if (__glibc_likely (oldval == 0))
+ {
+ oldval
+ = atomic_compare_and_exchange_val_acq (&mutex->__data.__lock,
+ id | assume_other_futex_waiters, 0);
+ if (__glibc_likely (oldval == 0))
+ break;
+ }
+
+ if ((oldval & FUTEX_OWNER_DIED) != 0)
+ {
+ /* The previous owner died. Try locking the mutex. */
+ int newval = id | (oldval & FUTEX_WAITERS)
+ | assume_other_futex_waiters;
+
+ newval
+ = atomic_compare_and_exchange_val_acq (&mutex->__data.__lock,
+ newval, oldval);
+ if (newval != oldval)
+ {
+ oldval = newval;
+ continue;
+ }
+
+ /* We got the mutex. */
+ mutex->__data.__count = 1;
+ /* But it is inconsistent unless marked otherwise. */
+ mutex->__data.__owner = PTHREAD_MUTEX_INCONSISTENT;
+
+ /* We must not enqueue the mutex before we have acquired it.
+ Also see comments at ENQUEUE_MUTEX. */
+ __asm ("" ::: "memory");
+ ENQUEUE_MUTEX (mutex);
+ /* We need to clear op_pending after we enqueue the mutex. */
+ __asm ("" ::: "memory");
+ THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
+
+ /* Note that we deliberately exit here. If we fall
+ through to the end of the function __nusers would be
+ incremented which is not correct because the old
+ owner has to be discounted. */
+ return EOWNERDEAD;
+ }
+
+ /* Check whether we already hold the mutex. */
+ if (__glibc_unlikely ((oldval & FUTEX_TID_MASK) == id))
+ {
+ int kind = PTHREAD_MUTEX_TYPE (mutex);
+ if (kind == PTHREAD_MUTEX_ROBUST_ERRORCHECK_NP)
+ {
+ /* We do not need to ensure ordering wrt another memory
+ access. Also see comments at ENQUEUE_MUTEX. */
+ THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending,
+ NULL);
+ return EDEADLK;
+ }
+
+ if (kind == PTHREAD_MUTEX_ROBUST_RECURSIVE_NP)
+ {
+ /* We do not need to ensure ordering wrt another memory
+ access. */
+ THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending,
+ NULL);
+
+ /* Just bump the counter. */
+ if (__glibc_unlikely (mutex->__data.__count + 1 == 0))
+ /* Overflow of the counter. */
+ return EAGAIN;
+
+ ++mutex->__data.__count;
+
+ LIBC_PROBE (mutex_timedlock_acquired, 1, mutex);
+
+ return 0;
+ }
+ }
+
+ /* We are about to block; check whether the timeout is invalid. */
+ if (abstime->tv_nsec < 0 || abstime->tv_nsec >= 1000000000)
+ return EINVAL;
+ /* Work around the fact that the kernel rejects negative timeout
+ values despite them being valid. */
+ if (__glibc_unlikely (abstime->tv_sec < 0))
+ return ETIMEDOUT;
+#if (!defined __ASSUME_FUTEX_CLOCK_REALTIME \
+ || !defined lll_futex_timed_wait_bitset)
+ struct timeval tv;
+ struct timespec rt;
+
+ /* Get the current time. */
+ (void) __gettimeofday (&tv, NULL);
+
+ /* Compute relative timeout. */
+ rt.tv_sec = abstime->tv_sec - tv.tv_sec;
+ rt.tv_nsec = abstime->tv_nsec - tv.tv_usec * 1000;
+ if (rt.tv_nsec < 0)
+ {
+ rt.tv_nsec += 1000000000;
+ --rt.tv_sec;
+ }
+
+ /* Already timed out? */
+ if (rt.tv_sec < 0)
+ return ETIMEDOUT;
+#endif
+
+ /* We cannot acquire the mutex nor has its owner died. Thus, try
+ to block using futexes. Set FUTEX_WAITERS if necessary so that
+ other threads are aware that there are potentially threads
+ blocked on the futex. Restart if oldval changed in the
+ meantime. */
+ if ((oldval & FUTEX_WAITERS) == 0)
+ {
+ if (atomic_compare_and_exchange_bool_acq (&mutex->__data.__lock,
+ oldval | FUTEX_WAITERS,
+ oldval)
+ != 0)
+ {
+ oldval = mutex->__data.__lock;
+ continue;
+ }
+ oldval |= FUTEX_WAITERS;
+ }
+
+ /* It is now possible that we share the FUTEX_WAITERS flag with
+ another thread; therefore, update assume_other_futex_waiters so
+ that we do not forget about this when handling other cases
+ above and thus do not cause lost wake-ups. */
+ assume_other_futex_waiters |= FUTEX_WAITERS;
+
+ /* Block using the futex. */
+#if (!defined __ASSUME_FUTEX_CLOCK_REALTIME \
+ || !defined lll_futex_timed_wait_bitset)
+ lll_futex_timed_wait (&mutex->__data.__lock, oldval,
+ &rt, PTHREAD_ROBUST_MUTEX_PSHARED (mutex));
+#else
+ int err = lll_futex_timed_wait_bitset_t64 (&mutex->__data.__lock,
+ oldval, abstime, FUTEX_CLOCK_REALTIME,
+ PTHREAD_ROBUST_MUTEX_PSHARED (mutex));
+ /* The futex call timed out. */
+ if (err == -ETIMEDOUT)
+ return -err;
+#endif
+ /* Reload current lock value. */
+ oldval = mutex->__data.__lock;
+ }
+
+ /* We have acquired the mutex; check if it is still consistent. */
+ if (__builtin_expect (mutex->__data.__owner
+ == PTHREAD_MUTEX_NOTRECOVERABLE, 0))
+ {
+ /* This mutex is now not recoverable. */
+ mutex->__data.__count = 0;
+ int private = PTHREAD_ROBUST_MUTEX_PSHARED (mutex);
+ lll_unlock (mutex->__data.__lock, private);
+ /* FIXME This violates the mutex destruction requirements. See
+ __pthread_mutex_unlock_full. */
+ THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
+ return ENOTRECOVERABLE;
+ }
+
+ mutex->__data.__count = 1;
+ /* We must not enqueue the mutex before we have acquired it.
+ Also see comments at ENQUEUE_MUTEX. */
+ __asm ("" ::: "memory");
+ ENQUEUE_MUTEX (mutex);
+ /* We need to clear op_pending after we enqueue the mutex. */
+ __asm ("" ::: "memory");
+ THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
+ break;
+
+ /* The PI support requires the Linux futex system call. If that's not
+ available, pthread_mutex_init should never have allowed the type to
+ be set. So it will get the default case for an invalid type. */
+#ifdef __NR_futex
+ case PTHREAD_MUTEX_PI_RECURSIVE_NP:
+ case PTHREAD_MUTEX_PI_ERRORCHECK_NP:
+ case PTHREAD_MUTEX_PI_NORMAL_NP:
+ case PTHREAD_MUTEX_PI_ADAPTIVE_NP:
+ case PTHREAD_MUTEX_PI_ROBUST_RECURSIVE_NP:
+ case PTHREAD_MUTEX_PI_ROBUST_ERRORCHECK_NP:
+ case PTHREAD_MUTEX_PI_ROBUST_NORMAL_NP:
+ case PTHREAD_MUTEX_PI_ROBUST_ADAPTIVE_NP:
+ {
+ int kind = mutex->__data.__kind & PTHREAD_MUTEX_KIND_MASK_NP;
+ int robust = mutex->__data.__kind & PTHREAD_MUTEX_ROBUST_NORMAL_NP;
+
+ if (robust)
+ {
+ /* Note: robust PI futexes are signaled by setting bit 0. */
+ THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending,
+ (void *) (((uintptr_t) &mutex->__data.__list.__next)
+ | 1));
+ /* We need to set op_pending before starting the operation. Also
+ see comments at ENQUEUE_MUTEX. */
+ __asm ("" ::: "memory");
+ }
+
+ oldval = mutex->__data.__lock;
+
+ /* Check whether we already hold the mutex. */
+ if (__glibc_unlikely ((oldval & FUTEX_TID_MASK) == id))
+ {
+ if (kind == PTHREAD_MUTEX_ERRORCHECK_NP)
+ {
+ /* We do not need to ensure ordering wrt another memory
+ access. */
+ THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
+ return EDEADLK;
+ }
+
+ if (kind == PTHREAD_MUTEX_RECURSIVE_NP)
+ {
+ /* We do not need to ensure ordering wrt another memory
+ access. */
+ THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
+
+ /* Just bump the counter. */
+ if (__glibc_unlikely (mutex->__data.__count + 1 == 0))
+ /* Overflow of the counter. */
+ return EAGAIN;
+
+ ++mutex->__data.__count;
+
+ LIBC_PROBE (mutex_timedlock_acquired, 1, mutex);
+
+ return 0;
+ }
+ }
+
+ oldval = atomic_compare_and_exchange_val_acq (&mutex->__data.__lock,
+ id, 0);
+
+ if (oldval != 0)
+ {
+ /* The mutex is locked. The kernel will now take care of
+ everything. The timeout value must be a relative value.
+ Convert it. */
+ int private = (robust
+ ? PTHREAD_ROBUST_MUTEX_PSHARED (mutex)
+ : PTHREAD_MUTEX_PSHARED (mutex));
+ INTERNAL_SYSCALL_DECL (__err);
+
+ int e;
+
+ if (abstime->tv_sec > INT_MAX)
+ {
+ e = EOVERFLOW;
+ }
+ else
+ {
+ struct timespec ts;
+ ts.tv_sec = abstime->tv_sec;
+ ts.tv_nsec = abstime->tv_nsec;
+ e = INTERNAL_SYSCALL (futex, __err, 4, &mutex->__data.__lock,
+ __lll_private_flag (FUTEX_LOCK_PI,
+ private), 1,
+ &ts);
+ }
+ if (INTERNAL_SYSCALL_ERROR_P (e, __err))
+ {
+ if (INTERNAL_SYSCALL_ERRNO (e, __err) == ETIMEDOUT)
+ return ETIMEDOUT;
+
+ if (INTERNAL_SYSCALL_ERRNO (e, __err) == ESRCH
+ || INTERNAL_SYSCALL_ERRNO (e, __err) == EDEADLK)
+ {
+ assert (INTERNAL_SYSCALL_ERRNO (e, __err) != EDEADLK
+ || (kind != PTHREAD_MUTEX_ERRORCHECK_NP
+ && kind != PTHREAD_MUTEX_RECURSIVE_NP));
+ /* ESRCH can happen only for non-robust PI mutexes where
+ the owner of the lock died. */
+ assert (INTERNAL_SYSCALL_ERRNO (e, __err) != ESRCH
+ || !robust);
+
+ /* Delay the thread until the timeout is reached.
+ Then return ETIMEDOUT. */
+ struct timespec reltime;
+ struct __timespec64 now;
+
+ INTERNAL_SYSCALL (clock_gettime64, __err, 2, CLOCK_REALTIME,
+ &now);
+ reltime.tv_sec = abstime->tv_sec - now.tv_sec;
+ reltime.tv_nsec = abstime->tv_nsec - now.tv_nsec;
+ if (reltime.tv_nsec < 0)
+ {
+ reltime.tv_nsec += 1000000000;
+ --reltime.tv_sec;
+ }
+ if (reltime.tv_sec >= 0)
+ while (nanosleep_not_cancel (&reltime, &reltime) != 0)
+ continue;
+
+ return ETIMEDOUT;
+ }
+
+ return INTERNAL_SYSCALL_ERRNO (e, __err);
+ }
+
+ oldval = mutex->__data.__lock;
+
+ assert (robust || (oldval & FUTEX_OWNER_DIED) == 0);
+ }
+
+ if (__glibc_unlikely (oldval & FUTEX_OWNER_DIED))
+ {
+ atomic_and (&mutex->__data.__lock, ~FUTEX_OWNER_DIED);
+
+ /* We got the mutex. */
+ mutex->__data.__count = 1;
+ /* But it is inconsistent unless marked otherwise. */
+ mutex->__data.__owner = PTHREAD_MUTEX_INCONSISTENT;
+
+ /* We must not enqueue the mutex before we have acquired it.
+ Also see comments at ENQUEUE_MUTEX. */
+ __asm ("" ::: "memory");
+ ENQUEUE_MUTEX_PI (mutex);
+ /* We need to clear op_pending after we enqueue the mutex. */
+ __asm ("" ::: "memory");
+ THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
+
+ /* Note that we deliberately exit here. If we fall
+ through to the end of the function __nusers would be
+ incremented which is not correct because the old owner
+ has to be discounted. */
+ return EOWNERDEAD;
+ }
+
+ if (robust
+ && __builtin_expect (mutex->__data.__owner
+ == PTHREAD_MUTEX_NOTRECOVERABLE, 0))
+ {
+ /* This mutex is now not recoverable. */
+ mutex->__data.__count = 0;
+
+ INTERNAL_SYSCALL_DECL (__err);
+ INTERNAL_SYSCALL (futex, __err, 4, &mutex->__data.__lock,
+ __lll_private_flag (FUTEX_UNLOCK_PI,
+ PTHREAD_ROBUST_MUTEX_PSHARED (mutex)),
+ 0, 0);
+
+ /* To the kernel, this will be visible after the kernel has
+ acquired the mutex in the syscall. */
+ THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
+ return ENOTRECOVERABLE;
+ }
+
+ mutex->__data.__count = 1;
+ if (robust)
+ {
+ /* We must not enqueue the mutex before we have acquired it.
+ Also see comments at ENQUEUE_MUTEX. */
+ __asm ("" ::: "memory");
+ ENQUEUE_MUTEX_PI (mutex);
+ /* We need to clear op_pending after we enqueue the mutex. */
+ __asm ("" ::: "memory");
+ THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
+ }
+ }
+ break;
+#endif /* __NR_futex. */
+
+ case PTHREAD_MUTEX_PP_RECURSIVE_NP:
+ case PTHREAD_MUTEX_PP_ERRORCHECK_NP:
+ case PTHREAD_MUTEX_PP_NORMAL_NP:
+ case PTHREAD_MUTEX_PP_ADAPTIVE_NP:
+ {
+ int kind = mutex->__data.__kind & PTHREAD_MUTEX_KIND_MASK_NP;
+
+ oldval = mutex->__data.__lock;
+
+ /* Check whether we already hold the mutex. */
+ if (mutex->__data.__owner == id)
+ {
+ if (kind == PTHREAD_MUTEX_ERRORCHECK_NP)
+ return EDEADLK;
+
+ if (kind == PTHREAD_MUTEX_RECURSIVE_NP)
+ {
+ /* Just bump the counter. */
+ if (__glibc_unlikely (mutex->__data.__count + 1 == 0))
+ /* Overflow of the counter. */
+ return EAGAIN;
+
+ ++mutex->__data.__count;
+
+ LIBC_PROBE (mutex_timedlock_acquired, 1, mutex);
+
+ return 0;
+ }
+ }
+
+ int oldprio = -1, ceilval;
+ do
+ {
+ int ceiling = (oldval & PTHREAD_MUTEX_PRIO_CEILING_MASK)
+ >> PTHREAD_MUTEX_PRIO_CEILING_SHIFT;
+
+ if (__pthread_current_priority () > ceiling)
+ {
+ result = EINVAL;
+ failpp:
+ if (oldprio != -1)
+ __pthread_tpp_change_priority (oldprio, -1);
+ return result;
+ }
+
+ result = __pthread_tpp_change_priority (oldprio, ceiling);
+ if (result)
+ return result;
+
+ ceilval = ceiling << PTHREAD_MUTEX_PRIO_CEILING_SHIFT;
+ oldprio = ceiling;
+
+ oldval
+ = atomic_compare_and_exchange_val_acq (&mutex->__data.__lock,
+ ceilval | 1, ceilval);
+
+ if (oldval == ceilval)
+ break;
+
+ do
+ {
+ oldval
+ = atomic_compare_and_exchange_val_acq (&mutex->__data.__lock,
+ ceilval | 2,
+ ceilval | 1);
+
+ if ((oldval & PTHREAD_MUTEX_PRIO_CEILING_MASK) != ceilval)
+ break;
+
+ if (oldval != ceilval)
+ {
+ /* Reject invalid timeouts. */
+ if (abstime->tv_nsec < 0 || abstime->tv_nsec >= 1000000000)
+ {
+ result = EINVAL;
+ goto failpp;
+ }
+
+ struct timeval tv;
+ struct timespec rt;
+
+ /* Get the current time. */
+ (void) __gettimeofday (&tv, NULL);
+
+ /* Compute relative timeout. */
+ rt.tv_sec = abstime->tv_sec - tv.tv_sec;
+ rt.tv_nsec = abstime->tv_nsec - tv.tv_usec * 1000;
+ if (rt.tv_nsec < 0)
+ {
+ rt.tv_nsec += 1000000000;
+ --rt.tv_sec;
+ }
+
+ /* Already timed out? */
+ if (rt.tv_sec < 0)
+ {
+ result = ETIMEDOUT;
+ goto failpp;
+ }
+
+ lll_futex_timed_wait (&mutex->__data.__lock,
+ ceilval | 2, &rt,
+ PTHREAD_MUTEX_PSHARED (mutex));
+ }
+ }
+ while (atomic_compare_and_exchange_val_acq (&mutex->__data.__lock,
+ ceilval | 2, ceilval)
+ != ceilval);
+ }
+ while ((oldval & PTHREAD_MUTEX_PRIO_CEILING_MASK) != ceilval);
+
+ assert (mutex->__data.__owner == 0);
+ mutex->__data.__count = 1;
+ }
+ break;
+
+ default:
+ /* Correct code cannot set any other type. */
+ return EINVAL;
+ }
+
+ if (result == 0)
+ {
+ /* Record the ownership. */
+ mutex->__data.__owner = id;
+ ++mutex->__data.__nusers;
+
+ LIBC_PROBE (mutex_timedlock_acquired, 1, mutex);
+ }
+
+ out:
+ return result;
+}
diff --git a/nptl/pthread_rwlock_common.c b/nptl/pthread_rwlock_common.c
index 846687e1cf..b07f86342d 100644
--- a/nptl/pthread_rwlock_common.c
+++ b/nptl/pthread_rwlock_common.c
@@ -507,6 +507,240 @@ __pthread_rwlock_rdlock_full (pthread_rwlock_t *rwlock,
return 0;
}
+/* 64-bit time version */
+
+static __always_inline int
+__pthread_rwlock_rdlock_full_t64 (pthread_rwlock_t *rwlock,
+ const struct __timespec64 *abstime)
+{
+ unsigned int r;
+
+ /* Make sure we are not holding the rwlock as a writer. This is a deadlock
+ situation we recognize and report. */
+ if (__glibc_unlikely (atomic_load_relaxed (&rwlock->__data.__cur_writer)
+ == THREAD_GETMEM (THREAD_SELF, tid)))
+ return EDEADLK;
+
+ /* If we prefer writers, recursive rdlock is disallowed, we are in a read
+ phase, and there are other readers present, we try to wait without
+ extending the read phase. We will be unblocked by either one of the
+ other active readers, or if the writer gives up WRLOCKED (e.g., on
+ timeout).
+ If there are no other readers, we simply race with any existing primary
+ writer; it would have been a race anyway, and changing the odds slightly
+ will likely not make a big difference. */
+ if (rwlock->__data.__flags == PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP)
+ {
+ r = atomic_load_relaxed (&rwlock->__data.__readers);
+ while (((r & PTHREAD_RWLOCK_WRPHASE) == 0)
+ && ((r & PTHREAD_RWLOCK_WRLOCKED) != 0)
+ && ((r >> PTHREAD_RWLOCK_READER_SHIFT) > 0))
+ {
+ /* TODO Spin first. */
+ /* Try setting the flag signaling that we are waiting without having
+ incremented the number of readers. Relaxed MO is fine because
+ this is just about waiting for a state change in __readers. */
+ if (atomic_compare_exchange_weak_relaxed
+ (&rwlock->__data.__readers, &r, r | PTHREAD_RWLOCK_RWAITING))
+ {
+ /* Wait for as long as the flag is set. An ABA situation is
+ harmless because the flag is just about the state of
+ __readers, and all threads set the flag under the same
+ conditions. */
+ while ((atomic_load_relaxed (&rwlock->__data.__readers)
+ & PTHREAD_RWLOCK_RWAITING) != 0)
+ {
+ int private = __pthread_rwlock_get_private (rwlock);
+ int err = futex_abstimed_wait_t64 (&rwlock->__data.__readers,
+ r, abstime, private);
+ /* We ignore EAGAIN and EINTR. On time-outs, we can just
+ return because we don't need to clean up anything. */
+ if (err == ETIMEDOUT)
+ return err;
+ }
+ /* It makes sense to not break out of the outer loop here
+ because we might be in the same situation again. */
+ }
+ else
+ {
+ /* TODO Back-off. */
+ }
+ }
+ }
+ /* Register as a reader, using an add-and-fetch so that R can be used as
+ expected value for future operations. Acquire MO so we synchronize with
+ prior writers as well as the last reader of the previous read phase (see
+ below). */
+ r = atomic_fetch_add_acquire (&rwlock->__data.__readers,
+ (1 << PTHREAD_RWLOCK_READER_SHIFT)) + (1 << PTHREAD_RWLOCK_READER_SHIFT);
+
+ /* Check whether there is an overflow in the number of readers. We assume
+ that the total number of threads is less than half the maximum number
+ of readers that we have bits for in __readers (i.e., with 32-bit int and
+ PTHREAD_RWLOCK_READER_SHIFT of 3, we assume there are less than
+ 1 << (32-3-1) concurrent threads).
+ If there is an overflow, we use a CAS to try to decrement the number of
+ readers if there still is an overflow situation. If so, we return
+ EAGAIN; if not, we are not a thread causing an overflow situation, and so
+ we just continue. Using a fetch-add instead of the CAS isn't possible
+ because other readers might release the lock concurrently, which could
+ make us the last reader and thus responsible for handing ownership over
+ to writers (which requires a CAS too to make the decrement and ownership
+ transfer indivisible). */
+ while (__glibc_unlikely (r >= PTHREAD_RWLOCK_READER_OVERFLOW))
+ {
+ /* Relaxed MO is okay because we just want to undo our registration and
+ cannot have changed the rwlock state substantially if the CAS
+ succeeds. */
+ if (atomic_compare_exchange_weak_relaxed (&rwlock->__data.__readers, &r,
+ r - (1 << PTHREAD_RWLOCK_READER_SHIFT)))
+ return EAGAIN;
+ }
+
+ /* We have registered as a reader, so if we are in a read phase, we have
+ acquired a read lock. This is also the reader--reader fast-path.
+ Even if there is a primary writer, we just return. If writers are to
+ be preferred and we are the only active reader, we could try to enter a
+ write phase to let the writer proceed. This would be okay because we
+ cannot have acquired the lock previously as a reader (which could result
+ in deadlock if we would wait for the primary writer to run). However,
+ this seems to be a corner case and handling it specially would not be
+ worth the complexity. */
+ if (__glibc_likely ((r & PTHREAD_RWLOCK_WRPHASE) == 0))
+ return 0;
+
+ /* If there is no primary writer but we are in a write phase, we can try
+ to install a read phase ourself. */
+ while (((r & PTHREAD_RWLOCK_WRPHASE) != 0)
+ && ((r & PTHREAD_RWLOCK_WRLOCKED) == 0))
+ {
+ /* Try to enter a read phase: If the CAS below succeeds, we have
+ ownership; if it fails, we will simply retry and reassess the
+ situation.
+ Acquire MO so we synchronize with prior writers. */
+ if (atomic_compare_exchange_weak_acquire (&rwlock->__data.__readers, &r,
+ r ^ PTHREAD_RWLOCK_WRPHASE))
+ {
+ /* We started the read phase, so we are also responsible for
+ updating the write-phase futex. Relaxed MO is sufficient.
+ Note that there can be no other reader that we have to wake
+ because all other readers will see the read phase started by us
+ (or they will try to start it themselves); if a writer started
+ the read phase, we cannot have started it. Furthermore, we
+ cannot discard a PTHREAD_RWLOCK_FUTEX_USED flag because we will
+ overwrite the value set by the most recent writer (or the readers
+ before it in case of explicit hand-over) and we know that there
+ are no waiting readers. */
+ atomic_store_relaxed (&rwlock->__data.__wrphase_futex, 0);
+ return 0;
+ }
+ else
+ {
+ /* TODO Back off before retrying. Also see above. */
+ }
+ }
+
+ if ((r & PTHREAD_RWLOCK_WRPHASE) != 0)
+ {
+ /* We are in a write phase, and there must be a primary writer because
+ of the previous loop. Block until the primary writer gives up the
+ write phase. This case requires explicit hand-over using
+ __wrphase_futex.
+ However, __wrphase_futex might not have been set to 1 yet (either
+ because explicit hand-over to the writer is still ongoing, or because
+ the writer has started the write phase but does not yet have updated
+ __wrphase_futex). The least recent value of __wrphase_futex we can
+ read from here is the modification of the last read phase (because
+ we synchronize with the last reader in this read phase through
+ __readers; see the use of acquire MO on the fetch_add above).
+ Therefore, if we observe a value of 0 for __wrphase_futex, we need
+ to subsequently check that __readers now indicates a read phase; we
+ need to use acquire MO for this so that if we observe a read phase,
+ we will also see the modification of __wrphase_futex by the previous
+ writer. We then need to load __wrphase_futex again and continue to
+ wait if it is not 0, so that we do not skip explicit hand-over.
+ Relaxed MO is sufficient for the load from __wrphase_futex because
+ we just use it as an indicator for when we can proceed; we use
+ __readers and the acquire MO accesses to it to eventually read from
+ the proper stores to __wrphase_futex. */
+ unsigned int wpf;
+ bool ready = false;
+ for (;;)
+ {
+ while (((wpf = atomic_load_relaxed (&rwlock->__data.__wrphase_futex))
+ | PTHREAD_RWLOCK_FUTEX_USED) == (1 | PTHREAD_RWLOCK_FUTEX_USED))
+ {
+ int private = __pthread_rwlock_get_private (rwlock);
+ if (((wpf & PTHREAD_RWLOCK_FUTEX_USED) == 0)
+ && !atomic_compare_exchange_weak_relaxed
+ (&rwlock->__data.__wrphase_futex,
+ &wpf, wpf | PTHREAD_RWLOCK_FUTEX_USED))
+ continue;
+ int err = futex_abstimed_wait_t64 (&rwlock->__data.__wrphase_futex,
+ 1 | PTHREAD_RWLOCK_FUTEX_USED, abstime, private);
+ if (err == ETIMEDOUT)
+ {
+ /* If we timed out, we need to unregister. If no read phase
+ has been installed while we waited, we can just decrement
+ the number of readers. Otherwise, we just acquire the
+ lock, which is allowed because we give no precise timing
+ guarantees, and because the timeout is only required to
+ be in effect if we would have had to wait for other
+ threads (e.g., if futex_wait would time-out immediately
+ because the given absolute time is in the past). */
+ r = atomic_load_relaxed (&rwlock->__data.__readers);
+ while ((r & PTHREAD_RWLOCK_WRPHASE) != 0)
+ {
+ /* We don't need to make anything else visible to
+ others besides unregistering, so relaxed MO is
+ sufficient. */
+ if (atomic_compare_exchange_weak_relaxed
+ (&rwlock->__data.__readers, &r,
+ r - (1 << PTHREAD_RWLOCK_READER_SHIFT)))
+ return ETIMEDOUT;
+ /* TODO Back-off. */
+ }
+ /* Use the acquire MO fence to mirror the steps taken in the
+ non-timeout case. Note that the read can happen both
+ in the atomic_load above as well as in the failure case
+ of the CAS operation. */
+ atomic_thread_fence_acquire ();
+ /* We still need to wait for explicit hand-over, but we must
+ not use futex_wait anymore because we would just time out
+ in this case and thus make the spin-waiting we need
+ unnecessarily expensive. */
+ while ((atomic_load_relaxed (&rwlock->__data.__wrphase_futex)
+ | PTHREAD_RWLOCK_FUTEX_USED)
+ == (1 | PTHREAD_RWLOCK_FUTEX_USED))
+ {
+ /* TODO Back-off? */
+ }
+ ready = true;
+ break;
+ }
+ /* If we got interrupted (EINTR) or the futex word does not have the
+ expected value (EAGAIN), retry. */
+ }
+ if (ready)
+ /* See below. */
+ break;
+ /* We need acquire MO here so that we synchronize with the lock
+ release of the writer, and so that we observe a recent value of
+ __wrphase_futex (see below). */
+ if ((atomic_load_acquire (&rwlock->__data.__readers)
+ & PTHREAD_RWLOCK_WRPHASE) == 0)
+ /* We are in a read phase now, so the least recent modification of
+ __wrphase_futex we can read from is the store by the writer
+ with value 1. Thus, only now we can assume that if we observe
+ a value of 0, explicit hand-over is finished. Retry the loop
+ above one more time. */
+ ready = true;
+ }
+ }
+
+ return 0;
+}
+
static __always_inline void
__pthread_rwlock_wrunlock (pthread_rwlock_t *rwlock)
@@ -924,3 +1158,360 @@ __pthread_rwlock_wrlock_full (pthread_rwlock_t *rwlock,
THREAD_GETMEM (THREAD_SELF, tid));
return 0;
}
+
+/* 64-bit time version */
+
+static __always_inline int
+__pthread_rwlock_wrlock_full_t64 (pthread_rwlock_t *rwlock,
+ const struct __timespec64 *abstime)
+{
+ /* Make sure we are not holding the rwlock as a writer. This is a deadlock
+ situation we recognize and report. */
+ if (__glibc_unlikely (atomic_load_relaxed (&rwlock->__data.__cur_writer)
+ == THREAD_GETMEM (THREAD_SELF, tid)))
+ return EDEADLK;
+
+ /* First we try to acquire the role of primary writer by setting WRLOCKED;
+ if it was set before, there already is a primary writer. Acquire MO so
+ that we synchronize with previous primary writers.
+
+ We do not try to change to a write phase right away using a fetch_or
+ because we would have to reset it again and wake readers if there are
+ readers present (some readers could try to acquire the lock more than
+ once, so setting a write phase in the middle of this could cause
+ deadlock). Changing to a write phase eagerly would only speed up the
+ transition from a read phase to a write phase in the uncontended case,
+ but it would slow down the contended case if readers are preferred (which
+ is the default).
+ We could try to CAS from a state with no readers to a write phase, but
+ this could be less scalable if readers arrive and leave frequently. */
+ bool may_share_futex_used_flag = false;
+ unsigned int r = atomic_fetch_or_acquire (&rwlock->__data.__readers,
+ PTHREAD_RWLOCK_WRLOCKED);
+ if (__glibc_unlikely ((r & PTHREAD_RWLOCK_WRLOCKED) != 0))
+ {
+ /* There is another primary writer. */
+ bool prefer_writer =
+ (rwlock->__data.__flags != PTHREAD_RWLOCK_PREFER_READER_NP);
+ if (prefer_writer)
+ {
+ /* We register as a waiting writer, so that we can make use of
+ writer--writer hand-over. Relaxed MO is fine because we just
+ want to register. We assume that the maximum number of threads
+ is less than the capacity in __writers. */
+ atomic_fetch_add_relaxed (&rwlock->__data.__writers, 1);
+ }
+ for (;;)
+ {
+ /* TODO Spin until WRLOCKED is 0 before trying the CAS below.
+ But pay attention to not delay trying writer--writer hand-over
+ for too long (which we must try eventually anyway). */
+ if ((r & PTHREAD_RWLOCK_WRLOCKED) == 0)
+ {
+ /* Try to become the primary writer or retry. Acquire MO as in
+ the fetch_or above. */
+ if (atomic_compare_exchange_weak_acquire
+ (&rwlock->__data.__readers, &r,
+ r | PTHREAD_RWLOCK_WRLOCKED))
+ {
+ if (prefer_writer)
+ {
+ /* Unregister as a waiting writer. Note that because we
+ acquired WRLOCKED, WRHANDOVER will not be set.
+ Acquire MO on the CAS above ensures that
+ unregistering happens after the previous writer;
+ this sorts the accesses to __writers by all
+ primary writers in a useful way (e.g., any other
+ primary writer acquiring after us or getting it from
+ us through WRHANDOVER will see both our changes to
+ __writers).
+ ??? Perhaps this is not strictly necessary for
+ reasons we do not yet know of. */
+ atomic_fetch_add_relaxed (&rwlock->__data.__writers,
+ -1);
+ }
+ break;
+ }
+ /* Retry if the CAS fails (r will have been updated). */
+ continue;
+ }
+ /* If writer--writer hand-over is available, try to become the
+ primary writer this way by grabbing the WRHANDOVER token. If we
+ succeed, we own WRLOCKED. */
+ if (prefer_writer)
+ {
+ unsigned int w = atomic_load_relaxed
+ (&rwlock->__data.__writers);
+ if ((w & PTHREAD_RWLOCK_WRHANDOVER) != 0)
+ {
+ /* Acquire MO is required here so that we synchronize with
+ the writer that handed over WRLOCKED. We also need this
+ for the reload of __readers below because our view of
+ __readers must be at least as recent as the view of the
+ writer that handed over WRLOCKED; we must avoid an ABA
+ through WRHANDOVER, which could, for example, lead to us
+ assuming we are still in a write phase when in fact we
+ are not. */
+ if (atomic_compare_exchange_weak_acquire
+ (&rwlock->__data.__writers,
+ &w, (w - PTHREAD_RWLOCK_WRHANDOVER - 1)))
+ {
+ /* Reload so our view is consistent with the view of
+ the previous owner of WRLOCKED. See above. */
+ r = atomic_load_relaxed (&rwlock->__data.__readers);
+ break;
+ }
+ /* We do not need to reload __readers here. We should try
+ to perform writer--writer hand-over if possible; if it
+ is not possible anymore, we will reload __readers
+ elsewhere in this loop. */
+ continue;
+ }
+ }
+ /* We did not acquire WRLOCKED nor were able to use writer--writer
+ hand-over, so we block on __writers_futex. */
+ int private = __pthread_rwlock_get_private (rwlock);
+ unsigned int wf = atomic_load_relaxed
+ (&rwlock->__data.__writers_futex);
+ if (((wf & ~(unsigned int) PTHREAD_RWLOCK_FUTEX_USED) != 1)
+ || ((wf != (1 | PTHREAD_RWLOCK_FUTEX_USED))
+ && !atomic_compare_exchange_weak_relaxed
+ (&rwlock->__data.__writers_futex, &wf,
+ 1 | PTHREAD_RWLOCK_FUTEX_USED)))
+ {
+ /* If we cannot block on __writers_futex because there is no
+ primary writer, or we cannot set PTHREAD_RWLOCK_FUTEX_USED,
+ we retry. We must reload __readers here in case we cannot
+ block on __writers_futex so that we can become the primary
+ writer and are not stuck in a loop that just continuously
+ fails to block on __writers_futex. */
+ r = atomic_load_relaxed (&rwlock->__data.__readers);
+ continue;
+ }
+ /* We set the flag that signals that the futex is used, or we could
+ have set it if we had been faster than other waiters. As a
+ result, we may share the flag with an unknown number of other
+ writers. Therefore, we must keep this flag set when we acquire
+ the lock. We do not need to do this when we do not reach this
+ point here because then we are not part of the group that may
+ share the flag, and another writer will wake one of the writers
+ in this group. */
+ may_share_futex_used_flag = true;
+ int err = futex_abstimed_wait_t64 (&rwlock->__data.__writers_futex,
+ 1 | PTHREAD_RWLOCK_FUTEX_USED, abstime, private);
+ if (err == ETIMEDOUT)
+ {
+ if (prefer_writer)
+ {
+ /* We need to unregister as a waiting writer. If we are the
+ last writer and writer--writer hand-over is available,
+ we must make use of it because nobody else will reset
+ WRLOCKED otherwise. (If we use it, we simply pretend
+ that this happened before the timeout; see
+ pthread_rwlock_rdlock_full for the full reasoning.)
+ Also see the similar code above. */
+ unsigned int w = atomic_load_relaxed
+ (&rwlock->__data.__writers);
+ while (!atomic_compare_exchange_weak_acquire
+ (&rwlock->__data.__writers, &w,
+ (w == PTHREAD_RWLOCK_WRHANDOVER + 1 ? 0 : w - 1)))
+ {
+ /* TODO Back-off. */
+ }
+ if (w == PTHREAD_RWLOCK_WRHANDOVER + 1)
+ {
+ /* We must continue as primary writer. See above. */
+ r = atomic_load_relaxed (&rwlock->__data.__readers);
+ break;
+ }
+ }
+ /* We cleaned up and cannot have stolen another waiting writer's
+ futex wake-up, so just return. */
+ return ETIMEDOUT;
+ }
+ /* If we got interrupted (EINTR) or the futex word does not have the
+ expected value (EAGAIN), retry after reloading __readers. */
+ r = atomic_load_relaxed (&rwlock->__data.__readers);
+ }
+ /* Our snapshot of __readers is up-to-date at this point because we
+ either set WRLOCKED using a CAS or were handed over WRLOCKED from
+ another writer whose snapshot of __readers we inherit. */
+ }
+
+ /* If we are in a read phase and there are no readers, try to start a write
+ phase. */
+ while (((r & PTHREAD_RWLOCK_WRPHASE) == 0)
+ && ((r >> PTHREAD_RWLOCK_READER_SHIFT) == 0))
+ {
+ /* Acquire MO so that we synchronize with prior writers and do
+ not interfere with their updates to __writers_futex, as well
+ as regarding prior readers and their updates to __wrphase_futex,
+ respectively. */
+ if (atomic_compare_exchange_weak_acquire (&rwlock->__data.__readers,
+ &r, r | PTHREAD_RWLOCK_WRPHASE))
+ {
+ /* We have started a write phase, so need to enable readers to wait.
+ See the similar case in __pthread_rwlock_rdlock_full. */
+ atomic_store_relaxed (&rwlock->__data.__wrphase_futex, 1);
+ /* Make sure we fall through to the end of the function. */
+ r |= PTHREAD_RWLOCK_WRPHASE;
+ break;
+ }
+ /* TODO Back-off. */
+ }
+
+ /* We are the primary writer; enable blocking on __writers_futex. Relaxed
+ MO is sufficient for futex words; acquire MO on the previous
+ modifications of __readers ensures that this store happens after the
+ store of value 0 by the previous primary writer. */
+ atomic_store_relaxed (&rwlock->__data.__writers_futex,
+ 1 | (may_share_futex_used_flag ? PTHREAD_RWLOCK_FUTEX_USED : 0));
+
+ if (__glibc_unlikely ((r & PTHREAD_RWLOCK_WRPHASE) == 0))
+ {
+ /* We are not in a read phase and there are readers (because of the
+ previous loop). Thus, we have to wait for explicit hand-over from
+ one of these readers.
+ We basically do the same steps as for the similar case in
+ __pthread_rwlock_rdlock_full, except that we additionally might try
+ to directly hand over to another writer and need to wake up
+ other writers or waiting readers (i.e., PTHREAD_RWLOCK_RWAITING). */
+ unsigned int wpf;
+ bool ready = false;
+ for (;;)
+ {
+ while (((wpf = atomic_load_relaxed (&rwlock->__data.__wrphase_futex))
+ | PTHREAD_RWLOCK_FUTEX_USED) == PTHREAD_RWLOCK_FUTEX_USED)
+ {
+ int private = __pthread_rwlock_get_private (rwlock);
+ if (((wpf & PTHREAD_RWLOCK_FUTEX_USED) == 0)
+ && !atomic_compare_exchange_weak_relaxed
+ (&rwlock->__data.__wrphase_futex, &wpf,
+ PTHREAD_RWLOCK_FUTEX_USED))
+ continue;
+ int err = futex_abstimed_wait_t64 (&rwlock->__data.__wrphase_futex,
+ PTHREAD_RWLOCK_FUTEX_USED, abstime, private);
+ if (err == ETIMEDOUT)
+ {
+ if (rwlock->__data.__flags
+ != PTHREAD_RWLOCK_PREFER_READER_NP)
+ {
+ /* We try writer--writer hand-over. */
+ unsigned int w = atomic_load_relaxed
+ (&rwlock->__data.__writers);
+ if (w != 0)
+ {
+ /* We are about to hand over WRLOCKED, so we must
+ release __writers_futex too; otherwise, we'd have
+ a pending store, which could at least prevent
+ other threads from waiting using the futex
+ because it could interleave with the stores
+ by subsequent writers. In turn, this means that
+ we have to clean up when we do not hand over
+ WRLOCKED.
+ Release MO so that another writer that gets
+ WRLOCKED from us can take over our view of
+ __readers. */
+ unsigned int wf = atomic_exchange_relaxed
+ (&rwlock->__data.__writers_futex, 0);
+ while (w != 0)
+ {
+ if (atomic_compare_exchange_weak_release
+ (&rwlock->__data.__writers, &w,
+ w | PTHREAD_RWLOCK_WRHANDOVER))
+ {
+ /* Wake other writers. */
+ if ((wf & PTHREAD_RWLOCK_FUTEX_USED) != 0)
+ futex_wake
+ (&rwlock->__data.__writers_futex, 1,
+ private);
+ return ETIMEDOUT;
+ }
+ /* TODO Back-off. */
+ }
+ /* We still own WRLOCKED and someone else might set
+ a write phase concurrently, so enable waiting
+ again. Make sure we don't lose the flag that
+ signals whether there are threads waiting on
+ this futex. */
+ atomic_store_relaxed
+ (&rwlock->__data.__writers_futex, wf);
+ }
+ }
+ /* If we timed out and we are not in a write phase, we can
+ just stop being a primary writer. Otherwise, we just
+ acquire the lock. */
+ r = atomic_load_relaxed (&rwlock->__data.__readers);
+ if ((r & PTHREAD_RWLOCK_WRPHASE) == 0)
+ {
+ /* We are about to release WRLOCKED, so we must release
+ __writers_futex too; see the handling of
+ writer--writer hand-over above. */
+ unsigned int wf = atomic_exchange_relaxed
+ (&rwlock->__data.__writers_futex, 0);
+ while ((r & PTHREAD_RWLOCK_WRPHASE) == 0)
+ {
+ /* While we don't need to make anything from a
+ caller's critical section visible to other
+ threads, we need to ensure that our changes to
+ __writers_futex are properly ordered.
+ Therefore, use release MO to synchronize with
+ subsequent primary writers. Also wake up any
+ waiting readers as they are waiting because of
+ us. */
+ if (atomic_compare_exchange_weak_release
+ (&rwlock->__data.__readers, &r,
+ (r ^ PTHREAD_RWLOCK_WRLOCKED)
+ & ~(unsigned int) PTHREAD_RWLOCK_RWAITING))
+ {
+ /* Wake other writers. */
+ if ((wf & PTHREAD_RWLOCK_FUTEX_USED) != 0)
+ futex_wake (&rwlock->__data.__writers_futex,
+ 1, private);
+ /* Wake waiting readers. */
+ if ((r & PTHREAD_RWLOCK_RWAITING) != 0)
+ futex_wake (&rwlock->__data.__readers,
+ INT_MAX, private);
+ return ETIMEDOUT;
+ }
+ }
+ /* We still own WRLOCKED and someone else might set a
+ write phase concurrently, so enable waiting again.
+ Make sure we don't lose the flag that signals
+ whether there are threads waiting on this futex. */
+ atomic_store_relaxed (&rwlock->__data.__writers_futex,
+ wf);
+ }
+ /* Use the acquire MO fence to mirror the steps taken in the
+ non-timeout case. Note that the read can happen both
+ in the atomic_load above as well as in the failure case
+ of the CAS operation. */
+ atomic_thread_fence_acquire ();
+ /* We still need to wait for explicit hand-over, but we must
+ not use futex_wait anymore. */
+ while ((atomic_load_relaxed
+ (&rwlock->__data.__wrphase_futex)
+ | PTHREAD_RWLOCK_FUTEX_USED)
+ == PTHREAD_RWLOCK_FUTEX_USED)
+ {
+ /* TODO Back-off. */
+ }
+ ready = true;
+ break;
+ }
+ /* If we got interrupted (EINTR) or the futex word does not have
+ the expected value (EAGAIN), retry. */
+ }
+ /* See pthread_rwlock_rdlock_full. */
+ if (ready)
+ break;
+ if ((atomic_load_acquire (&rwlock->__data.__readers)
+ & PTHREAD_RWLOCK_WRPHASE) != 0)
+ ready = true;
+ }
+ }
+
+ atomic_store_relaxed (&rwlock->__data.__cur_writer,
+ THREAD_GETMEM (THREAD_SELF, tid));
+ return 0;
+}
diff --git a/nptl/pthread_rwlock_timedrdlock.c b/nptl/pthread_rwlock_timedrdlock.c
index 9f084f8c34..174ddf11cd 100644
--- a/nptl/pthread_rwlock_timedrdlock.c
+++ b/nptl/pthread_rwlock_timedrdlock.c
@@ -35,3 +35,22 @@ pthread_rwlock_timedrdlock (pthread_rwlock_t *rwlock,
return __pthread_rwlock_rdlock_full (rwlock, abstime);
}
+
+/* 64-bit time version */
+
+int
+pthread_rwlock_timedrdlock_t64 (pthread_rwlock_t *rwlock,
+ const struct __timespec64 *abstime)
+{
+ /* Make sure the passed in timeout value is valid. Note that the previous
+ implementation assumed that this check *must* not be performed if there
+ would in fact be no blocking; however, POSIX only requires that "the
+ validity of the abstime parameter need not be checked if the lock can be
+ immediately acquired" (i.e., we need not but may check it). */
+ /* ??? Just move this to __pthread_rwlock_rdlock_full? */
+ if (__glibc_unlikely (abstime->tv_nsec >= 1000000000
+ || abstime->tv_nsec < 0))
+ return EINVAL;
+
+ return __pthread_rwlock_rdlock_full_t64 (rwlock, abstime);
+}
diff --git a/nptl/pthread_rwlock_timedwrlock.c b/nptl/pthread_rwlock_timedwrlock.c
index 5626505d2c..7b954d4a12 100644
--- a/nptl/pthread_rwlock_timedwrlock.c
+++ b/nptl/pthread_rwlock_timedwrlock.c
@@ -35,3 +35,22 @@ pthread_rwlock_timedwrlock (pthread_rwlock_t *rwlock,
return __pthread_rwlock_wrlock_full (rwlock, abstime);
}
+
+/* 64-bit time version */
+
+int
+pthread_rwlock_timedwrlock_t64 (pthread_rwlock_t *rwlock,
+ const struct __timespec64 *abstime)
+{
+ /* Make sure the passed in timeout value is valid. Note that the previous
+ implementation assumed that this check *must* not be performed if there
+ would in fact be no blocking; however, POSIX only requires that "the
+ validity of the abstime parameter need not be checked if the lock can be
+ immediately acquired" (i.e., we need not but may check it). */
+ /* ??? Just move this to __pthread_rwlock_wrlock_full? */
+ if (__glibc_unlikely (abstime->tv_nsec >= 1000000000
+ || abstime->tv_nsec < 0))
+ return EINVAL;
+
+ return __pthread_rwlock_wrlock_full_t64 (rwlock, abstime);
+}
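
For illustration only (not part of the patch): a 64-bit-time caller builds
an absolute CLOCK_REALTIME deadline in a struct __timespec64 and passes it
to the new entry points. The deadline construction below mirrors the
__gettimeofday-based pattern used elsewhere in this series; `lock' is a
hypothetical, already-initialized rwlock.

   struct timeval tv;
   struct __timespec64 abstime;

   /* Usage sketch: wait at most two seconds for the write lock.  */
   (void) __gettimeofday (&tv, NULL);
   abstime.tv_sec = tv.tv_sec + 2;
   abstime.tv_nsec = tv.tv_usec * 1000;

   int err = pthread_rwlock_timedwrlock_t64 (&lock, &abstime);
   if (err == ETIMEDOUT)
     {
       /* The deadline expired before the write lock became available.  */
     }
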
diff --git a/nptl/sem_timedwait.c b/nptl/sem_timedwait.c
index 22b0778cc2..893954a969 100644
--- a/nptl/sem_timedwait.c
+++ b/nptl/sem_timedwait.c
@@ -38,3 +38,21 @@ sem_timedwait (sem_t *sem, const struct timespec *abstime)
else
return __new_sem_wait_slow((struct new_sem *) sem, abstime);
}
+
+int
+sem_timedwait_t64 (sem_t *sem, const struct __timespec64 *abstime)
+{
+ if (abstime->tv_nsec < 0 || abstime->tv_nsec >= 1000000000)
+ {
+ __set_errno (EINVAL);
+ return -1;
+ }
+
+ /* Check sem_wait.c for a more detailed explanation why it is required. */
+ __pthread_testcancel ();
+
+ if (__new_sem_wait_fast ((struct new_sem *) sem, 0) == 0)
+ return 0;
+ else
+ return __new_sem_wait_slow_t64 ((struct new_sem *) sem, abstime);
+}
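
For illustration only (not part of the patch): unlike the rwlock functions
above, which return the error code directly, sem_timedwait_t64 keeps the
sem_wait convention of returning -1 and setting errno. A hypothetical
caller, with `sem' an already-initialized semaphore:

   struct timeval tv;
   struct __timespec64 abstime;

   /* Usage sketch: wait at most five seconds for a token.  */
   (void) __gettimeofday (&tv, NULL);
   abstime.tv_sec = tv.tv_sec + 5;
   abstime.tv_nsec = tv.tv_usec * 1000;

   if (sem_timedwait_t64 (&sem, &abstime) != 0 && errno == ETIMEDOUT)
     {
       /* No token became available before the deadline.  */
     }
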
diff --git a/nptl/sem_wait.c b/nptl/sem_wait.c
index 625bf08c83..b9f37f49bf 100644
--- a/nptl/sem_wait.c
+++ b/nptl/sem_wait.c
@@ -43,6 +43,30 @@ __new_sem_wait (sem_t *sem)
}
versioned_symbol (libpthread, __new_sem_wait, sem_wait, GLIBC_2_1);
+/* 64-bit time version */
+
+int
+__new_sem_wait_t64 (sem_t *sem)
+{
+ /* We need to check whether we need to act upon a cancellation request here
+ because POSIX specifies that cancellation points "shall occur" in
+ sem_wait and sem_timedwait, which also means that they need to check
+ this regardless of whether they block or not (unlike "may occur"
+ functions). See the POSIX Rationale for this requirement: Section
+ "Thread Cancellation Overview" [1] and austin group issue #1076 [2]
+ for thoughts on why this may be a suboptimal design.
+
+ [1] http://pubs.opengroup.org/onlinepubs/9699919799/xrat/V4_xsh_chap02.html
+ [2] http://austingroupbugs.net/view.php?id=1076
+ */
+ __pthread_testcancel ();
+
+ if (__new_sem_wait_fast ((struct new_sem *) sem, 0) == 0)
+ return 0;
+ else
+ return __new_sem_wait_slow_t64 ((struct new_sem *) sem, NULL);
+}
+
#if SHLIB_COMPAT (libpthread, GLIBC_2_0, GLIBC_2_1)
int
attribute_compat_text_section
diff --git a/nptl/sem_waitcommon.c b/nptl/sem_waitcommon.c
index a3412a0d35..d5bfe04aeb 100644
--- a/nptl/sem_waitcommon.c
+++ b/nptl/sem_waitcommon.c
@@ -119,6 +119,24 @@ do_futex_wait (struct new_sem *sem, const struct timespec *abstime)
return err;
}
+static int
+__attribute__ ((noinline))
+do_futex_wait_t64 (struct new_sem *sem, const struct __timespec64 *abstime)
+{
+ int err;
+
+#if __HAVE_64B_ATOMICS
+ err = futex_abstimed_wait_cancelable_t64 (
+ (unsigned int *) &sem->data + SEM_VALUE_OFFSET, 0, abstime,
+ sem->private);
+#else
+ err = futex_abstimed_wait_cancelable_t64 (&sem->value, SEM_NWAITERS_MASK,
+ abstime, sem->private);
+#endif
+
+ return err;
+}
+
/* Fast path: Try to grab a token without blocking. */
static int
__new_sem_wait_fast (struct new_sem *sem, int definitive_result)
@@ -310,6 +328,160 @@ error:
return err;
}
+/* 64-bit time version */
+
+static int
+__attribute__ ((noinline))
+__new_sem_wait_slow_t64 (struct new_sem *sem, const struct __timespec64 *abstime)
+{
+ int err = 0;
+
+#if __HAVE_64B_ATOMICS
+ /* Add a waiter. Relaxed MO is sufficient because we can rely on the
+ ordering provided by the RMW operations we use. */
+ uint64_t d = atomic_fetch_add_relaxed (&sem->data,
+ (uint64_t) 1 << SEM_NWAITERS_SHIFT);
+
+ pthread_cleanup_push (__sem_wait_cleanup, sem);
+
+ /* Wait for a token to be available. Retry until we can grab one. */
+ for (;;)
+ {
+ /* If there is no token available, sleep until there is. */
+ if ((d & SEM_VALUE_MASK) == 0)
+ {
+ err = do_futex_wait_t64 (sem, abstime);
+ /* A futex return value of 0 or EAGAIN is due to a real or spurious
+ wake-up, or due to a change in the number of tokens. We retry in
+ these cases.
+ If we timed out, forward this to the caller.
+ EINTR is returned if we are interrupted by a signal; we
+ forward this to the caller. (See futex_wait and related
+ documentation. Before Linux 2.6.22, EINTR was also returned on
+ spurious wake-ups; we only support more recent Linux versions,
+ so do not need to consider this here.) */
+ if (err == ETIMEDOUT || err == EINTR)
+ {
+ __set_errno (err);
+ err = -1;
+ /* Stop being registered as a waiter. */
+ atomic_fetch_add_relaxed (&sem->data,
+ -((uint64_t) 1 << SEM_NWAITERS_SHIFT));
+ break;
+ }
+ /* Relaxed MO is sufficient; see below. */
+ d = atomic_load_relaxed (&sem->data);
+ }
+ else
+ {
+ /* Try to grab both a token and stop being a waiter. We need
+ acquire MO so this synchronizes with all token providers (i.e.,
+ the RMW operation we read from or all those before it in
+ modification order; also see sem_post). On the failure path,
+ relaxed MO is sufficient because we only eventually need the
+ up-to-date value; the futex_wait or the CAS perform the real
+ work. */
+ if (atomic_compare_exchange_weak_acquire (&sem->data,
+ &d, d - 1 - ((uint64_t) 1 << SEM_NWAITERS_SHIFT)))
+ {
+ err = 0;
+ break;
+ }
+ }
+ }
+
+ pthread_cleanup_pop (0);
+#else
+ /* The main difference to the 64b-atomics implementation is that we need to
+ access value and nwaiters in separate steps, and that the nwaiters bit
+ in the value can temporarily not be set even if nwaiters is nonzero.
+ We work around incorrectly unsetting the nwaiters bit by letting sem_wait
+ set the bit again and waking the number of waiters that could grab a
+ token. There are two additional properties we need to ensure:
+ (1) We make sure that whenever unsetting the bit, we see the increment of
+ nwaiters by the other thread that set the bit. IOW, we will notice if
+ we make a mistake.
+ (2) When setting the nwaiters bit, we make sure that we see the unsetting
+ of the bit by another waiter that happened before us. This avoids having
+ to blindly set the bit whenever we need to block on it. We set/unset
+ the bit while having incremented nwaiters (i.e., are a registered
+ waiter), and the problematic case only happens when one waiter indeed
+ followed another (i.e., nwaiters was never larger than 1); thus, this
+ works similarly as with a critical section using nwaiters (see the MOs
+ and related comments below).
+
+ An alternative approach would be to unset the bit after decrementing
+ nwaiters; however, that would result in needing Dekker-like
+ synchronization and thus full memory barriers. We also would not be able
+ to prevent misspeculation, so this alternative scheme does not seem
+ beneficial. */
+ unsigned int v;
+
+ /* Add a waiter. We need acquire MO so this synchronizes with the release
+ MO we use when decrementing nwaiters below; it ensures that if another
+ waiter unset the bit before us, we see that and set it again. Also see
+ property (2) above. */
+ atomic_fetch_add_acquire (&sem->nwaiters, 1);
+
+ pthread_cleanup_push (__sem_wait_cleanup, sem);
+
+ /* Wait for a token to be available. Retry until we can grab one. */
+ /* We do not need any ordering wrt. to this load's reads-from, so relaxed
+ MO is sufficient. The acquire MO above ensures that in the problematic
+ case, we do see the unsetting of the bit by another waiter. */
+ v = atomic_load_relaxed (&sem->value);
+ do
+ {
+ do
+ {
+ /* We are about to block, so make sure that the nwaiters bit is
+ set. We need release MO on the CAS to ensure that when another
+ waiter unsets the nwaiters bit, it will also observe that we
+ incremented nwaiters in the meantime (also see the unsetting of
+ the bit below). Relaxed MO on CAS failure is sufficient (see
+ above). */
+ do
+ {
+ if ((v & SEM_NWAITERS_MASK) != 0)
+ break;
+ }
+ while (!atomic_compare_exchange_weak_release (&sem->value,
+ &v, v | SEM_NWAITERS_MASK));
+ /* If there is no token, wait. */
+ if ((v >> SEM_VALUE_SHIFT) == 0)
+ {
+ /* See __HAVE_64B_ATOMICS variant. */
+ err = do_futex_wait_t64 (sem, abstime);
+ if (err == ETIMEDOUT || err == EINTR)
+ {
+ __set_errno (err);
+ err = -1;
+ goto error;
+ }
+ err = 0;
+ /* We blocked, so there might be a token now. Relaxed MO is
+ sufficient (see above). */
+ v = atomic_load_relaxed (&sem->value);
+ }
+ }
+ /* If there is no token, we must not try to grab one. */
+ while ((v >> SEM_VALUE_SHIFT) == 0);
+ }
+ /* Try to grab a token. We need acquire MO so this synchronizes with
+ all token providers (i.e., the RMW operation we read from or all those
+ before it in modification order; also see sem_post). */
+ while (!atomic_compare_exchange_weak_acquire (&sem->value,
+ &v, v - (1 << SEM_VALUE_SHIFT)));
+
+error:
+ pthread_cleanup_pop (0);
+
+ __sem_wait_32_finish (sem);
+#endif
+
+ return err;
+}
+
/* Stop being a registered waiter (non-64b-atomics code only). */
#if !__HAVE_64B_ATOMICS
static void
diff --git a/rt/Versions b/rt/Versions
index 1eef2e604f..a1c98a8576 100644
--- a/rt/Versions
+++ b/rt/Versions
@@ -48,5 +48,6 @@ librt {
__timerfd_settime64;
__mq_timedreceive_t64;
__mq_timedsend_t64;
+ __aio_suspend_t64;
}
}
diff --git a/sysdeps/nptl/aio_misc.h b/sysdeps/nptl/aio_misc.h
index 47b1a36479..3c74f8484c 100644
--- a/sysdeps/nptl/aio_misc.h
+++ b/sysdeps/nptl/aio_misc.h
@@ -71,4 +71,43 @@
} \
} while (0)
+#define AIO_MISC_WAIT_T64(result, futex, timeout, cancel) \
+ do { \
+ volatile unsigned int *futexaddr = &futex; \
+ unsigned int oldval = futex; \
+ \
+ if (oldval != 0) \
+ { \
+ pthread_mutex_unlock (&__aio_requests_mutex); \
+ \
+ int oldtype; \
+ if (cancel) \
+ oldtype = LIBC_CANCEL_ASYNC (); \
+ \
+ int status; \
+ do \
+ { \
+ status = futex_reltimed_wait_t64 ((unsigned int *) futexaddr, \
+ oldval, timeout, FUTEX_PRIVATE);\
+ if (status != EAGAIN) \
+ break; \
+ \
+ oldval = *futexaddr; \
+ } \
+ while (oldval != 0); \
+ \
+ if (cancel) \
+ LIBC_CANCEL_RESET (oldtype); \
+ \
+ if (status == EINTR) \
+ result = EINTR; \
+ else if (status == ETIMEDOUT) \
+ result = EAGAIN; \
+ else \
+ assert (status == 0 || status == EAGAIN); \
+ \
+ pthread_mutex_lock (&__aio_requests_mutex); \
+ } \
+ } while (0)
+
#include_next <aio_misc.h>
diff --git a/sysdeps/nptl/lowlevellock.h b/sysdeps/nptl/lowlevellock.h
index 54e3c28b0b..cedc9daa05 100644
--- a/sysdeps/nptl/lowlevellock.h
+++ b/sysdeps/nptl/lowlevellock.h
@@ -122,6 +122,10 @@ extern void __lll_lock_wait (int *futex, int private) attribute_hidden;
extern int __lll_timedlock_wait (int *futex, const struct timespec *,
int private) attribute_hidden;
+extern int __lll_timedlock_wait_t64 (int *futex,
+ const struct __timespec64 *,
+ int private) attribute_hidden;
+
/* As __lll_lock, but with a timeout. If the timeout occurs then return
ETIMEDOUT. If ABSTIME is invalid, return EINVAL. */
@@ -138,6 +142,19 @@ extern int __lll_timedlock_wait (int *futex, const struct timespec *,
#define lll_timedlock(futex, abstime, private) \
__lll_timedlock (&(futex), abstime, private)
+#define __lll_timedlock_t64(futex, abstime, private) \
+ ({ \
+ int *__futex = (futex); \
+ int __val = 0; \
+ \
+ if (__glibc_unlikely \
+ (atomic_compare_and_exchange_bool_acq (__futex, 1, 0))) \
+ __val = __lll_timedlock_wait_t64 (__futex, abstime, private); \
+ __val; \
+ })
+#define lll_timedlock_t64(futex, abstime, private) \
+ __lll_timedlock_t64 (&(futex), abstime, private)
+
/* This is an expression rather than a statement even though its value is
void, so that it can be used in a comma expression or as an expression
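
For reference (not part of the patch): lll_timedlock_t64 follows the same
pattern as lll_timedlock, i.e. an uncontended CAS from 0 to 1 with a
fall-back to __lll_timedlock_wait_t64 on contention. A hypothetical call
site, assuming the usual NPTL mutex field and macro names:

   /* Usage sketch; mutex->__data.__lock and PTHREAD_MUTEX_PSHARED are the
      usual NPTL names and are assumed here, not introduced by this patch.  */
   int result = lll_timedlock_t64 (mutex->__data.__lock, abstime,
                                   PTHREAD_MUTEX_PSHARED (mutex));
   if (result != 0)
     return result;   /* ETIMEDOUT or EINVAL.  */
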
diff --git a/sysdeps/pthread/aio_suspend.c b/sysdeps/pthread/aio_suspend.c
index c739285c6a..8a38754a12 100644
--- a/sysdeps/pthread/aio_suspend.c
+++ b/sysdeps/pthread/aio_suspend.c
@@ -254,3 +254,167 @@ aio_suspend (const struct aiocb *const list[], int nent,
}
weak_alias (aio_suspend, aio_suspend64)
+
+#ifdef DONT_NEED_AIO_MISC_COND
+static int
+__attribute__ ((noinline))
+do_aio_misc_wait_t64 (unsigned int *cntr,
+ const struct __timespec64 *timeout)
+{
+ int result = 0;
+
+ AIO_MISC_WAIT_T64 (result, *cntr, timeout, 1);
+
+ return result;
+}
+#endif
+
+int
+aio_suspend_t64 (const struct aiocb *const list[], int nent,
+ const struct __timespec64 *timeout)
+{
+ if (__glibc_unlikely (nent < 0))
+ {
+ __set_errno (EINVAL);
+ return -1;
+ }
+
+ struct waitlist waitlist[nent];
+ struct requestlist *requestlist[nent];
+#ifndef DONT_NEED_AIO_MISC_COND
+ pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
+#endif
+ int cnt;
+ bool any = false;
+ int result = 0;
+ unsigned int cntr = 1;
+
+ /* Request the mutex. */
+ pthread_mutex_lock (&__aio_requests_mutex);
+
+ /* There is not yet a finished request. Signal the request that
+ we are working for it. */
+ for (cnt = 0; cnt < nent; ++cnt)
+ if (list[cnt] != NULL)
+ {
+ if (list[cnt]->__error_code == EINPROGRESS)
+ {
+ requestlist[cnt] = __aio_find_req ((aiocb_union *) list[cnt]);
+
+ if (requestlist[cnt] != NULL)
+ {
+#ifndef DONT_NEED_AIO_MISC_COND
+ waitlist[cnt].cond = &cond;
+#endif
+ waitlist[cnt].result = NULL;
+ waitlist[cnt].next = requestlist[cnt]->waiting;
+ waitlist[cnt].counterp = &cntr;
+ waitlist[cnt].sigevp = NULL;
+#ifdef BROKEN_THREAD_SIGNALS
+ waitlist[cnt].caller_pid = 0; /* Not needed. */
+#endif
+ requestlist[cnt]->waiting = &waitlist[cnt];
+ any = true;
+ }
+ else
+ /* We will never suspend. */
+ break;
+ }
+ else
+ /* We will never suspend. */
+ break;
+ }
+
+
+ /* Only wait if none of the entries has finished already and there is
+ at least one request to wait for. */
+ if (cnt == nent && any)
+ {
+ struct clparam clparam =
+ {
+ .list = list,
+ .waitlist = waitlist,
+ .requestlist = requestlist,
+#ifndef DONT_NEED_AIO_MISC_COND
+ .cond = &cond,
+#endif
+ .nent = nent
+ };
+
+ pthread_cleanup_push (cleanup, &clparam);
+
+#ifdef DONT_NEED_AIO_MISC_COND
+ result = do_aio_misc_wait_t64 (&cntr, timeout);
+#else
+ if (timeout == NULL)
+ result = pthread_cond_wait (&cond, &__aio_requests_mutex);
+ else
+ {
+ /* We have to convert the relative timeout value into the
+ absolute time value that __pthread_cond_timedwait_t64 expects. */
+ struct timeval now;
+ struct __timespec64 abstime;
+
+ __gettimeofday (&now, NULL);
+ abstime.tv_nsec = timeout->tv_nsec + now.tv_usec * 1000;
+ abstime.tv_sec = timeout->tv_sec + now.tv_sec;
+ if (abstime.tv_nsec >= 1000000000)
+ {
+ abstime.tv_nsec -= 1000000000;
+ abstime.tv_sec += 1;
+ }
+
+ result = __pthread_cond_timedwait_t64 (&cond,
+ &__aio_requests_mutex,
+ &abstime);
+ }
+#endif
+
+ pthread_cleanup_pop (0);
+ }
+
+ /* Now remove the entry in the waiting list for all requests
+ which didn't terminate. */
+ while (cnt-- > 0)
+ if (list[cnt] != NULL && list[cnt]->__error_code == EINPROGRESS)
+ {
+ struct waitlist **listp;
+
+ assert (requestlist[cnt] != NULL);
+
+ /* There is the chance that we cannot find our entry anymore. This
+ could happen if the request terminated and restarted again. */
+ listp = &requestlist[cnt]->waiting;
+ while (*listp != NULL && *listp != &waitlist[cnt])
+ listp = &(*listp)->next;
+
+ if (*listp != NULL)
+ *listp = (*listp)->next;
+ }
+
+#ifndef DONT_NEED_AIO_MISC_COND
+ /* Release the condition variable. */
+ if (__glibc_unlikely (pthread_cond_destroy (&cond) != 0))
+ /* This must never happen. */
+ abort ();
+#endif
+
+ if (result != 0)
+ {
+#ifndef DONT_NEED_AIO_MISC_COND
+ /* An error occurred. Possibly it's ETIMEDOUT. We have to translate
+ the timeout error report of `pthread_cond_timedwait' to the
+ form expected from `aio_suspend'. */
+ if (result == ETIMEDOUT)
+ __set_errno (EAGAIN);
+ else
+#endif
+ __set_errno (result);
+
+ result = -1;
+ }
+
+ /* Release the mutex. */
+ pthread_mutex_unlock (&__aio_requests_mutex);
+
+ return result;
+}
diff --git a/sysdeps/unix/sysv/linux/futex-internal.h b/sysdeps/unix/sysv/linux/futex-internal.h
index 1386807f5b..ceffa906c1 100644
--- a/sysdeps/unix/sysv/linux/futex-internal.h
+++ b/sysdeps/unix/sysv/linux/futex-internal.h
@@ -131,6 +131,32 @@ futex_reltimed_wait (unsigned int *futex_word, unsigned int expected,
}
}
+/* 64-bit time version */
+static __always_inline int
+futex_reltimed_wait_t64 (unsigned int *futex_word, unsigned int expected,
+ const struct __timespec64 *reltime, int private)
+{
+ int err = lll_futex_timed_wait_t64 (futex_word, expected, reltime,
+ private);
+ switch (err)
+ {
+ case 0:
+ case -EAGAIN:
+ case -EINTR:
+ case -ETIMEDOUT:
+ return -err;
+
+ case -EFAULT: /* Must have been caused by a glibc or application bug. */
+ case -EINVAL: /* Either due to wrong alignment or due to the timeout not
+ being normalized. Must have been caused by a glibc or
+ application bug. */
+ case -ENOSYS: /* Must have been caused by a glibc bug. */
+ /* No other errors are documented at this time. */
+ default:
+ futex_fatal_error ();
+ }
+}
+
/* See sysdeps/nptl/futex-internal.h for details. */
static __always_inline int
futex_reltimed_wait_cancelable (unsigned int *futex_word,
@@ -160,6 +186,37 @@ futex_reltimed_wait_cancelable (unsigned int *futex_word,
}
}
+/* 64-bit time version */
+
+static __always_inline int
+futex_reltimed_wait_cancelable_t64 (unsigned int *futex_word,
+ unsigned int expected,
+ const struct __timespec64 *reltime,
+ int private)
+{
+ int oldtype;
+ oldtype = __pthread_enable_asynccancel ();
+ int err = lll_futex_timed_wait_t64 (futex_word, expected, reltime, private);
+ __pthread_disable_asynccancel (oldtype);
+ switch (err)
+ {
+ case 0:
+ case -EAGAIN:
+ case -EINTR:
+ case -ETIMEDOUT:
+ return -err;
+
+ case -EFAULT: /* Must have been caused by a glibc or application bug. */
+ case -EINVAL: /* Either due to wrong alignment or due to the timeout not
+ being normalized. Must have been caused by a glibc or
+ application bug. */
+ case -ENOSYS: /* Must have been caused by a glibc bug. */
+ /* No other errors are documented at this time. */
+ default:
+ futex_fatal_error ();
+ }
+}
+
/* See sysdeps/nptl/futex-internal.h for details. */
static __always_inline int
futex_abstimed_wait (unsigned int *futex_word, unsigned int expected,
@@ -190,6 +247,36 @@ futex_abstimed_wait (unsigned int *futex_word, unsigned int expected,
}
}
+/* 64-bit time version */
+static __always_inline int
+futex_abstimed_wait_t64 (unsigned int *futex_word, unsigned int expected,
+ const struct __timespec64 *abstime, int private)
+{
+ /* Work around the fact that the kernel rejects negative timeout values
+ despite them being valid. */
+ if (__glibc_unlikely ((abstime != NULL) && (abstime->tv_sec < 0)))
+ return ETIMEDOUT;
+ int err = lll_futex_timed_wait_bitset_t64 (futex_word, expected, abstime,
+ FUTEX_CLOCK_REALTIME, private);
+ switch (err)
+ {
+ case 0:
+ case -EAGAIN:
+ case -EINTR:
+ case -ETIMEDOUT:
+ return -err;
+
+ case -EFAULT: /* Must have been caused by a glibc or application bug. */
+ case -EINVAL: /* Either due to wrong alignment or due to the timeout not
+ being normalized. Must have been caused by a glibc or
+ application bug. */
+ case -ENOSYS: /* Must have been caused by a glibc bug. */
+ /* No other errors are documented at this time. */
+ default:
+ futex_fatal_error ();
+ }
+}
+
/* See sysdeps/nptl/futex-internal.h for details. */
static __always_inline int
futex_abstimed_wait_cancelable (unsigned int *futex_word,
@@ -224,6 +311,42 @@ futex_abstimed_wait_cancelable (unsigned int *futex_word,
}
}
+/* 64-bit time version */
+
+static __always_inline int
+futex_abstimed_wait_cancelable_t64 (unsigned int *futex_word,
+ unsigned int expected,
+ const struct __timespec64 *abstime,
+ int private)
+{
+ /* Work around the fact that the kernel rejects negative timeout values
+ despite them being valid. */
+ if (__glibc_unlikely ((abstime != NULL) && (abstime->tv_sec < 0)))
+ return ETIMEDOUT;
+ int oldtype;
+ oldtype = __pthread_enable_asynccancel ();
+ int err = lll_futex_timed_wait_bitset_t64 (futex_word, expected, abstime,
+ FUTEX_CLOCK_REALTIME, private);
+ __pthread_disable_asynccancel (oldtype);
+ switch (err)
+ {
+ case 0:
+ case -EAGAIN:
+ case -EINTR:
+ case -ETIMEDOUT:
+ return -err;
+
+ case -EFAULT: /* Must have been caused by a glibc or application bug. */
+ case -EINVAL: /* Either due to wrong alignment or due to the timeout not
+ being normalized. Must have been caused by a glibc or
+ application bug. */
+ case -ENOSYS: /* Must have been caused by a glibc bug. */
+ /* No other errors are documented at this time. */
+ default:
+ futex_fatal_error ();
+ }
+}
+
/* See sysdeps/nptl/futex-internal.h for details. */
static __always_inline void
futex_wake (unsigned int *futex_word, int processes_to_wake, int private)
diff --git a/sysdeps/unix/sysv/linux/lowlevellock-futex.h b/sysdeps/unix/sysv/linux/lowlevellock-futex.h
index bb4fbae13b..e1cad10544 100644
--- a/sysdeps/unix/sysv/linux/lowlevellock-futex.h
+++ b/sysdeps/unix/sysv/linux/lowlevellock-futex.h
@@ -97,6 +97,16 @@
__lll_private_flag (FUTEX_WAIT, private), \
val, timeout)
+#define lll_futex_timed_wait_t64(futexp, val, timeout, private) \
+ ({ \
+ struct timespec ts; \
+ ts.tv_sec = timeout->tv_sec; \
+ ts.tv_nsec = timeout->tv_nsec; \
+ lll_futex_syscall (4, futexp, \
+ __lll_private_flag (FUTEX_WAIT, private), \
+ val, &ts); \
+ })
+
#define lll_futex_timed_wait_bitset(futexp, val, timeout, clockbit, private) \
lll_futex_syscall (6, futexp, \
__lll_private_flag (FUTEX_WAIT_BITSET | (clockbit), \
@@ -104,6 +114,18 @@
val, timeout, NULL /* Unused. */, \
FUTEX_BITSET_MATCH_ANY)
+#define lll_futex_timed_wait_bitset_t64(futexp, val, timeout, clockbit, private) \
+ ({ \
+ struct timespec ts; \
+ ts.tv_sec = timeout->tv_sec; \
+ ts.tv_nsec = timeout->tv_nsec; \
+ lll_futex_syscall (6, futexp, \
+ __lll_private_flag (FUTEX_WAIT_BITSET | (clockbit), \
+ private), \
+ val, &ts, NULL /* Unused. */, \
+ FUTEX_BITSET_MATCH_ANY); \
+ })
+
#define lll_futex_wake(futexp, nr, private) \
lll_futex_syscall (4, futexp, \
__lll_private_flag (FUTEX_WAKE, private), nr, 0)
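
Both _t64 wrappers above copy the 64-bit timespec into the kernel's
struct timespec before issuing the futex syscall; with a 32-bit time_t,
tv_sec values past 2038 would be silently truncated. A minimal,
illustrative conversion helper with an explicit range check (not part of
this patch; the name is hypothetical):

   /* Hypothetical helper: narrow a __timespec64 to the kernel timespec,
      failing instead of truncating when tv_sec does not fit in time_t.  */
   static inline int
   timespec64_to_kernel_timespec (const struct __timespec64 *ts64,
                                  struct timespec *ts)
   {
     if (ts64->tv_sec != (time_t) ts64->tv_sec)
       return -EOVERFLOW;
     ts->tv_sec = (time_t) ts64->tv_sec;
     ts->tv_nsec = ts64->tv_nsec;
     return 0;
   }
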
--
2.11.0