[PATCH] nptl: Fix PTHREAD_PRIO_PROTECT timed lock
Adhemerval Zanella
adhemerval.zanella@linaro.org
Fri Nov 27 11:22:27 GMT 2020
On 26/11/2020 09:51, Adhemerval Zanella wrote:
>
>
> On 26/11/2020 09:15, Adhemerval Zanella wrote:
>>
>>
>> On 26/11/2020 08:27, Mike Crowe wrote:
>>> On Wednesday 25 November 2020 at 17:27:23 -0300, Adhemerval Zanella wrote:
>>>> Commit 878fe624d4 replaced lll_futex_timed_wait, which expects a
>>>> relative timeout, with __futex_abstimed_wait64, which expects an
>>>> absolute timeout.  However, the code still passes a relative timeout.
>>>>
>>>> Also, PTHREAD_PRIO_PROTECT support for clocks other than
>>>> CLOCK_REALTIME has been broken since the introduction of
>>>> pthread_mutex_clocklock (9d20e22e46), because lll_futex_timed_wait
>>>> always uses CLOCK_REALTIME.
>>>>
>>>> This patch fixes it by removing the relative time calculation.  It
>>>> also adds some xtests that test both intra-process (threads) and
>>>> inter-process usage.
>>>>
>>>> Checked on x86_64-linux-gnu.
>>>> ---
>>>> nptl/Makefile | 3 ++-
>>>> nptl/pthread_mutex_timedlock.c | 29 +++++------------------------
>>>> nptl/tst-mutexpp5.c | 2 ++
>>>> nptl/tst-mutexpp9.c | 2 ++
>>>> sysdeps/pthread/tst-mutex5.c | 12 +++++++++++-
>>>> sysdeps/pthread/tst-mutex9.c | 13 ++++++++++++-
>>>> 6 files changed, 34 insertions(+), 27 deletions(-)
>>>> create mode 100644 nptl/tst-mutexpp5.c
>>>> create mode 100644 nptl/tst-mutexpp9.c
>>>>
>>>> diff --git a/nptl/Makefile b/nptl/Makefile
>>>> index a48426a396..94d805f0d4 100644
>>>> --- a/nptl/Makefile
>>>> +++ b/nptl/Makefile
>>>> @@ -309,7 +309,8 @@ tests-internal := tst-robustpi8 tst-rwlock19 tst-rwlock20 \
>>>> tst-setgetname \
>>>>
>>>> xtests = tst-setuid1 tst-setuid1-static tst-setuid2 \
>>>> - tst-mutexpp1 tst-mutexpp6 tst-mutexpp10 tst-setgroups
>>>> + tst-mutexpp1 tst-mutexpp6 tst-mutexpp10 tst-setgroups \
>>>> + tst-mutexpp5 tst-mutexpp9
>>>>
>>>> # This test can run into task limits because of a linux kernel bug
>>>> # and then cause the make process to fail too, see bug 24537.
>>>> diff --git a/nptl/pthread_mutex_timedlock.c b/nptl/pthread_mutex_timedlock.c
>>>> index aaaafa21ce..74adffe790 100644
>>>> --- a/nptl/pthread_mutex_timedlock.c
>>>> +++ b/nptl/pthread_mutex_timedlock.c
>>>> @@ -547,30 +547,11 @@ __pthread_mutex_clocklock_common (pthread_mutex_t *mutex,
>>>> goto failpp;
>>>> }
>>>>
>>>> - struct __timespec64 rt;
>>>> -
>>>> - /* Get the current time. */
>>>> - __clock_gettime64 (CLOCK_REALTIME, &rt);
>>>> -
>>>> - /* Compute relative timeout. */
>>>> - rt.tv_sec = abstime->tv_sec - rt.tv_sec;
>>>> - rt.tv_nsec = abstime->tv_nsec - rt.tv_nsec;
>>>> - if (rt.tv_nsec < 0)
>>>> - {
>>>> - rt.tv_nsec += 1000000000;
>>>> - --rt.tv_sec;
>>>> - }
>>>> -
>>>> - /* Already timed out? */
>>>> - if (rt.tv_sec < 0)
>>>> - {
>>>> - result = ETIMEDOUT;
>>>> - goto failpp;
>>>> - }
>>>> -
>>>> - __futex_abstimed_wait64 (
>>>> - (unsigned int *) &mutex->__data.__lock, clockid,
>>>> - ceilval | 2, &rt, PTHREAD_MUTEX_PSHARED (mutex));
>>>> + int e = __futex_abstimed_wait64 (
>>>> + (unsigned int *) &mutex->__data.__lock, ceilval | 2,
>>>> + clockid, abstime, PTHREAD_MUTEX_PSHARED (mutex));
>>>> + if (e == ETIMEDOUT)
>>>> + return ETIMEDOUT;
>>>
>>> I'm worried that futex could return other errors here which would cause a
>>> busy infinite loop. However, my attempts to provoke EINVAL have failed
>>> since the validity of abstime.tv_nsec is checked earlier. Presumably we
>>> should never get this far if EOVERFLOW could be returned? (If there is a
>>> problem here, then it also affects other mutex types which have similar
>>> code.)
>>
>> Reviewing the futex-internal.h calls that might pass a timeout where
>> EOVERFLOW can happen, I see the nptl code requires some fixes.
>>
>> At nptl/pthread_mutex_timedlock.c for robust mutexes:
>>
>> 138 case PTHREAD_MUTEX_ROBUST_RECURSIVE_NP:
>> 139 case PTHREAD_MUTEX_ROBUST_ERRORCHECK_NP:
>> 140 case PTHREAD_MUTEX_ROBUST_NORMAL_NP:
>> 141 case PTHREAD_MUTEX_ROBUST_ADAPTIVE_NP:
>> [...]
>> 235 /* We are about to block; check whether the timeout is invalid. */
>> 236 if (! valid_nanoseconds (abstime->tv_nsec))
>> 237 return EINVAL;
>> 238 /* Work around the fact that the kernel rejects negative timeout
>> 239 values despite them being valid. */
>> 240 if (__glibc_unlikely (abstime->tv_sec < 0))
>> 241 return ETIMEDOUT;
>>
>> FUTEX_WAITERS will trigger a futex wake on pthread_mutex_unlock, so
>> this check is an optimization to avoid it.  It could be removed since
>> __futex_abstimed_wait64 does the same check (at the cost of a possible
>> spurious futex call).
>>
>> [...]
>> 268 int err = __futex_abstimed_wait64 (
>> 269 (unsigned int *) &mutex->__data.__lock,
>> 270 oldval, clockid, abstime,
>> 271 PTHREAD_ROBUST_MUTEX_PSHARED (mutex));
>> 272 /* The futex call timed out. */
>> 273 if (err == ETIMEDOUT)
>> 274 return err;
>>
>> We need to handle EOVERFLOW here, since if no error is reported it is
>> assumed that the robust mutex was locked.
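>>
>> Roughly the direction I have in mind (just a sketch; whether EOVERFLOW
>> is propagated as-is or mapped to another error is to be decided in the
>> follow-up patch):
>>
>>   int err = __futex_abstimed_wait64 (
>>     (unsigned int *) &mutex->__data.__lock,
>>     oldval, clockid, abstime,
>>     PTHREAD_ROBUST_MUTEX_PSHARED (mutex));
>>   /* Both a timeout and an unrepresentable timeout mean the mutex was
>>      not acquired, so do not fall through to the code that assumes
>>      ownership.  */
>>   if (err == ETIMEDOUT || err == EOVERFLOW)
>>     return err;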
>>
>>
>> The priority inheritance (PI) mutexes seem to be handled as expected:
>>
>> 307 case PTHREAD_MUTEX_PI_RECURSIVE_NP:
>> 308 case PTHREAD_MUTEX_PI_ERRORCHECK_NP:
>> 309 case PTHREAD_MUTEX_PI_NORMAL_NP:
>> 310 case PTHREAD_MUTEX_PI_ADAPTIVE_NP:
>> 311 case PTHREAD_MUTEX_PI_ROBUST_RECURSIVE_NP:
>> 312 case PTHREAD_MUTEX_PI_ROBUST_ERRORCHECK_NP:
>> 313 case PTHREAD_MUTEX_PI_ROBUST_NORMAL_NP:
>> 314 case PTHREAD_MUTEX_PI_ROBUST_ADAPTIVE_NP:
>> [...]
>> 380 /* The mutex is locked. The kernel will now take care of
>> 381 everything. The timeout value must be a relative value.
>> 382 Convert it. */
>> 383 int private = (robust
>> 384 ? PTHREAD_ROBUST_MUTEX_PSHARED (mutex)
>> 385 : PTHREAD_MUTEX_PSHARED (mutex));
>> 386 int e = futex_lock_pi64 (&mutex->__data.__lock, abstime, private);
>> 387 if (e == ETIMEDOUT)
>> 388 return ETIMEDOUT;
>> 389 else if (e == ESRCH || e == EDEADLK)
>> 390 {
>> 391 assert (e != EDEADLK
>> 392 || (kind != PTHREAD_MUTEX_ERRORCHECK_NP
>> 393 && kind != PTHREAD_MUTEX_RECURSIVE_NP));
>> 394 /* ESRCH can happen only for non-robust PI mutexes where
>> 395 the owner of the lock died. */
>> 396 assert (e != ESRCH || !robust);
>> 397
>> 398 /* Delay the thread until the timeout is reached. Then return
>> 399 ETIMEDOUT. */
>> 400 do
>> 401 e = __futex_abstimed_wait64 (&(unsigned int){0}, 0, clockid,
>> 402 abstime, private);
>> 403 while (e != ETIMEDOUT);
>> 404 return ETIMEDOUT;
>> 405 }
>> 406 else if (e != 0)
>> 407 return e;
>>
>> An EOVERFLOW from futex_lock_pi64 will be handled by the final
>> 'e != 0' check, and the internal '__futex_abstimed_wait64' should only
>> see a valid abstime (since futex_lock_pi64 would have failed earlier
>> otherwise).
>>
>> As for the priority-protected (PP) ones:
>>
>> 469 case PTHREAD_MUTEX_PP_RECURSIVE_NP:
>> 470 case PTHREAD_MUTEX_PP_ERRORCHECK_NP:
>> 471 case PTHREAD_MUTEX_PP_NORMAL_NP:
>> 472 case PTHREAD_MUTEX_PP_ADAPTIVE_NP:
>> [...]
>> 550 int e = __futex_abstimed_wait64 (
>> 551 (unsigned int *) &mutex->__data.__lock, ceilval | 2,
>> 552 clockid, abstime, PTHREAD_MUTEX_PSHARED (mutex));
>> 553 if (e == ETIMEDOUT)
>> 554 return ETIMEDOUT;
>>
>> As you have noted, we do need to handle EOVERFLOW here, otherwise it
>> will trigger a busy infinite loop.  It does not happen now because the
>> timeouts passed to __pthread_mutex_clocklock_common by 32-bit
>> architectures with 32-bit time support always fit in a 32-bit
>> timespec; but it might happen once we add 64-bit time support for
>> kernels older than v5.1.
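>>
>> To make the failure mode concrete, this wait sits inside the retry
>> loop for the ceiling value, so any error other than ETIMEDOUT that
>> makes the futex call return immediately (such as EOVERFLOW) just
>> re-enters the loop.  A simplified sketch of the direction I mean (not
>> the exact final code):
>>
>>   for (;;)
>>     {
>>       /* ... try to take the lock at the current ceiling value ... */
>>
>>       int e = __futex_abstimed_wait64 (
>>         (unsigned int *) &mutex->__data.__lock, ceilval | 2,
>>         clockid, abstime, PTHREAD_MUTEX_PSHARED (mutex));
>>       /* Returning on EOVERFLOW as well keeps the loop from spinning
>>          when the timeout cannot be passed to the kernel.  */
>>       if (e == ETIMEDOUT || e == EOVERFLOW)
>>         return e;
>>     }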
>>
>>
>> On the pthread_rwlock code (nptl/pthread_rwlock_common.c) there are
>> some issues:
>>
>> 328 while (((r = atomic_load_relaxed (&rwlock->__data.__readers))
>> 329 & PTHREAD_RWLOCK_RWAITING) != 0)
>> 330 {
>> 331 int private = __pthread_rwlock_get_private (rwlock);
>> 332 int err = __futex_abstimed_wait64 (&rwlock->__data.__readers,
>> 333 r, clockid, abstime,
>> 334 private);
>> 335 /* We ignore EAGAIN and EINTR. On time-outs, we can just
>> 336 return because we don't need to clean up anything. */
>> 337 if (err == ETIMEDOUT)
>> 338 return err;
>> 339 }
>>
>> This also requires handling EOVERFLOW.
>>
>> 460 int err = __futex_abstimed_wait64 (&rwlock->__data.__wrphase_futex,
>> 461 1 | PTHREAD_RWLOCK_FUTEX_USED,
>> 462 clockid, abstime, private);
>> 463 if (err == ETIMEDOUT)
>> 464 {
>> 465 /* If we timed out, we need to unregister. If no read phase
>> 466 has been installed while we waited, we can just decrement
>> 467 the number of readers. Otherwise, we just acquire the
>> 468 lock, which is allowed because we give no precise timing
>> 469 guarantees, and because the timeout is only required to
>> 470 be in effect if we would have had to wait for other
>> 471 threads (e.g., if futex_wait would time-out immediately
>> 472 because the given absolute time is in the past). */
>>
>> Same here, I think EOVERFLOW should be handled as ETIMEDOUT in this case.
>>
>> 730 int err = __futex_abstimed_wait64 (&rwlock->__data.__writers_futex,
>> 731 1 | PTHREAD_RWLOCK_FUTEX_USED,
>> 732 clockid, abstime, private);
>> 733 if (err == ETIMEDOUT)
>> 734 {
>> 735 if (prefer_writer)
>> 736 {
>> 737 /* We need to unregister as a waiting writer. If we are the
>> 738 last writer and writer--writer hand-over is available,
>> 739 we must make use of it because nobody else will reset
>> 740 WRLOCKED otherwise. (If we use it, we simply pretend
>> 741 that this happened before the timeout; see
>> 742 pthread_rwlock_rdlock_full for the full reasoning.)
>> 743 Also see the similar code above. */
>>
>> And
>>
>> 829 int err = __futex_abstimed_wait64 (&rwlock->__data.__wrphase_futex,
>> 830 PTHREAD_RWLOCK_FUTEX_USED,
>> 831 clockid, abstime, private);
>> 832 if (err == ETIMEDOUT)
>> 833 {
>> 834 if (rwlock->__data.__flags != PTHREAD_RWLOCK_PREFER_READER_NP)
>> 835 {
>> 836 /* We try writer--writer hand-over. */
>>
>> This also needs to handle EOVERFLOW as ETIMEDOUT, returning ETIMEDOUT
>> instead of EOVERFLOW.
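>>
>> A sketch of the mapping I have in mind for these rwlock paths (the
>> same treatment would apply to the other wait sites quoted above):
>>
>>   int err = __futex_abstimed_wait64 (&rwlock->__data.__wrphase_futex,
>>                                      PTHREAD_RWLOCK_FUTEX_USED,
>>                                      clockid, abstime, private);
>>   /* Treat an unrepresentable timeout like a timeout so the existing
>>      unregister/hand-over code runs and callers do not see EOVERFLOW.  */
>>   if (err == EOVERFLOW)
>>     err = ETIMEDOUT;
>>   if (err == ETIMEDOUT)
>>     {
>>       /* Existing timeout handling.  */
>>     }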
>>
>> I plan to address these in a subsequent patch.
>>
>
> Reviewing the __futex_clocklock64 usages, they also seem to already
> handle EOVERFLOW:
>
> 32 #ifndef lll_clocklock_elision
> 33 #define lll_clocklock_elision(futex, adapt_count, clockid, abstime, private) \
> 34 __futex_clocklock64 (&(futex), clockid, abstime, private)
> 35 #endif
>
> 78 /* We have to get the mutex. */
> 79 result = __futex_clocklock64 (&mutex->__data.__lock, clockid, abstime,
> 80 PTHREAD_MUTEX_PSHARED (mutex));
> 81
> 82 if (result != 0)
> 83 goto out;
>
> This already handles EOVERFLOW.
>
>
> 100 simple:
> 101 /* Normal mutex. */
> 102 result = __futex_clocklock64 (&mutex->__data.__lock, clockid, abstime,
> 103 PTHREAD_MUTEX_PSHARED (mutex));
> 104 break;
>
> This also handles EOVERFLOW (it will bail out to line 583).
>
>
> 106 case PTHREAD_MUTEX_TIMED_ELISION_NP:
> 107 elision: __attribute__((unused))
> 108 /* Don't record ownership */
> 109 return lll_clocklock_elision (mutex->__data.__lock,
> 110 mutex->__data.__spins,
> 111 clockid, abstime,
> 112 PTHREAD_MUTEX_PSHARED (mutex));
>
> All targets that override lll_clocklock_elision call the
> target-provided __lll_lock_elision, which in turn returns the result
> of __futex_clocklock64, so EOVERFLOW is already handled.
>
>
> 115 case PTHREAD_MUTEX_ADAPTIVE_NP:
> 116 if (lll_trylock (mutex->__data.__lock) != 0)
> 117 {
> 118 int cnt = 0;
> 119 int max_cnt = MIN (max_adaptive_count (),
> 120 mutex->__data.__spins * 2 + 10);
> 121 do
> 122 {
> 123 if (cnt++ >= max_cnt)
> 124 {
> 125 result = __futex_clocklock64 (&mutex->__data.__lock,
> 126 clockid, abstime,
> 127 PTHREAD_MUTEX_PSHARED (mutex));
> 128 break;
> 129 }
> 130 atomic_spin_nop ();
> 131 }
> 132 while (lll_trylock (mutex->__data.__lock) != 0);
> 133
> 134 mutex->__data.__spins += (cnt - mutex->__data.__spins) / 8;
> 135 }
>
> This should be safe as well (it will break once cnt reaches max_cnt).
> It could bail out early if result is EOVERFLOW, but that would require
> adjusting __spins as well.
>
I forgot to analyze the usages of __futex_abstimed_wait_cancelable64 as well.
On nptl/pthread_cond_wait.c:
504 err = __futex_abstimed_wait_cancelable64 (
505 cond->__data.__g_signals + g, 0, clockid, abstime, private);
506
507 __pthread_cleanup_pop (&buffer, 0);
508
509 if (__glibc_unlikely (err == ETIMEDOUT))
510 {
This also requires handling EOVERFLOW.
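A sketch of the kind of change needed here as well (whether EOVERFLOW
is folded into the timeout path or reported separately still needs to
be decided):

  err = __futex_abstimed_wait_cancelable64 (
    cond->__data.__g_signals + g, 0, clockid, abstime, private);

  __pthread_cleanup_pop (&buffer, 0);

  /* Do not ignore an unrepresentable timeout, otherwise the wait
     carries on as if nothing had happened.  */
  if (__glibc_unlikely (err == ETIMEDOUT || err == EOVERFLOW))
    {
      /* Existing timeout handling.  */
    }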
The nptl/pthread_join_common.c is safe:
102 int ret = __futex_abstimed_wait_cancelable64 (
103 (unsigned int *) &pd->tid, tid, clockid, abstime, LLL_SHARED);
104 if (ret == ETIMEDOUT || ret == EOVERFLOW)
105 {
106 result = ret;
107 break;
108 }
The nptl/sem_waitcommon.c also requires fixing:
104 static int
105 __attribute__ ((noinline))
106 do_futex_wait (struct new_sem *sem, clockid_t clockid,
107 const struct __timespec64 *abstime)
108 {
109 int err;
110
111 #if __HAVE_64B_ATOMICS
112 err = __futex_abstimed_wait_cancelable64 (
113 (unsigned int *) &sem->data + SEM_VALUE_OFFSET, 0,
114 clockid, abstime,
115 sem->private);
116 #else
117 err = __futex_abstimed_wait_cancelable64 (&sem->value, SEM_NWAITERS_MASK,
118 clockid, abstime, sem->private);
119 #endif
120
121 return err;
122 }
179 for (;;)
180 {
181 /* If there is no token available, sleep until there is. */
182 if ((d & SEM_VALUE_MASK) == 0)
183 {
184 err = do_futex_wait (sem, clockid, abstime);
[...]
194 if (err == ETIMEDOUT || err == EINTR)
195 {
196 __set_errno (err);
197 err = -1;