[PATCH] nptl: Fix PTHREAD_PRIO_PROTECT timed lock

Adhemerval Zanella adhemerval.zanella@linaro.org
Fri Nov 27 11:22:27 GMT 2020



On 26/11/2020 09:51, Adhemerval Zanella wrote:
> 
> 
> On 26/11/2020 09:15, Adhemerval Zanella wrote:
>>
>>
>> On 26/11/2020 08:27, Mike Crowe wrote:
>>> On Wednesday 25 November 2020 at 17:27:23 -0300, Adhemerval Zanella wrote:
>>>> Commit 878fe624d4 replaced lll_futex_timed_wait, which expects a relative
>>>> timeout, with __futex_abstimed_wait64, which expects an absolute
>>>> timeout.  However, the code still passes a relative timeout.
>>>>
>>>> Also, the PTHREAD_PRIO_PROTECT support for clocks other than
>>>> CLOCK_REALTIME has been broken since the inclusion of
>>>> pthread_mutex_clocklock (9d20e22e46), because lll_futex_timed_wait
>>>> always uses CLOCK_REALTIME.
>>>>
>>>> This patch fixes it by removing the relative-time calculation.  It
>>>> also adds some xtests that exercise both intra-process (threads) and
>>>> inter-process usage.
>>>>
>>>> Checked on x86_64-linux-gnu.
>>>> ---
>>>>  nptl/Makefile                  |  3 ++-
>>>>  nptl/pthread_mutex_timedlock.c | 29 +++++------------------------
>>>>  nptl/tst-mutexpp5.c            |  2 ++
>>>>  nptl/tst-mutexpp9.c            |  2 ++
>>>>  sysdeps/pthread/tst-mutex5.c   | 12 +++++++++++-
>>>>  sysdeps/pthread/tst-mutex9.c   | 13 ++++++++++++-
>>>>  6 files changed, 34 insertions(+), 27 deletions(-)
>>>>  create mode 100644 nptl/tst-mutexpp5.c
>>>>  create mode 100644 nptl/tst-mutexpp9.c
>>>>
>>>> diff --git a/nptl/Makefile b/nptl/Makefile
>>>> index a48426a396..94d805f0d4 100644
>>>> --- a/nptl/Makefile
>>>> +++ b/nptl/Makefile
>>>> @@ -309,7 +309,8 @@ tests-internal := tst-robustpi8 tst-rwlock19 tst-rwlock20 \
>>>>  		  tst-setgetname \
>>>>  
>>>>  xtests = tst-setuid1 tst-setuid1-static tst-setuid2 \
>>>> -	tst-mutexpp1 tst-mutexpp6 tst-mutexpp10 tst-setgroups
>>>> +	tst-mutexpp1 tst-mutexpp6 tst-mutexpp10 tst-setgroups \
>>>> +	tst-mutexpp5 tst-mutexpp9
>>>>  
>>>>  # This test can run into task limits because of a linux kernel bug
>>>>  # and then cause the make process to fail too, see bug 24537.
>>>> diff --git a/nptl/pthread_mutex_timedlock.c b/nptl/pthread_mutex_timedlock.c
>>>> index aaaafa21ce..74adffe790 100644
>>>> --- a/nptl/pthread_mutex_timedlock.c
>>>> +++ b/nptl/pthread_mutex_timedlock.c
>>>> @@ -547,30 +547,11 @@ __pthread_mutex_clocklock_common (pthread_mutex_t *mutex,
>>>>  			goto failpp;
>>>>  		      }
>>>>  
>>>> -		    struct __timespec64 rt;
>>>> -
>>>> -		    /* Get the current time.  */
>>>> -		    __clock_gettime64 (CLOCK_REALTIME, &rt);
>>>> -
>>>> -		    /* Compute relative timeout.  */
>>>> -		    rt.tv_sec = abstime->tv_sec - rt.tv_sec;
>>>> -		    rt.tv_nsec = abstime->tv_nsec - rt.tv_nsec;
>>>> -		    if (rt.tv_nsec < 0)
>>>> -		      {
>>>> -			rt.tv_nsec += 1000000000;
>>>> -			--rt.tv_sec;
>>>> -		      }
>>>> -
>>>> -		    /* Already timed out?  */
>>>> -		    if (rt.tv_sec < 0)
>>>> -		      {
>>>> -			result = ETIMEDOUT;
>>>> -			goto failpp;
>>>> -		      }
>>>> -
>>>> -		    __futex_abstimed_wait64 (
>>>> -		      (unsigned int *) &mutex->__data.__lock, clockid,
>>>> -		      ceilval | 2, &rt, PTHREAD_MUTEX_PSHARED (mutex));
>>>> +		    int e = __futex_abstimed_wait64 (
>>>> +		      (unsigned int *) &mutex->__data.__lock, ceilval | 2,
>>>> +		      clockid, abstime, PTHREAD_MUTEX_PSHARED (mutex));
>>>> +		    if (e == ETIMEDOUT)
>>>> +		      return ETIMEDOUT;
>>>
>>> I'm worried that futex could return other errors here which would cause a
>>> busy infinite loop. However, my attempts to provoke EINVAL have failed
>>> since the validity of abstime.tv_nsec is checked earlier. Presumably we
>>> should never get this far if EOVERFLOW could be returned? (If there is a
>>> problem here, then it also affects other mutex types which have similar
>>> code.)
>>
>> Reviewing the callers of the futex-internal.h functions that might pass a
>> timeout where EOVERFLOW might happen, I see the nptl code requires some fixes.
>>
>> At nptl/pthread_mutex_timedlock.c for robust mutexes:
>>
>> 138     case PTHREAD_MUTEX_ROBUST_RECURSIVE_NP:
>> 139     case PTHREAD_MUTEX_ROBUST_ERRORCHECK_NP:
>> 140     case PTHREAD_MUTEX_ROBUST_NORMAL_NP:
>> 141     case PTHREAD_MUTEX_ROBUST_ADAPTIVE_NP:
>> [...]
>> 235           /* We are about to block; check whether the timeout is invalid.  */
>> 236           if (! valid_nanoseconds (abstime->tv_nsec))
>> 237             return EINVAL;
>> 238           /* Work around the fact that the kernel rejects negative timeout
>> 239              values despite them being valid.  */
>> 240           if (__glibc_unlikely (abstime->tv_sec < 0))
>> 241             return ETIMEDOUT;
>>
>> The FUTEX_WAITERS flag will trigger a futex wake on pthread_mutex_unlock, so
>> this check is an optimization to avoid it.  It could be removed since
>> __futex_abstimed_wait64 does the same check (removing it might incur a
>> spurious futex call).
>>
>> [...]
>> 268           int err = __futex_abstimed_wait64 (
>> 269               (unsigned int *) &mutex->__data.__lock,
>> 270               oldval, clockid, abstime,
>> 271               PTHREAD_ROBUST_MUTEX_PSHARED (mutex));
>> 272           /* The futex call timed out.  */
>> 273           if (err == ETIMEDOUT)
>> 274             return err;
>>
>> We need to handle EOVERFLOW here: without a reported failure, the code
>> assumes the robust mutex was locked.
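>>
>> Something like the following is what I have in mind (untested sketch; whether
>> EOVERFLOW should be reported as-is or mapped to another error is a separate
>> question):
>>
>>           int err = __futex_abstimed_wait64 (
>>               (unsigned int *) &mutex->__data.__lock,
>>               oldval, clockid, abstime,
>>               PTHREAD_ROBUST_MUTEX_PSHARED (mutex));
>>           /* The futex call timed out, or the timeout cannot be represented
>>              by the kernel; either way the mutex was not acquired.  */
>>           if (err == ETIMEDOUT || err == EOVERFLOW)
>>             return err;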
>>
>>
>> The priority-inheritance (PI) cases seem to be handled as expected:
>>
>> 307     case PTHREAD_MUTEX_PI_RECURSIVE_NP:
>> 308     case PTHREAD_MUTEX_PI_ERRORCHECK_NP:
>> 309     case PTHREAD_MUTEX_PI_NORMAL_NP:
>> 310     case PTHREAD_MUTEX_PI_ADAPTIVE_NP:
>> 311     case PTHREAD_MUTEX_PI_ROBUST_RECURSIVE_NP:
>> 312     case PTHREAD_MUTEX_PI_ROBUST_ERRORCHECK_NP:
>> 313     case PTHREAD_MUTEX_PI_ROBUST_NORMAL_NP:
>> 314     case PTHREAD_MUTEX_PI_ROBUST_ADAPTIVE_NP:
>> [...]
>> 380             /* The mutex is locked.  The kernel will now take care of
>> 381                everything.  The timeout value must be a relative value.
>> 382                Convert it.  */
>> 383             int private = (robust
>> 384                            ? PTHREAD_ROBUST_MUTEX_PSHARED (mutex)
>> 385                            : PTHREAD_MUTEX_PSHARED (mutex));
>> 386             int e = futex_lock_pi64 (&mutex->__data.__lock, abstime, private);
>> 387             if (e == ETIMEDOUT)
>> 388               return ETIMEDOUT;
>> 389             else if (e == ESRCH || e == EDEADLK)
>> 390               {
>> 391                 assert (e != EDEADLK
>> 392                         || (kind != PTHREAD_MUTEX_ERRORCHECK_NP
>> 393                            && kind != PTHREAD_MUTEX_RECURSIVE_NP));
>> 394                 /* ESRCH can happen only for non-robust PI mutexes where
>> 395                    the owner of the lock died.  */
>> 396                 assert (e != ESRCH || !robust);
>> 397 
>> 398                 /* Delay the thread until the timeout is reached. Then return
>> 399                    ETIMEDOUT.  */
>> 400                 do
>> 401                   e = __futex_abstimed_wait64 (&(unsigned int){0}, 0, clockid,
>> 402                                                abstime, private);
>> 403                 while (e != ETIMEDOUT);
>> 404                 return ETIMEDOUT;
>> 405               }
>> 406             else if (e != 0)
>> 407               return e;
>>
>> The EOVERFLOW from futex_lock_pi64 will be handled by the final 'e != 0'
>> check, and the internal '__futex_abstimed_wait64' will only see a valid
>> abstime (since futex_lock_pi64 would have failed early otherwise).
>>
>> As for the priority-protected (PP) ones:
>>
>> 469     case PTHREAD_MUTEX_PP_RECURSIVE_NP:
>> 470     case PTHREAD_MUTEX_PP_ERRORCHECK_NP:
>> 471     case PTHREAD_MUTEX_PP_NORMAL_NP:
>> 472     case PTHREAD_MUTEX_PP_ADAPTIVE_NP:
>> [...]
>> 550                     int e = __futex_abstimed_wait64 (
>> 551                       (unsigned int *) &mutex->__data.__lock, ceilval | 2,
>> 552                       clockid, abstime, PTHREAD_MUTEX_PSHARED (mutex));
>> 553                     if (e == ETIMEDOUT)
>> 554                       return ETIMEDOUT;
>>
>> As you have noted, we do need to handle EOVERFLOW here, otherwise it will
>> trigger a busy infinite loop.  It does not happen now because the timeouts
>> passed to __pthread_mutex_clocklock_common by 32-bit architectures with
>> 32-bit time_t always fit in a 32-bit timespec; but it might happen once we
>> add 64-bit time support for kernels older than v5.1.
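>>
>> Something along these lines, following the code in the patch above (untested
>> sketch):
>>
>>                     int e = __futex_abstimed_wait64 (
>>                       (unsigned int *) &mutex->__data.__lock, ceilval | 2,
>>                       clockid, abstime, PTHREAD_MUTEX_PSHARED (mutex));
>>                     /* Leave the retry loop both when the timeout expired and
>>                        when it cannot be represented by the kernel.  */
>>                     if (e == ETIMEDOUT || e == EOVERFLOW)
>>                       return e;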
>>
>>
>> On the pthread rwlock code (nptl/pthread_rwlock_common.c) there are some issues:
>>
>> 328               while (((r = atomic_load_relaxed (&rwlock->__data.__readers))
>> 329                       & PTHREAD_RWLOCK_RWAITING) != 0)
>> 330                 {
>> 331                   int private = __pthread_rwlock_get_private (rwlock);
>> 332                   int err = __futex_abstimed_wait64 (&rwlock->__data.__readers,
>> 333                                                      r, clockid, abstime,
>> 334                                                      private);
>> 335                   /* We ignore EAGAIN and EINTR.  On time-outs, we can just
>> 336                      return because we don't need to clean up anything.  */
>> 337                   if (err == ETIMEDOUT)
>> 338                     return err;
>> 339                 }
>>
>> This also requires handling EOVERFLOW.
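>>
>> i.e. roughly (untested sketch; whether EOVERFLOW should be returned as such
>> or mapped to ETIMEDOUT is open):
>>
>>                   int err = __futex_abstimed_wait64 (&rwlock->__data.__readers,
>>                                                      r, clockid, abstime,
>>                                                      private);
>>                   /* We ignore EAGAIN and EINTR.  On time-outs, and on
>>                      timeouts the kernel cannot represent, we can just return
>>                      because we don't need to clean up anything.  */
>>                   if (err == ETIMEDOUT || err == EOVERFLOW)
>>                     return err;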
>>
>> 460           int err = __futex_abstimed_wait64 (&rwlock->__data.__wrphase_futex,
>> 461                                              1 | PTHREAD_RWLOCK_FUTEX_USED,
>> 462                                              clockid, abstime, private);
>> 463           if (err == ETIMEDOUT)
>> 464             {
>> 465               /* If we timed out, we need to unregister.  If no read phase
>> 466                  has been installed while we waited, we can just decrement
>> 467                  the number of readers.  Otherwise, we just acquire the
>> 468                  lock, which is allowed because we give no precise timing
>> 469                  guarantees, and because the timeout is only required to
>> 470                  be in effect if we would have had to wait for other
>> 471                  threads (e.g., if futex_wait would time-out immediately
>> 472                  because the given absolute time is in the past).  */
>>
>> Same here, I think EOVERFLOW should be handled as ETIMEDOUT in this case.
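>>
>> i.e. make an unrepresentable timeout take the same unregister path (untested
>> sketch):
>>
>>           int err = __futex_abstimed_wait64 (&rwlock->__data.__wrphase_futex,
>>                                              1 | PTHREAD_RWLOCK_FUTEX_USED,
>>                                              clockid, abstime, private);
>>           /* Treat a timeout the kernel cannot represent like an expired one,
>>              so the existing clean-up runs and ETIMEDOUT is reported.  */
>>           if (err == ETIMEDOUT || err == EOVERFLOW)
>>             {
>>               err = ETIMEDOUT;
>>               /* ... existing unregister / hand-over code ...  */
>>             }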
>>
>> 730           int err = __futex_abstimed_wait64 (&rwlock->__data.__writers_futex,
>> 731                                              1 | PTHREAD_RWLOCK_FUTEX_USED,
>> 732                                              clockid, abstime, private);
>> 733           if (err == ETIMEDOUT)
>> 734             {
>> 735               if (prefer_writer)
>> 736                 {
>> 737                   /* We need to unregister as a waiting writer.  If we are the
>> 738                      last writer and writer--writer hand-over is available,
>> 739                      we must make use of it because nobody else will reset
>> 740                      WRLOCKED otherwise.  (If we use it, we simply pretend
>> 741                      that this happened before the timeout; see
>> 742                      pthread_rwlock_rdlock_full for the full reasoning.)
>> 743                      Also see the similar code above.  */
>>
>> And
>>
>> 829           int err = __futex_abstimed_wait64 (&rwlock->__data.__wrphase_futex,
>> 830                                              PTHREAD_RWLOCK_FUTEX_USED,
>> 831                                              clockid, abstime, private);
>> 832           if (err == ETIMEDOUT)
>> 833             {
>> 834               if (rwlock->__data.__flags != PTHREAD_RWLOCK_PREFER_READER_NP)
>> 835                 {
>> 836                   /* We try writer--writer hand-over.  */
>>
>> This also needs to handle EOVERFLOW as ETIMEDOUT, and to return ETIMEDOUT
>> instead of EOVERFLOW.
>>
>> I plan to address these in a subsequent patch.
>>
> 
> Reviewing the __futex_clocklock64 usages, they also seem to already handle
> EOVERFLOW:
> 
>  32 #ifndef lll_clocklock_elision
>  33 #define lll_clocklock_elision(futex, adapt_count, clockid, abstime, private) \
>  34   __futex_clocklock64 (&(futex), clockid, abstime, private)
>  35 #endif
> 
>  78       /* We have to get the mutex.  */
>  79       result = __futex_clocklock64 (&mutex->__data.__lock, clockid, abstime,
>  80                                     PTHREAD_MUTEX_PSHARED (mutex));
>  81 
>  82       if (result != 0)
>  83         goto out;
> 
> This already handles EOVERFLOW.
> 
> 
> 100     simple:
> 101       /* Normal mutex.  */
> 102       result = __futex_clocklock64 (&mutex->__data.__lock, clockid, abstime,
> 103                                     PTHREAD_MUTEX_PSHARED (mutex));
> 104       break;
> 
> This also handles EOVERFLOW (it will bail out to line 583).
> 
> 
> 106     case PTHREAD_MUTEX_TIMED_ELISION_NP:
> 107     elision: __attribute__((unused))
> 108       /* Don't record ownership */
> 109       return lll_clocklock_elision (mutex->__data.__lock,
> 110                                     mutex->__data.__spins,
> 111                                     clockid, abstime,
> 112                                     PTHREAD_MUTEX_PSHARED (mutex));
> 
> All the target-overridden lll_clocklock_elision implementations call the
> target-provided __lll_lock_elision, which in turn returns the result of
> __futex_clocklock64, so EOVERFLOW is already handled.
> 
> 
> 115     case PTHREAD_MUTEX_ADAPTIVE_NP:
> 116       if (lll_trylock (mutex->__data.__lock) != 0)
> 117         {
> 118           int cnt = 0;
> 119           int max_cnt = MIN (max_adaptive_count (),
> 120                              mutex->__data.__spins * 2 + 10);
> 121           do
> 122             {
> 123               if (cnt++ >= max_cnt)
> 124                 {
> 125                   result = __futex_clocklock64 (&mutex->__data.__lock,
> 126                                                 clockid, abstime,
> 127                                                 PTHREAD_MUTEX_PSHARED (mutex));
> 128                   break;
> 129                 }
> 130               atomic_spin_nop ();
> 131             }
> 132           while (lll_trylock (mutex->__data.__lock) != 0);
> 133 
> 134           mutex->__data.__spins += (cnt - mutex->__data.__spins) / 8;
> 135         }
> 
> This should be safe as well (it will break once cnt reaches max_cnt).  It
> could bail out early if result is EOVERFLOW, but that would also require
> adjusting __spins.
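>
> Not that it is necessarily worth it, but bailing out early would look roughly
> like this (untested sketch; 'out' is the existing error exit in
> __pthread_mutex_clocklock_common):
>
>                   result = __futex_clocklock64 (&mutex->__data.__lock,
>                                                 clockid, abstime,
>                                                 PTHREAD_MUTEX_PSHARED (mutex));
>                   if (result == EOVERFLOW)
>                     {
>                       /* Keep the adaptive spin bookkeeping consistent before
>                          leaving the loop.  */
>                       mutex->__data.__spins += (cnt - mutex->__data.__spins) / 8;
>                       goto out;
>                     }
>                   break;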
> 

I forgot to analyze the usages of __futex_abstimed_wait_cancelable64 as well.

On nptl/pthread_cond_wait.c:

504           err = __futex_abstimed_wait_cancelable64 (
505             cond->__data.__g_signals + g, 0, clockid, abstime, private);
506 
507           __pthread_cleanup_pop (&buffer, 0);
508 
509           if (__glibc_unlikely (err == ETIMEDOUT))
510             {

This also requires handling EOVERFLOW.
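
Something like this, where the body of the branch is the existing time-out
handling (untested sketch):

          err = __futex_abstimed_wait_cancelable64 (
            cond->__data.__g_signals + g, 0, clockid, abstime, private);

          __pthread_cleanup_pop (&buffer, 0);

          /* A timeout the kernel cannot represent has to take the same path
             as an expired one, otherwise we keep calling futex with the same
             unrepresentable timeout.  */
          if (__glibc_unlikely (err == ETIMEDOUT || err == EOVERFLOW))
            {
              /* ... existing time-out handling ...  */
            }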

The nptl/pthread_join_common.c is safe:

102           int ret = __futex_abstimed_wait_cancelable64 (
103             (unsigned int *) &pd->tid, tid, clockid, abstime, LLL_SHARED);
104           if (ret == ETIMEDOUT || ret == EOVERFLOW)
105             {
106               result = ret;
107               break;
108             }

The nptl/sem_waitcommon.c also requires fixing:

104 static int
105 __attribute__ ((noinline))
106 do_futex_wait (struct new_sem *sem, clockid_t clockid,
107                const struct __timespec64 *abstime)
108 {
109   int err;
110 
111 #if __HAVE_64B_ATOMICS
112   err = __futex_abstimed_wait_cancelable64 (
113       (unsigned int *) &sem->data + SEM_VALUE_OFFSET, 0,
114       clockid, abstime,
115       sem->private);
116 #else
117   err = __futex_abstimed_wait_cancelable64 (&sem->value, SEM_NWAITERS_MASK,
118                                             clockid, abstime, sem->private);
119 #endif
120 
121   return err;
122 }

179   for (;;)
180     {
181       /* If there is no token available, sleep until there is.  */
182       if ((d & SEM_VALUE_MASK) == 0)
183         {
184           err = do_futex_wait (sem, clockid, abstime);
[...]
194           if (err == ETIMEDOUT || err == EINTR)
195             {
196               __set_errno (err);
197               err = -1;
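
The fix in the caller would be roughly (untested sketch; as with the other
cases, whether the application should see EOVERFLOW or ETIMEDOUT is a
separate question):

          err = do_futex_wait (sem, clockid, abstime);
          /* A timeout the kernel cannot represent has to leave the wait loop
             as well, otherwise we keep calling futex with the same argument.  */
          if (err == ETIMEDOUT || err == EINTR || err == EOVERFLOW)
            {
              __set_errno (err);
              err = -1;
              /* ... existing waiter clean-up ...  */
            }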


