From 99f841c441feeaa9a3d97fd91bb3d6ec8073c982 Mon Sep 17 00:00:00 2001 From: Florian Weimer Date: Wed, 21 Apr 2021 19:49:51 +0200 Subject: [PATCH] nptl: pthread_mutex_lock, pthread_mutex_unock single-threaded optimization This is optimization is similar in spirit to the SINGLE_THREAD_P check in the malloc implementation. Doing this in generic code allows us to prioritize those cases which are likely to occur in single-threaded programs (normal and recursive mutexes). Reviewed-by: Adhemerval Zanella --- nptl/pthread_mutex_cond_lock.c | 1 + nptl/pthread_mutex_lock.c | 25 ++++++++++++++++++++++--- nptl/pthread_mutex_unlock.c | 17 ++++++++++++++++- 3 files changed, 39 insertions(+), 4 deletions(-) diff --git a/nptl/pthread_mutex_cond_lock.c b/nptl/pthread_mutex_cond_lock.c index 2f0771302f..3386bd689b 100644 --- a/nptl/pthread_mutex_cond_lock.c +++ b/nptl/pthread_mutex_cond_lock.c @@ -2,6 +2,7 @@ #define LLL_MUTEX_LOCK(mutex) \ lll_cond_lock ((mutex)->__data.__lock, PTHREAD_MUTEX_PSHARED (mutex)) +#define LLL_MUTEX_LOCK_OPTIMIZED(mutex) LLL_MUTEX_LOCK (mutex) /* Not actually elided so far. Needed? */ #define LLL_MUTEX_LOCK_ELISION(mutex) \ diff --git a/nptl/pthread_mutex_lock.c b/nptl/pthread_mutex_lock.c index f0de7b7fd6..8649a92ffb 100644 --- a/nptl/pthread_mutex_lock.c +++ b/nptl/pthread_mutex_lock.c @@ -30,8 +30,27 @@ /* Some of the following definitions differ when pthread_mutex_cond_lock.c includes this file. */ #ifndef LLL_MUTEX_LOCK -# define LLL_MUTEX_LOCK(mutex) \ +/* lll_lock with single-thread optimization. */ +static inline void +lll_mutex_lock_optimized (pthread_mutex_t *mutex) +{ + /* The single-threaded optimization is only valid for private + mutexes. For process-shared mutexes, the mutex could be in a + shared mapping, so synchronization with another process is needed + even without any threads. If the lock is already marked as + acquired, POSIX requires that pthread_mutex_lock deadlocks for + normal mutexes, so skip the optimization in that case as + well. */ + int private = PTHREAD_MUTEX_PSHARED (mutex); + if (private == LLL_PRIVATE && SINGLE_THREAD_P && mutex->__data.__lock == 0) + mutex->__data.__lock = 1; + else + lll_lock (mutex->__data.__lock, private); +} + +# define LLL_MUTEX_LOCK(mutex) \ lll_lock ((mutex)->__data.__lock, PTHREAD_MUTEX_PSHARED (mutex)) +# define LLL_MUTEX_LOCK_OPTIMIZED(mutex) lll_mutex_lock_optimized (mutex) # define LLL_MUTEX_TRYLOCK(mutex) \ lll_trylock ((mutex)->__data.__lock) # define LLL_ROBUST_MUTEX_LOCK_MODIFIER 0 @@ -64,7 +83,7 @@ __pthread_mutex_lock (pthread_mutex_t *mutex) FORCE_ELISION (mutex, goto elision); simple: /* Normal mutex. */ - LLL_MUTEX_LOCK (mutex); + LLL_MUTEX_LOCK_OPTIMIZED (mutex); assert (mutex->__data.__owner == 0); } #if ENABLE_ELISION_SUPPORT @@ -99,7 +118,7 @@ __pthread_mutex_lock (pthread_mutex_t *mutex) } /* We have to get the mutex. */ - LLL_MUTEX_LOCK (mutex); + LLL_MUTEX_LOCK_OPTIMIZED (mutex); assert (mutex->__data.__owner == 0); mutex->__data.__count = 1; diff --git a/nptl/pthread_mutex_unlock.c b/nptl/pthread_mutex_unlock.c index 3b5ccdacf9..655093ee9a 100644 --- a/nptl/pthread_mutex_unlock.c +++ b/nptl/pthread_mutex_unlock.c @@ -28,6 +28,21 @@ static int __pthread_mutex_unlock_full (pthread_mutex_t *mutex, int decr) __attribute_noinline__; +/* lll_lock with single-thread optimization. */ +static inline void +lll_mutex_unlock_optimized (pthread_mutex_t *mutex) +{ + /* The single-threaded optimization is only valid for private + mutexes. For process-shared mutexes, the mutex could be in a + shared mapping, so synchronization with another process is needed + even without any threads. */ + int private = PTHREAD_MUTEX_PSHARED (mutex); + if (private == LLL_PRIVATE && SINGLE_THREAD_P) + mutex->__data.__lock = 0; + else + lll_unlock (mutex->__data.__lock, private); +} + int attribute_hidden __pthread_mutex_unlock_usercnt (pthread_mutex_t *mutex, int decr) @@ -51,7 +66,7 @@ __pthread_mutex_unlock_usercnt (pthread_mutex_t *mutex, int decr) --mutex->__data.__nusers; /* Unlock. */ - lll_unlock (mutex->__data.__lock, PTHREAD_MUTEX_PSHARED (mutex)); + lll_mutex_unlock_optimized (mutex); LIBC_PROBE (mutex_release, 1, mutex); -- 2.43.5