This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
[PATCH v2] Extend i486 pthread_cond_timedwait to use futex syscall with absolute timeout
- From: Andreas Schwab <schwab at suse dot de>
- To: libc-alpha at sourceware dot org
- Date: Wed, 05 Dec 2012 12:15:54 +0100
- Subject: [PATCH v2] Extend i486 pthread_cond_timedwait to use futex syscall with absolute timeout
- References: <mvmfw3vfggq.fsf@g208.suse.de>
This implements proper handling of the absolute timeout for the i486+
version of pthread_cond_timedwait, following the general structure of
the x86-64 version. No testsuite regressions, and I have also tested it
with __have_futex_clock_realtime forced to 0 to trigger the fallback
code.
v2: renumber labels
Andreas.
* sysdeps/unix/sysv/linux/i386/i486/pthread_cond_timedwait.S
(__pthread_cond_timedwait): If possible use FUTEX_WAIT_BITSET to
directly use absolute timeout.
---
.../sysv/linux/i386/i486/pthread_cond_timedwait.S | 381 +++++++++++++++++----
1 file changed, 315 insertions(+), 66 deletions(-)
diff --git a/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_timedwait.S b/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_timedwait.S
index 6011f69..b9dbe6b 100644
--- a/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_timedwait.S
+++ b/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_timedwait.S
@@ -66,6 +66,34 @@ __pthread_cond_timedwait:
movl $EINVAL, %eax
jae 18f
+ /* Stack frame:
+
+ esp + 32
+ +--------------------------+
+ esp + 24 | timeout value |
+ +--------------------------+
+ esp + 20 | futex pointer |
+ +--------------------------+
+ esp + 16 | pi-requeued flag |
+ +--------------------------+
+ esp + 12 | old broadcast_seq value |
+ +--------------------------+
+ esp + 4 | old wake_seq value |
+ +--------------------------+
+ esp + 0 | old cancellation mode |
+ +--------------------------+
+ */
+
+#ifndef __ASSUME_FUTEX_CLOCK_REALTIME
+# ifdef PIC
+ LOAD_PIC_REG (cx)
+ cmpl $0, __have_futex_clock_realtime@GOTOFF(%ecx)
+# else
+ cmpl $0, __have_futex_clock_realtime
+# endif
+ je .Lreltmo
+#endif
+
/* Get internal lock. */
movl $1, %edx
xorl %eax, %eax
@@ -96,7 +124,11 @@ __pthread_cond_timedwait:
addl $1, cond_futex(%ebx)
addl $(1 << nwaiters_shift), cond_nwaiters(%ebx)
-#define FRAME_SIZE 32
+#ifdef __ASSUME_FUTEX_CLOCK_REALTIME
+# define FRAME_SIZE 24
+#else
+# define FRAME_SIZE 32
+#endif
subl $FRAME_SIZE, %esp
cfi_adjust_cfa_offset(FRAME_SIZE)
cfi_remember_state
@@ -105,60 +137,19 @@ __pthread_cond_timedwait:
movl wakeup_seq(%ebx), %edi
movl wakeup_seq+4(%ebx), %edx
movl broadcast_seq(%ebx), %eax
- movl %edi, 12(%esp)
- movl %edx, 16(%esp)
- movl %eax, 20(%esp)
+ movl %edi, 4(%esp)
+ movl %edx, 8(%esp)
+ movl %eax, 12(%esp)
/* Reset the pi-requeued flag. */
-8: movl $0, 24(%esp)
- /* Get the current time. */
- movl %ebx, %edx
-#ifdef __NR_clock_gettime
- /* Get the clock number. */
- movl cond_nwaiters(%ebx), %ebx
- andl $((1 << nwaiters_shift) - 1), %ebx
- /* Only clocks 0 and 1 are allowed so far. Both are handled in the
- kernel. */
- leal 4(%esp), %ecx
- movl $__NR_clock_gettime, %eax
- ENTER_KERNEL
- movl %edx, %ebx
-
- /* Compute relative timeout. */
- movl (%ebp), %ecx
- movl 4(%ebp), %edx
- subl 4(%esp), %ecx
- subl 8(%esp), %edx
-#else
- /* Get the current time. */
- leal 4(%esp), %ebx
- xorl %ecx, %ecx
- movl $__NR_gettimeofday, %eax
- ENTER_KERNEL
- movl %edx, %ebx
+ movl $0, 16(%esp)
- /* Compute relative timeout. */
- movl 8(%esp), %eax
- movl $1000, %edx
- mul %edx /* Milli seconds to nano seconds. */
- movl (%ebp), %ecx
- movl 4(%ebp), %edx
- subl 4(%esp), %ecx
- subl %eax, %edx
-#endif
- jns 12f
- addl $1000000000, %edx
- subl $1, %ecx
-12: testl %ecx, %ecx
+ cmpl $0, (%ebp)
movl $-ETIMEDOUT, %esi
js 6f
- /* Store relative timeout. */
-21: movl %ecx, 4(%esp)
- movl %edx, 8(%esp)
-
- movl cond_futex(%ebx), %edi
- movl %edi, 28(%esp)
+8: movl cond_futex(%ebx), %edi
+ movl %edi, 20(%esp)
/* Unlock. */
LOCK
@@ -173,6 +164,7 @@ __pthread_cond_timedwait:
4: call __pthread_enable_asynccancel
movl %eax, (%esp)
+ leal (%ebp), %esi
#if FUTEX_PRIVATE_FLAG > 255
xorl %ecx, %ecx
#endif
@@ -196,9 +188,7 @@ __pthread_cond_timedwait:
jne 42f
orl $FUTEX_CLOCK_REALTIME, %ecx
- /* Requeue-PI uses absolute timeout */
-42: leal (%ebp), %esi
- movl 28(%esp), %edx
+42: movl 20(%esp), %edx
addl $cond_futex, %ebx
.Ladd_cond_futex_pi:
movl $SYS_futex, %eax
@@ -209,12 +199,12 @@ __pthread_cond_timedwait:
/* Set the pi-requeued flag only if the kernel has returned 0. The
kernel does not hold the mutex on ETIMEDOUT or any other error. */
cmpl $0, %eax
- sete 24(%esp)
+ sete 16(%esp)
je 41f
/* When a futex syscall with FUTEX_WAIT_REQUEUE_PI returns
successfully, it has already locked the mutex for us and the
- pi_flag (24(%esp)) is set to denote that fact. However, if another
+ pi_flag (16(%esp)) is set to denote that fact. However, if another
thread changed the futex value before we entered the wait, the
syscall may return an EAGAIN and the mutex is not locked. We go
ahead with a success anyway since later we look at the pi_flag to
@@ -234,22 +224,28 @@ __pthread_cond_timedwait:
xorl %ecx, %ecx
40: subl $1, %ecx
+ movl $0, 16(%esp)
#ifdef __ASSUME_PRIVATE_FUTEX
andl $FUTEX_PRIVATE_FLAG, %ecx
#else
andl %gs:PRIVATE_FUTEX, %ecx
#endif
-#if FUTEX_WAIT != 0
- addl $FUTEX_WAIT, %ecx
-#endif
- leal 4(%esp), %esi
- movl 28(%esp), %edx
+ addl $FUTEX_WAIT_BITSET, %ecx
+ /* The following only works like this because we only support
+ two clocks, represented using a single bit. */
+ testl $1, cond_nwaiters(%ebx)
+ jne 30f
+ orl $FUTEX_CLOCK_REALTIME, %ecx
+30:
+ movl 20(%esp), %edx
+ movl $0xffffffff, %ebp
addl $cond_futex, %ebx
.Ladd_cond_futex:
movl $SYS_futex, %eax
ENTER_KERNEL
subl $cond_futex, %ebx
.Lsub_cond_futex:
+ movl 28+FRAME_SIZE(%esp), %ebp
movl %eax, %esi
41: movl (%esp), %eax
@@ -268,7 +264,7 @@ __pthread_cond_timedwait:
jnz 5f
6: movl broadcast_seq(%ebx), %eax
- cmpl 20(%esp), %eax
+ cmpl 12(%esp), %eax
jne 23f
movl woken_seq(%ebx), %eax
@@ -277,9 +273,9 @@ __pthread_cond_timedwait:
movl wakeup_seq(%ebx), %edi
movl wakeup_seq+4(%ebx), %edx
- cmpl 16(%esp), %edx
+ cmpl 8(%esp), %edx
jne 7f
- cmpl 12(%esp), %edi
+ cmpl 4(%esp), %edi
je 15f
7: cmpl %ecx, %edx
@@ -292,7 +288,7 @@ __pthread_cond_timedwait:
/* We need to go back to futex_wait. If we're using requeue_pi, then
release the mutex we had acquired and go back. */
- movl 24(%esp), %edx
+ movl 16(%esp), %edx
test %edx, %edx
jz 8b
@@ -357,13 +353,13 @@ __pthread_cond_timedwait:
11: movl 24+FRAME_SIZE(%esp), %eax
/* With requeue_pi, the mutex lock is held in the kernel. */
- movl 24(%esp), %ecx
+ movl 16(%esp), %ecx
testl %ecx, %ecx
jnz 27f
call __pthread_mutex_cond_lock
26: addl $FRAME_SIZE, %esp
- cfi_adjust_cfa_offset(-FRAME_SIZE);
+ cfi_adjust_cfa_offset(-FRAME_SIZE)
/* We return the result of the mutex_lock operation if it failed. */
testl %eax, %eax
@@ -509,6 +505,245 @@ __pthread_cond_timedwait:
#endif
call __lll_unlock_wake
jmp 11b
+ cfi_adjust_cfa_offset(-FRAME_SIZE)
+
+#ifndef __ASSUME_FUTEX_CLOCK_REALTIME
+.Lreltmo:
+ /* Get internal lock. */
+ movl $1, %edx
+ xorl %eax, %eax
+ LOCK
+# if cond_lock == 0
+ cmpxchgl %edx, (%ebx)
+# else
+ cmpxchgl %edx, cond_lock(%ebx)
+# endif
+ jnz 101f
+
+ /* Store the reference to the mutex. If there is already a
+ different value in there this is a bad user bug. */
+102: cmpl $-1, dep_mutex(%ebx)
+ movl 24(%esp), %eax
+ je 117f
+ movl %eax, dep_mutex(%ebx)
+
+ /* Unlock the mutex. */
+117: xorl %edx, %edx
+ call __pthread_mutex_unlock_usercnt
+
+ testl %eax, %eax
+ jne 16b
+
+ addl $1, total_seq(%ebx)
+ adcl $0, total_seq+4(%ebx)
+ addl $1, cond_futex(%ebx)
+ addl $(1 << nwaiters_shift), cond_nwaiters(%ebx)
+
+ subl $FRAME_SIZE, %esp
+ cfi_adjust_cfa_offset(FRAME_SIZE)
+
+ /* Get and store current wakeup_seq value. */
+ movl wakeup_seq(%ebx), %edi
+ movl wakeup_seq+4(%ebx), %edx
+ movl broadcast_seq(%ebx), %eax
+ movl %edi, 4(%esp)
+ movl %edx, 8(%esp)
+ movl %eax, 12(%esp)
+
+ /* Reset the pi-requeued flag. */
+ movl $0, 16(%esp)
+
+ /* Get the current time. */
+108: movl %ebx, %edx
+# ifdef __NR_clock_gettime
+ /* Get the clock number. */
+ movl cond_nwaiters(%ebx), %ebx
+ andl $((1 << nwaiters_shift) - 1), %ebx
+ /* Only clocks 0 and 1 are allowed so far. Both are handled in the
+ kernel. */
+ leal 24(%esp), %ecx
+ movl $__NR_clock_gettime, %eax
+ ENTER_KERNEL
+ movl %edx, %ebx
+
+ /* Compute relative timeout. */
+ movl (%ebp), %ecx
+ movl 4(%ebp), %edx
+ subl 24(%esp), %ecx
+ subl 28(%esp), %edx
+# else
+ /* Get the current time. */
+ leal 24(%esp), %ebx
+ xorl %ecx, %ecx
+ movl $__NR_gettimeofday, %eax
+ ENTER_KERNEL
+ movl %edx, %ebx
+
+ /* Compute relative timeout. */
+ movl 28(%esp), %eax
+ movl $1000, %edx
+ mul %edx /* Milli seconds to nano seconds. */
+ movl (%ebp), %ecx
+ movl 4(%ebp), %edx
+ subl 24(%esp), %ecx
+ subl %eax, %edx
+# endif
+ jns 112f
+ addl $1000000000, %edx
+ subl $1, %ecx
+112: testl %ecx, %ecx
+ movl $-ETIMEDOUT, %esi
+ js 106f
+
+ /* Store relative timeout. */
+121: movl %ecx, 24(%esp)
+ movl %edx, 28(%esp)
+
+ movl cond_futex(%ebx), %edi
+ movl %edi, 20(%esp)
+
+ /* Unlock. */
+ LOCK
+# if cond_lock == 0
+ subl $1, (%ebx)
+# else
+ subl $1, cond_lock(%ebx)
+# endif
+ jne 103f
+
+.LcleanupSTART2:
+104: call __pthread_enable_asynccancel
+ movl %eax, (%esp)
+
+ leal 24(%esp), %esi
+# if FUTEX_PRIVATE_FLAG > 255
+ xorl %ecx, %ecx
+# endif
+ cmpl $-1, dep_mutex(%ebx)
+ sete %cl
+ subl $1, %ecx
+# ifdef __ASSUME_PRIVATE_FUTEX
+ andl $FUTEX_PRIVATE_FLAG, %ecx
+# else
+ andl %gs:PRIVATE_FUTEX, %ecx
+# endif
+# if FUTEX_WAIT != 0
+ addl $FUTEX_WAIT, %ecx
+# endif
+ movl 20(%esp), %edx
+ addl $cond_futex, %ebx
+.Ladd_cond_futex2:
+ movl $SYS_futex, %eax
+ ENTER_KERNEL
+ subl $cond_futex, %ebx
+.Lsub_cond_futex2:
+ movl %eax, %esi
+
+141: movl (%esp), %eax
+ call __pthread_disable_asynccancel
+.LcleanupEND2:
+
+
+ /* Lock. */
+ movl $1, %edx
+ xorl %eax, %eax
+ LOCK
+# if cond_lock == 0
+ cmpxchgl %edx, (%ebx)
+# else
+ cmpxchgl %edx, cond_lock(%ebx)
+# endif
+ jnz 105f
+
+106: movl broadcast_seq(%ebx), %eax
+ cmpl 12(%esp), %eax
+ jne 23b
+
+ movl woken_seq(%ebx), %eax
+ movl woken_seq+4(%ebx), %ecx
+
+ movl wakeup_seq(%ebx), %edi
+ movl wakeup_seq+4(%ebx), %edx
+
+ cmpl 8(%esp), %edx
+ jne 107f
+ cmpl 4(%esp), %edi
+ je 115f
+
+107: cmpl %ecx, %edx
+ jne 9b
+ cmp %eax, %edi
+ jne 9b
+
+115: cmpl $-ETIMEDOUT, %esi
+ je 28b
+
+ jmp 8b
+
+ cfi_adjust_cfa_offset(-FRAME_SIZE)
+ /* Initial locking failed. */
+101:
+# if cond_lock == 0
+ movl %ebx, %edx
+# else
+ leal cond_lock(%ebx), %edx
+# endif
+# if (LLL_SHARED-LLL_PRIVATE) > 255
+ xorl %ecx, %ecx
+# endif
+ cmpl $-1, dep_mutex(%ebx)
+ setne %cl
+ subl $1, %ecx
+ andl $(LLL_SHARED-LLL_PRIVATE), %ecx
+# if LLL_PRIVATE != 0
+ addl $LLL_PRIVATE, %ecx
+# endif
+ call __lll_lock_wait
+ jmp 102b
+
+ cfi_adjust_cfa_offset(FRAME_SIZE)
+
+ /* Unlock in loop requires wakeup. */
+103:
+# if cond_lock == 0
+ movl %ebx, %eax
+# else
+ leal cond_lock(%ebx), %eax
+# endif
+# if (LLL_SHARED-LLL_PRIVATE) > 255
+ xorl %ecx, %ecx
+# endif
+ cmpl $-1, dep_mutex(%ebx)
+ setne %cl
+ subl $1, %ecx
+ andl $(LLL_SHARED-LLL_PRIVATE), %ecx
+# if LLL_PRIVATE != 0
+ addl $LLL_PRIVATE, %ecx
+# endif
+ call __lll_unlock_wake
+ jmp 104b
+
+ /* Locking in loop failed. */
+105:
+# if cond_lock == 0
+ movl %ebx, %edx
+# else
+ leal cond_lock(%ebx), %edx
+# endif
+# if (LLL_SHARED-LLL_PRIVATE) > 255
+ xorl %ecx, %ecx
+# endif
+ cmpl $-1, dep_mutex(%ebx)
+ setne %cl
+ subl $1, %ecx
+ andl $(LLL_SHARED-LLL_PRIVATE), %ecx
+# if LLL_PRIVATE != 0
+ addl $LLL_PRIVATE, %ecx
+# endif
+ call __lll_lock_wait
+ jmp 106b
+ cfi_adjust_cfa_offset(-FRAME_SIZE)
+#endif
.size __pthread_cond_timedwait, .-__pthread_cond_timedwait
versioned_symbol (libpthread, __pthread_cond_timedwait, pthread_cond_timedwait,
@@ -552,7 +787,7 @@ __condvar_tw_cleanup:
call __lll_lock_wait
1: movl broadcast_seq(%ebx), %eax
- cmpl 20(%esp), %eax
+ cmpl 12(%esp), %eax
jne 3f
/* We increment the wakeup_seq counter only if it is lower than
@@ -706,6 +941,20 @@ __condvar_tw_cleanup:
.long .LcleanupEND-.Lsub_cond_futex
.long __condvar_tw_cleanup-.LSTARTCODE
.uleb128 0
+#ifndef __ASSUME_FUTEX_CLOCK_REALTIME
+ .long .LcleanupSTART2-.LSTARTCODE
+ .long .Ladd_cond_futex2-.LcleanupSTART2
+ .long __condvar_tw_cleanup-.LSTARTCODE
+ .uleb128 0
+ .long .Ladd_cond_futex2-.LSTARTCODE
+ .long .Lsub_cond_futex2-.Ladd_cond_futex2
+ .long __condvar_tw_cleanup2-.LSTARTCODE
+ .uleb128 0
+ .long .Lsub_cond_futex2-.LSTARTCODE
+ .long .LcleanupEND2-.Lsub_cond_futex2
+ .long __condvar_tw_cleanup-.LSTARTCODE
+ .uleb128 0
+#endif
.long .LcallUR-.LSTARTCODE
.long .LENDCODE-.LcallUR
.long 0
--
1.8.0.1
--
Andreas Schwab, SUSE Labs, schwab@suse.de
GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE 1748 E4D4 88E3 0EEA B9D7
"And now for something completely different."