[RFC PATCH 2/2] Add x86_64 arch support for Requeue PI
Dinakar Guniguntala
dino@in.ibm.com
Tue May 5 21:31:00 GMT 2009
This patch adds the x86_64 optimised code for requeue PI.
Testing is the same as described in the previous mail.
2009-05-05 Dinakar Guniguntala <dino@in.ibm.com>
* sysdeps/unix/sysv/linux/x86_64/pthread_cond_broadcast.S: Add
support for using FUTEX_CMP_REQUEUE_PI for PI mutexes. For all
the pthread_cond_xxx APIs, a vanilla private mutex is considered
the fast path, and the PI mutex changes here ensure that the
fast path is not affected.
* sysdeps/unix/sysv/linux/x86_64/pthread_cond_signal.S: Likewise.
* sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S: Add
support for using FUTEX_WAIT_REQUEUE_PI for PI mutexes.
* sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S: Likewise.
diff -Nurp libc-20090427-1/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_broadcast.S libc-20090427-2/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_broadcast.S
--- libc-20090427-1/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_broadcast.S 2007-08-13 22:27:03.000000000 -0400
+++ libc-20090427-2/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_broadcast.S 2009-04-29 04:39:18.000000000 -0400
@@ -1,4 +1,4 @@
-/* Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007
+/* Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2009
Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
@@ -70,19 +70,22 @@ __pthread_cond_broadcast:
8: cmpq $-1, %r8
je 9f
- /* XXX: The kernel so far doesn't support requeue to PI futex. */
- /* XXX: The kernel only supports FUTEX_CMP_REQUEUE to the same
- type of futex (private resp. shared). */
- testl $(PI_BIT | PS_BIT), MUTEX_KIND(%r8)
+ /* Do not use requeue for pshared condvars. */
+ testl $PS_BIT, MUTEX_KIND(%r8)
jne 9f
- /* Wake up all threads. */
#ifdef __ASSUME_PRIVATE_FUTEX
- movl $(FUTEX_CMP_REQUEUE|FUTEX_PRIVATE_FLAG), %esi
+ movl $FUTEX_PRIVATE_FLAG, %esi
#else
movl %fs:PRIVATE_FUTEX, %esi
- orl $FUTEX_CMP_REQUEUE, %esi
#endif
+ /* Requeue to a PI mutex if the PI bit is set */
+ testl $PI_BIT, MUTEX_KIND(%r8)
+ jne 81f
+
+ orl $FUTEX_CMP_REQUEUE, %esi
+
+ /* Wake up all threads. */
movl $SYS_futex, %eax
movl $1, %edx
movl $0x7fffffff, %r10d
@@ -97,6 +100,21 @@ __pthread_cond_broadcast:
10: xorl %eax, %eax
retq
+81: orl $FUTEX_CMP_REQUEUE_PI, %esi
+
+ /* Wake up all threads. */
+ movl $SYS_futex, %eax
+ movl $1, %edx
+ movl $0x7fffffff, %r10d
+ syscall
+
+ /* For any kind of error, which mainly is EAGAIN, we try again
+ with WAKE. The general test also covers running on old
+ kernels. */
+ cmpq $-4095, %rax
+ jae 9f
+ jmp 10b
+
.align 16
/* Unlock. */
4: LOCK
@@ -128,6 +146,7 @@ __pthread_cond_broadcast:
movl $LLL_SHARED, %esi
cmovne %eax, %esi
callq __lll_unlock_wake
+ subq $cond_lock-cond_futex, %rdi
jmp 6b
/* Unlock in loop requires wakeup. */
diff -Nurp libc-20090427-1/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_signal.S libc-20090427-2/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_signal.S
--- libc-20090427-1/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_signal.S 2007-08-13 22:27:29.000000000 -0400
+++ libc-20090427-2/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_signal.S 2009-04-29 04:39:18.000000000 -0400
@@ -1,4 +1,5 @@
-/* Copyright (C) 2002, 2003, 2004, 2005, 2007 Free Software Foundation, Inc.
+/* Copyright (C) 2002, 2003, 2004, 2005, 2007, 2009
+ Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
@@ -22,6 +23,7 @@
#include <lowlevellock.h>
#include <lowlevelcond.h>
#include <kernel-features.h>
+#include <pthread-pi-defines.h>
#include <pthread-errnos.h>
@@ -54,21 +56,29 @@ __pthread_cond_signal:
addq $1, wakeup_seq(%r8)
addl $1, (%rdi)
- /* Wake up one thread. */
+ xorl %esi, %esi
cmpq $-1, dep_mutex(%r8)
- movl $1, %edx
+ movl $0, %eax
#ifdef __ASSUME_PRIVATE_FUTEX
- movl $FUTEX_WAKE_OP, %eax
- movl $(FUTEX_WAKE_OP|FUTEX_PRIVATE_FLAG), %esi
- cmove %eax, %esi
+ movl $FUTEX_PRIVATE_FLAG, %esi
#else
- movl $0, %eax
movl %fs:PRIVATE_FUTEX, %esi
+#endif
cmove %eax, %esi
+ je 22f
+
+ /* Get the address of the mutex used. */
+ movq dep_mutex(%r8), %rcx
+
+ testl $PI_BIT, MUTEX_KIND(%rcx)
+ jne 23f
+
+22: /* Wake up one thread. */
+ movl $1, %edx
+ movl $SYS_futex, %eax
+
orl $FUTEX_WAKE_OP, %esi
-#endif
movl $1, %r10d
- movl $SYS_futex, %eax
#if cond_lock != 0
addq $cond_lock, %r8
#endif
@@ -85,9 +95,31 @@ __pthread_cond_signal:
xorl %eax, %eax
retq
-7: /* %esi should be either FUTEX_WAKE_OP or
- FUTEX_WAKE_OP|FUTEX_PRIVATE_FLAG from the previous syscall. */
- xorl $(FUTEX_WAKE ^ FUTEX_WAKE_OP), %esi
+23: /* Wake up one thread and requeue none in the PI Mutex case. */
+ movl $1, %edx
+ movl $SYS_futex, %eax
+
+ orl $FUTEX_CMP_REQUEUE_PI, %esi
+ movq %rcx, %r8
+ movl $0, %r10d
+ movl (%rdi), %r9d
+ syscall
+
+ movq %rdi, %r8
+ subq $cond_futex, %r8
+
+ /* For any kind of error, we try again with WAKE.
+ The general test also covers running on old kernels. */
+ cmpq $-4095, %rax
+ jb 4f
+
+7:
+#ifdef __ASSUME_PRIVATE_FUTEX
+ andl $FUTEX_PRIVATE_FLAG, %esi
+#else
+ andl %fs:PRIVATE_FUTEX, %esi
+#endif
+ orl $FUTEX_WAKE, %esi
movl $SYS_futex, %eax
/* %rdx should be 1 already from $FUTEX_WAKE_OP syscall.
movl $1, %edx */
diff -Nurp libc-20090427-1/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S libc-20090427-2/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S
--- libc-20090427-1/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S 2009-03-15 05:03:15.000000000 -0400
+++ libc-20090427-2/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S 2009-04-29 04:39:18.000000000 -0400
@@ -22,6 +22,7 @@
#include <lowlevellock.h>
#include <lowlevelcond.h>
#include <pthread-errnos.h>
+#include <pthread-pi-defines.h>
#include <kernel-features.h>
@@ -47,6 +48,9 @@ __pthread_cond_timedwait:
pushq %r14
cfi_adjust_cfa_offset(8)
cfi_rel_offset(%r14, 0)
+ pushq %r15
+ cfi_adjust_cfa_offset(8)
+ cfi_rel_offset(%r15, 0)
#define FRAME_SIZE 80
subq $FRAME_SIZE, %rsp
cfi_adjust_cfa_offset(FRAME_SIZE)
@@ -127,7 +131,7 @@ __pthread_cond_timedwait:
movl %edx, 4(%rsp)
/* Get the current time. */
-8:
+8: movq $0, %r15
#ifdef __NR_clock_gettime
/* Get the clock number. Note that the field in the condvar
structure stores the number minus 1. */
@@ -202,24 +206,51 @@ __pthread_cond_timedwait:
leaq 24(%rsp), %r10
cmpq $-1, dep_mutex(%rdi)
movq %r12, %rdx
+
+ movl $0, %eax
#ifdef __ASSUME_PRIVATE_FUTEX
- movl $FUTEX_WAIT, %eax
- movl $(FUTEX_WAIT|FUTEX_PRIVATE_FLAG), %esi
- cmove %eax, %esi
+ movl $FUTEX_PRIVATE_FLAG, %esi
#else
- movl $0, %eax
movl %fs:PRIVATE_FUTEX, %esi
+#endif
cmove %eax, %esi
+ je 41f
+
+ /* Get the address of the mutex used. */
+ movq dep_mutex(%rdi), %r8
+ /* Requeue to a PI mutex if the PI bit is set */
+ testl $PI_BIT, MUTEX_KIND(%r8)
+ jne 42f
+
+41:
# if FUTEX_WAIT != 0
orl $FUTEX_WAIT, %esi
# endif
-#endif
addq $cond_futex, %rdi
movl $SYS_futex, %eax
syscall
movq %rax, %r14
+ jmp 43f
+
+42: orl $FUTEX_WAIT_REQUEUE_PI, %esi
+ addq $cond_futex, %rdi
+ movl $SYS_futex, %eax
+ syscall
+ movq %rax, %r14
+
+ /* Old kernels may not support requeue functionality. Try
+ again with FUTEX_WAIT */
+#ifdef __ASSUME_PRIVATE_FUTEX
+ andl $FUTEX_PRIVATE_FLAG, %esi
+#else
+ andl %fs:PRIVATE_FUTEX, %esi
+#endif
+ cmpq $-4095, %rax
+ jae 41b
+ /* If REQUEUE_PI was successful we already hold the mutex lock */
+ movq $1, %r15
- movl (%rsp), %edi
+43: movl (%rsp), %edi
callq __pthread_disable_asynccancel
/* Lock. */
@@ -301,14 +332,22 @@ __pthread_cond_timedwait:
11: movq 48+CLEANUP_PREV(%rsp), %rdx
movq %rdx, %fs:CLEANUP
+ /* With requeue_pi, the lock is held in the kernel, so just return to application */
+ cmpq $1, %r15
+ movq $0, %rax
+ je 26f
+
movq 16(%rsp), %rdi
callq __pthread_mutex_cond_lock
- testq %rax, %rax
+26: testq %rax, %rax
cmoveq %r14, %rax
18: addq $FRAME_SIZE, %rsp
cfi_adjust_cfa_offset(-FRAME_SIZE)
+ popq %r15
+ cfi_adjust_cfa_offset(-8)
+ cfi_restore(%r15)
popq %r14
cfi_adjust_cfa_offset(-8)
cfi_restore(%r14)
@@ -323,10 +362,11 @@ __pthread_cond_timedwait:
/* Initial locking failed. */
1:
- cfi_adjust_cfa_offset(3 * 8 + FRAME_SIZE)
- cfi_rel_offset(%r12, FRAME_SIZE + 16)
- cfi_rel_offset(%r13, FRAME_SIZE + 8)
- cfi_rel_offset(%r14, FRAME_SIZE)
+ cfi_adjust_cfa_offset(4 * 8 + FRAME_SIZE)
+ cfi_rel_offset(%r12, FRAME_SIZE + 24)
+ cfi_rel_offset(%r13, FRAME_SIZE + 16)
+ cfi_rel_offset(%r14, FRAME_SIZE + 8)
+ cfi_rel_offset(%r15, FRAME_SIZE)
#if cond_lock != 0
addq $cond_lock, %rdi
#endif
diff -Nurp libc-20090427-1/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S libc-20090427-2/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
--- libc-20090427-1/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S 2007-09-02 12:52:35.000000000 -0400
+++ libc-20090427-2/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S 2009-04-29 04:39:18.000000000 -0400
@@ -1,4 +1,5 @@
-/* Copyright (C) 2002,2003,2004,2005,2006,2007 Free Software Foundation, Inc.
+/* Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2009
+ Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
@@ -22,6 +23,7 @@
#include <lowlevellock.h>
#include <lowlevelcond.h>
#include <tcb-offsets.h>
+#include <pthread-pi-defines.h>
#include <kernel-features.h>
@@ -154,6 +156,8 @@ __pthread_cond_wait:
.LSTARTCODE:
pushq %r12
.Lpush_r12:
+ pushq %r13
+.Lpush_r13:
#define FRAME_SIZE 64
subq $FRAME_SIZE, %rsp
.Lsubq:
@@ -239,26 +243,52 @@ __pthread_cond_wait:
movl %eax, (%rsp)
movq 8(%rsp), %rdi
+ xorq %r13, %r13
xorq %r10, %r10
movq %r12, %rdx
addq $cond_futex-cond_lock, %rdi
cmpq $-1, dep_mutex-cond_futex(%rdi)
+
+ movl $0, %eax
#ifdef __ASSUME_PRIVATE_FUTEX
- movl $FUTEX_WAIT, %eax
- movl $(FUTEX_WAIT|FUTEX_PRIVATE_FLAG), %esi
- cmove %eax, %esi
+ movl $FUTEX_PRIVATE_FLAG, %esi
#else
- movl $FUTEX_WAIT, %eax
movl %fs:PRIVATE_FUTEX, %esi
+#endif
cmove %eax, %esi
+ je 41f
+
+ /* Get the address of the mutex used. */
+ movq dep_mutex-cond_futex(%rdi), %r8
+ /* Requeue to a PI mutex if the PI bit is set */
+ testl $PI_BIT, MUTEX_KIND(%r8)
+ jne 42f
+
+41:
# if FUTEX_WAIT != 0
orl $FUTEX_WAIT, %esi
# endif
-#endif
+ movl $SYS_futex, %eax
+ syscall
+ jmp 43f
+
+42: orl $FUTEX_WAIT_REQUEUE_PI, %esi
movl $SYS_futex, %eax
syscall
- movl (%rsp), %edi
+ /* Old kernels may not support requeue functionality. Try
+ again with FUTEX_WAIT */
+#ifdef __ASSUME_PRIVATE_FUTEX
+ andl $FUTEX_PRIVATE_FLAG, %esi
+#else
+ andl %fs:PRIVATE_FUTEX, %esi
+#endif
+ cmpq $-4095, %rax
+ jae 41b
+ /* If REQUEUE_PI was successful we already hold the mutex lock */
+ movq $1, %r13
+
+43: movl (%rsp), %edi
callq __pthread_disable_asynccancel
/* Lock. */
@@ -329,11 +359,19 @@ __pthread_cond_wait:
11: movq 32+CLEANUP_PREV(%rsp), %rdx
movq %rdx, %fs:CLEANUP
+ /* With requeue_pi, the lock is held in the kernel,
+ so just return 0 to application */
+ cmpq $1, %r13
+ movq $0, %rax
+ je 14f
+
movq 16(%rsp), %rdi
callq __pthread_mutex_cond_lock
14: addq $FRAME_SIZE, %rsp
.Laddq:
+ popq %r13
+.Lpop_r13:
popq %r12
.Lpop_r12:
@@ -468,14 +506,23 @@ versioned_symbol (libpthread, __pthread_
.uleb128 16
.byte 0x8c # DW_CFA_offset %r12
.uleb128 2
- .byte 0x40+.Lsubq-.Lpush_r12 # DW_CFA_advance_loc+N
+ .byte 0x40+.Lpush_r13-.Lpush_r12 # DW_CFA_advance_loc+N
.byte 14 # DW_CFA_def_cfa_offset
- .uleb128 16+FRAME_SIZE
+ .uleb128 24
+ .byte 0x8d # DW_CFA_offset %r13
+ .uleb128 2
+ .byte 0x40+.Lsubq-.Lpush_r13 # DW_CFA_advance_loc+N
+ .byte 14 # DW_CFA_def_cfa_offset
+ .uleb128 24+FRAME_SIZE
.byte 3 # DW_CFA_advance_loc2
.2byte .Laddq-.Lsubq
.byte 14 # DW_CFA_def_cfa_offset
.uleb128 16
- .byte 0x40+.Lpop_r12-.Laddq # DW_CFA_advance_loc+N
+ .byte 0x40+.Lpop_r13-.Laddq # DW_CFA_advance_loc+N
+ .byte 14 # DW_CFA_def_cfa_offset
+ .uleb128 8
+ .byte 0xcd # DW_CFA_restore %r13
+ .byte 0x40+.Lpop_r12-.Lpop_r13 # DW_CFA_advance_loc+N
.byte 14 # DW_CFA_def_cfa_offset
.uleb128 8
.byte 0xcc # DW_CFA_restore %r12
@@ -484,5 +531,8 @@ versioned_symbol (libpthread, __pthread_
.uleb128 80
.byte 0x8c # DW_CFA_offset %r12
.uleb128 2
+ .byte 0x8d # DW_CFA_offset %r13
+ .uleb128 3
+
.align 8
.LENDFDE:
More information about the Libc-alpha
mailing list