This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[RFC PATCH 2/2] Add x86_64 arch support for Requeue PI


This patch adds the x86_64 optimised code for requeue PI.
Testing is the same as described in the previous mail.

2009-05-05  Dinakar Guniguntala <dino@in.ibm.com>

        * sysdeps/unix/sysv/linux/x86_64/pthread_cond_broadcast.S: Add
        support for using FUTEX_CMP_REQUEUE_PI for PI mutexes.  In all
        the pthread_cond_xxx functions, a plain private (non-PI) mutex
        is treated as the fast path, and the PI mutex changes are
        arranged so that this fast path is not affected.
        * sysdeps/unix/sysv/linux/x86_64/pthread_cond_signal.S: Likewise.
        * sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S: Add
        support for using FUTEX_WAIT_REQUEUE_PI for PI mutexes.
        * sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S: Likewise.


diff -Nurp libc-20090427-1/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_broadcast.S libc-20090427-2/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_broadcast.S
--- libc-20090427-1/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_broadcast.S	2007-08-13 22:27:03.000000000 -0400
+++ libc-20090427-2/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_broadcast.S	2009-04-29 04:39:18.000000000 -0400
@@ -1,4 +1,4 @@
-/* Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007
+/* Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2009
    Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
@@ -70,19 +70,22 @@ __pthread_cond_broadcast:
 8:	cmpq	$-1, %r8
 	je	9f
 
-	/* XXX: The kernel so far doesn't support requeue to PI futex.  */
-	/* XXX: The kernel only supports FUTEX_CMP_REQUEUE to the same
-	   type of futex (private resp. shared).  */
-	testl	$(PI_BIT | PS_BIT), MUTEX_KIND(%r8)
+	/* Do not use requeue for pshared condvars.  */
+	testl	$PS_BIT, MUTEX_KIND(%r8)
 	jne	9f
 
-	/* Wake up all threads.  */
 #ifdef __ASSUME_PRIVATE_FUTEX
-	movl	$(FUTEX_CMP_REQUEUE|FUTEX_PRIVATE_FLAG), %esi
+	movl	$FUTEX_PRIVATE_FLAG, %esi
 #else
 	movl	%fs:PRIVATE_FUTEX, %esi
-	orl	$FUTEX_CMP_REQUEUE, %esi
 #endif
+	/* Requeue to a PI mutex if the PI bit is set */
+	testl	$PI_BIT, MUTEX_KIND(%r8)
+	jne	81f
+	
+	orl	$FUTEX_CMP_REQUEUE, %esi
+
+	/* Wake up all threads.  */
 	movl	$SYS_futex, %eax
 	movl	$1, %edx
 	movl	$0x7fffffff, %r10d
@@ -97,6 +100,21 @@ __pthread_cond_broadcast:
 10:	xorl	%eax, %eax
 	retq
 
+81:	orl	$FUTEX_CMP_REQUEUE_PI, %esi
+
+	/* Wake up all threads.  */
+	movl	$SYS_futex, %eax
+	movl	$1, %edx
+	movl	$0x7fffffff, %r10d
+	syscall
+
+	/* For any kind of error, which mainly is EAGAIN, we try again
+	   with WAKE.  The general test also covers running on old
+	   kernels.  */
+	cmpq	$-4095, %rax
+	jae	9f
+	jmp	10b
+
 	.align	16
 	/* Unlock.  */
 4:	LOCK
@@ -128,6 +146,7 @@ __pthread_cond_broadcast:
 	movl	$LLL_SHARED, %esi
 	cmovne	%eax, %esi
 	callq	__lll_unlock_wake
+	subq	$cond_lock-cond_futex, %rdi
 	jmp	6b
 
 	/* Unlock in loop requires wakeup.  */
diff -Nurp libc-20090427-1/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_signal.S libc-20090427-2/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_signal.S
--- libc-20090427-1/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_signal.S	2007-08-13 22:27:29.000000000 -0400
+++ libc-20090427-2/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_signal.S	2009-04-29 04:39:18.000000000 -0400
@@ -1,4 +1,5 @@
-/* Copyright (C) 2002, 2003, 2004, 2005, 2007 Free Software Foundation, Inc.
+/* Copyright (C) 2002, 2003, 2004, 2005, 2007, 2009
+   Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
 
@@ -22,6 +23,7 @@
 #include <lowlevellock.h>
 #include <lowlevelcond.h>
 #include <kernel-features.h>
+#include <pthread-pi-defines.h>
 #include <pthread-errnos.h>
 
 
@@ -54,21 +56,29 @@ __pthread_cond_signal:
 	addq	$1, wakeup_seq(%r8)
 	addl	$1, (%rdi)
 
-	/* Wake up one thread.  */
+	xorl	%esi, %esi
 	cmpq	$-1, dep_mutex(%r8)
-	movl	$1, %edx
+	movl	$0, %eax
 #ifdef __ASSUME_PRIVATE_FUTEX
-	movl	$FUTEX_WAKE_OP, %eax
-	movl	$(FUTEX_WAKE_OP|FUTEX_PRIVATE_FLAG), %esi
-	cmove	%eax, %esi
+	movl	$FUTEX_PRIVATE_FLAG, %esi
 #else
-	movl	$0, %eax
 	movl	%fs:PRIVATE_FUTEX, %esi
+#endif
 	cmove	%eax, %esi
+	je	22f
+
+	/* Get the address of the mutex used.  */
+	movq    dep_mutex(%r8), %rcx
+
+	testl	$PI_BIT, MUTEX_KIND(%rcx)
+	jne	23f
+
+22:	/* Wake up one thread.  */
+	movl	$1, %edx
+	movl	$SYS_futex, %eax
+
 	orl	$FUTEX_WAKE_OP, %esi
-#endif
 	movl	$1, %r10d
-	movl	$SYS_futex, %eax
 #if cond_lock != 0
 	addq	$cond_lock, %r8
 #endif
@@ -85,9 +95,31 @@ __pthread_cond_signal:
 	xorl	%eax, %eax
 	retq
 
-7:	/* %esi should be either FUTEX_WAKE_OP or
-	   FUTEX_WAKE_OP|FUTEX_PRIVATE_FLAG from the previous syscall.  */
-	xorl	$(FUTEX_WAKE ^ FUTEX_WAKE_OP), %esi
+23:	/* Wake up one thread and requeue none in the PI Mutex case.  */
+	movl	$1, %edx
+	movl	$SYS_futex, %eax
+
+	orl	$FUTEX_CMP_REQUEUE_PI, %esi
+	movq	%rcx, %r8
+	movl	$0, %r10d
+	movl	(%rdi), %r9d
+	syscall
+
+	movq	%rdi, %r8
+	subq	$cond_futex, %r8
+	
+	/* For any kind of error, we try again with WAKE.
+	   The general test also covers running on old kernels.  */
+	cmpq	$-4095, %rax
+	jb	4f
+
+7:
+#ifdef __ASSUME_PRIVATE_FUTEX
+	andl	$FUTEX_PRIVATE_FLAG, %esi
+#else
+	andl	%fs:PRIVATE_FUTEX, %esi
+#endif
+	orl	$FUTEX_WAKE, %esi
 	movl	$SYS_futex, %eax
 	/* %rdx should be 1 already from $FUTEX_WAKE_OP syscall.
 	movl	$1, %edx  */
diff -Nurp libc-20090427-1/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S libc-20090427-2/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S
--- libc-20090427-1/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S	2009-03-15 05:03:15.000000000 -0400
+++ libc-20090427-2/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S	2009-04-29 04:39:18.000000000 -0400
@@ -22,6 +22,7 @@
 #include <lowlevellock.h>
 #include <lowlevelcond.h>
 #include <pthread-errnos.h>
+#include <pthread-pi-defines.h>
 
 #include <kernel-features.h>
 
@@ -47,6 +48,9 @@ __pthread_cond_timedwait:
 	pushq	%r14
 	cfi_adjust_cfa_offset(8)
 	cfi_rel_offset(%r14, 0)
+	pushq	%r15
+	cfi_adjust_cfa_offset(8)
+	cfi_rel_offset(%r15, 0)
 #define FRAME_SIZE 80
 	subq	$FRAME_SIZE, %rsp
 	cfi_adjust_cfa_offset(FRAME_SIZE)
@@ -127,7 +131,7 @@ __pthread_cond_timedwait:
 	movl	%edx, 4(%rsp)
 
 	/* Get the current time.  */
-8:
+8:	movq	$0, %r15
 #ifdef __NR_clock_gettime
 	/* Get the clock number.  Note that the field in the condvar
 	   structure stores the number minus 1.  */
@@ -202,24 +206,51 @@ __pthread_cond_timedwait:
 	leaq	24(%rsp), %r10
 	cmpq	$-1, dep_mutex(%rdi)
 	movq	%r12, %rdx
+
+	movl	$0, %eax
 #ifdef __ASSUME_PRIVATE_FUTEX
-	movl	$FUTEX_WAIT, %eax
-	movl	$(FUTEX_WAIT|FUTEX_PRIVATE_FLAG), %esi
-	cmove	%eax, %esi
+	movl	$FUTEX_PRIVATE_FLAG, %esi
 #else
-	movl	$0, %eax
 	movl	%fs:PRIVATE_FUTEX, %esi
+#endif
 	cmove	%eax, %esi
+	je	41f
+
+	/* Get the address of the mutex used.  */
+	movq	dep_mutex(%rdi), %r8
+	/* Requeue to a PI mutex if the PI bit is set */
+        testl	$PI_BIT, MUTEX_KIND(%r8)
+        jne	42f
+
+41:
 # if FUTEX_WAIT != 0
 	orl	$FUTEX_WAIT, %esi
 # endif
-#endif
 	addq	$cond_futex, %rdi
 	movl	$SYS_futex, %eax
 	syscall
 	movq	%rax, %r14
+	jmp	43f
+
+42:	orl	$FUTEX_WAIT_REQUEUE_PI, %esi
+	addq	$cond_futex, %rdi
+	movl	$SYS_futex, %eax
+	syscall
+	movq	%rax, %r14
+
+	/* Old kernels may not support requeue functionality. Try
+	again with FUTEX_WAIT */
+#ifdef __ASSUME_PRIVATE_FUTEX
+	andl	$FUTEX_PRIVATE_FLAG, %esi
+#else
+	andl	%fs:PRIVATE_FUTEX, %esi
+#endif
+	cmpq	$-4095, %rax
+	jae	41b
+	/* If REQUEUE_PI was successful we already hold the mutex lock */
+	movq	$1, %r15
 
-	movl	(%rsp), %edi
+43: 	movl	(%rsp), %edi
 	callq	__pthread_disable_asynccancel
 
 	/* Lock.  */
@@ -301,14 +332,22 @@ __pthread_cond_timedwait:
 11:	movq	48+CLEANUP_PREV(%rsp), %rdx
 	movq	%rdx, %fs:CLEANUP
 
+	/* With requeue_pi, the lock is held in the kernel, so just return to application */
+	cmpq	$1, %r15
+	movq	$0, %rax
+	je	26f
+
 	movq	16(%rsp), %rdi
 	callq	__pthread_mutex_cond_lock
 
-	testq	%rax, %rax
+26:	testq	%rax, %rax
 	cmoveq	%r14, %rax
 
 18:	addq	$FRAME_SIZE, %rsp
 	cfi_adjust_cfa_offset(-FRAME_SIZE)
+	popq	%r15
+	cfi_adjust_cfa_offset(-8)
+	cfi_restore(%r15)
 	popq	%r14
 	cfi_adjust_cfa_offset(-8)
 	cfi_restore(%r14)
@@ -323,10 +362,11 @@ __pthread_cond_timedwait:
 
 	/* Initial locking failed.  */
 1:
-	cfi_adjust_cfa_offset(3 * 8 + FRAME_SIZE)
-	cfi_rel_offset(%r12, FRAME_SIZE + 16)
-	cfi_rel_offset(%r13, FRAME_SIZE + 8)
-	cfi_rel_offset(%r14, FRAME_SIZE)
+	cfi_adjust_cfa_offset(4 * 8 + FRAME_SIZE)
+	cfi_rel_offset(%r12, FRAME_SIZE + 24)
+	cfi_rel_offset(%r13, FRAME_SIZE + 16)
+	cfi_rel_offset(%r14, FRAME_SIZE + 8)
+	cfi_rel_offset(%r15, FRAME_SIZE)
 #if cond_lock != 0
 	addq	$cond_lock, %rdi
 #endif
diff -Nurp libc-20090427-1/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S libc-20090427-2/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
--- libc-20090427-1/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S	2007-09-02 12:52:35.000000000 -0400
+++ libc-20090427-2/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S	2009-04-29 04:39:18.000000000 -0400
@@ -1,4 +1,5 @@
-/* Copyright (C) 2002,2003,2004,2005,2006,2007 Free Software Foundation, Inc.
+/* Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2009
+   Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
 
@@ -22,6 +23,7 @@
 #include <lowlevellock.h>
 #include <lowlevelcond.h>
 #include <tcb-offsets.h>
+#include <pthread-pi-defines.h>
 
 #include <kernel-features.h>
 
@@ -154,6 +156,8 @@ __pthread_cond_wait:
 .LSTARTCODE:
 	pushq	%r12
 .Lpush_r12:
+	pushq	%r13
+.Lpush_r13:
 #define FRAME_SIZE 64
 	subq	$FRAME_SIZE, %rsp
 .Lsubq:
@@ -239,26 +243,52 @@ __pthread_cond_wait:
 	movl	%eax, (%rsp)
 
 	movq	8(%rsp), %rdi
+	xorq	%r13, %r13
 	xorq	%r10, %r10
 	movq	%r12, %rdx
 	addq	$cond_futex-cond_lock, %rdi
 	cmpq	$-1, dep_mutex-cond_futex(%rdi)
+
+	movl	$0, %eax
 #ifdef __ASSUME_PRIVATE_FUTEX
-	movl	$FUTEX_WAIT, %eax
-	movl	$(FUTEX_WAIT|FUTEX_PRIVATE_FLAG), %esi
-	cmove	%eax, %esi
+	movl	$FUTEX_PRIVATE_FLAG, %esi
 #else
-	movl	$FUTEX_WAIT, %eax
 	movl	%fs:PRIVATE_FUTEX, %esi
+#endif
 	cmove	%eax, %esi
+	je	41f
+
+	/* Get the address of the mutex used.  */
+	movq	dep_mutex-cond_futex(%rdi), %r8
+	/* Requeue to a PI mutex if the PI bit is set */
+        testl	$PI_BIT, MUTEX_KIND(%r8)
+	jne	42f
+
+41:
 # if FUTEX_WAIT != 0
 	orl	$FUTEX_WAIT, %esi
 # endif
-#endif
+	movl	$SYS_futex, %eax
+	syscall
+	jmp 43f
+
+42:	orl	$FUTEX_WAIT_REQUEUE_PI, %esi
 	movl	$SYS_futex, %eax
 	syscall
 
-	movl	(%rsp), %edi
+	/* Old kernels may not support requeue functionality. Try
+	again with FUTEX_WAIT */
+#ifdef __ASSUME_PRIVATE_FUTEX
+	andl	$FUTEX_PRIVATE_FLAG, %esi
+#else
+	andl	%fs:PRIVATE_FUTEX, %esi
+#endif
+	cmpq	$-4095, %rax
+	jae	41b
+	/* If REQUEUE_PI was successful we already hold the mutex lock */
+	movq	$1, %r13
+
+43:	movl	(%rsp), %edi
 	callq	__pthread_disable_asynccancel
 
 	/* Lock.  */
@@ -329,11 +359,19 @@ __pthread_cond_wait:
 11:	movq	32+CLEANUP_PREV(%rsp), %rdx
 	movq	%rdx, %fs:CLEANUP
 
+	/* With requeue_pi, the lock is held in the kernel,
+	   so just return 0 to application */
+	cmpq	$1, %r13
+	movq	$0, %rax
+	je	14f
+
 	movq	16(%rsp), %rdi
 	callq	__pthread_mutex_cond_lock
 14:	addq	$FRAME_SIZE, %rsp
 .Laddq:
 
+	popq	%r13
+.Lpop_r13:
 	popq	%r12
 .Lpop_r12:
 
@@ -468,14 +506,23 @@ versioned_symbol (libpthread, __pthread_
 	.uleb128 16
 	.byte	0x8c				# DW_CFA_offset %r12
 	.uleb128 2
-	.byte	0x40+.Lsubq-.Lpush_r12		# DW_CFA_advance_loc+N
+	.byte	0x40+.Lpush_r13-.Lpush_r12	# DW_CFA_advance_loc+N
 	.byte	14				# DW_CFA_def_cfa_offset
-	.uleb128 16+FRAME_SIZE
+	.uleb128 24
+	.byte	0x8d				# DW_CFA_offset %r13
+	.uleb128 2
+	.byte	0x40+.Lsubq-.Lpush_r13		# DW_CFA_advance_loc+N
+	.byte	14				# DW_CFA_def_cfa_offset
+	.uleb128 24+FRAME_SIZE
 	.byte	3				# DW_CFA_advance_loc2
 	.2byte	.Laddq-.Lsubq
 	.byte	14				# DW_CFA_def_cfa_offset
 	.uleb128 16
-	.byte	0x40+.Lpop_r12-.Laddq		# DW_CFA_advance_loc+N
+	.byte	0x40+.Lpop_r13-.Laddq		# DW_CFA_advance_loc+N
+	.byte	14				# DW_CFA_def_cfa_offset
+	.uleb128 8
+	.byte	0xcd				# DW_CFA_restore %r13
+	.byte	0x40+.Lpop_r12-.Lpop_r13	# DW_CFA_advance_loc+N
 	.byte	14				# DW_CFA_def_cfa_offset
 	.uleb128 8
 	.byte	0xcc				# DW_CFA_restore %r12
@@ -484,5 +531,8 @@ versioned_symbol (libpthread, __pthread_
 	.uleb128 80
 	.byte	0x8c				# DW_CFA_offset %r12
 	.uleb128 2
+	.byte   0x8d				# DW_CFA_offset %r13
+        .uleb128 3
+
 	.align	8
 .LENDFDE:


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]