This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
[PATCH][BZ #6816][3/5] VSX implementation for *context routines (powerpc32)
- From: Carlos Eduardo Seo <cseo at linux dot vnet dot ibm dot com>
- To: libc-alpha at sourceware dot org
- Date: Tue, 05 Aug 2008 19:44:39 -0300
- Subject: [PATCH][BZ #6816][3/5] VSX implementation for *context routines (powerpc32)
- Openpgp: id=8BFFA900
VSX implementation for getcontext, setcontext and swapcontext (powerpc32).
--
Carlos Eduardo Seo
Software Engineer
IBM Linux Technology Center
2008-07-31 Carlos Eduardo Seo <cseo@linux.vnet.ibm.com>
Steven Munroe <sjmunroe@us.ibm.com>
[BZ #6816]
* sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext-common.S:
Added a new scheme necessary for handling VSX registers, in
addition to the existing one.
* sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext-common.S:
Likewise.
* sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext-common.S:
Likewise.
* sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext.S: Added
new ucontext size and symbol versioning code for GLIBC 2.9.
* sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext.S: Likewise.
* sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext.S: Likewise.
Index: libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext-common.S
===================================================================
--- libc.orig/sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext-common.S 2008-07-31 16:29:47.000000000 -0500
+++ libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext-common.S 2008-07-31 16:33:02.000000000 -0500
@@ -24,6 +24,7 @@
and if appropriate:
__CONTEXT_ENABLE_FPRS
__CONTEXT_ENABLE_VRS
+ __CONTEXT_ENABLE_VSRS
Any archecture that implements the Vector unit is assumed to also
implement the floating unit. */
@@ -105,7 +106,12 @@
stw r0,_UC_GREGS+(PT_MSR*4)(r3)
stw r0,_UC_GREGS+(PT_MQ*4)(r3)
-#ifdef __CONTEXT_ENABLE_FPRS
+/* For VSR registers, we need a new scheme for saving the
+ registers in order to perform less store operations. If the
+ processor does not have VSR, use the old scheme. */
+#ifndef __CONTEXT_ENABLE_VSRS
+ /* Old scheme for storing FP and VR registers. */
+# ifdef __CONTEXT_ENABLE_FPRS
/* Save the floating-point registers */
stfd fp0,_UC_FREGS+(0*8)(r3)
stfd fp1,_UC_FREGS+(1*8)(r3)
@@ -142,37 +148,37 @@
stfd fp31,_UC_FREGS+(31*8)(r3)
stfd fp0,_UC_FREGS+(32*8)(r3)
-# ifdef __CONTEXT_ENABLE_VRS
-# ifdef PIC
+# ifdef __CONTEXT_ENABLE_VRS
+# ifdef PIC
mflr r8
-# ifdef HAVE_ASM_PPC_REL16
+# ifdef HAVE_ASM_PPC_REL16
bcl 20,31,1f
1: mflr r7
addis r7,r7,_GLOBAL_OFFSET_TABLE_-1b@ha
addi r7,r7,_GLOBAL_OFFSET_TABLE_-1b@l
-# else
+# else
bl _GLOBAL_OFFSET_TABLE_@local-4
mflr r7
-# endif
-# ifdef SHARED
+# endif
+# ifdef SHARED
lwz r7,_rtld_global_ro@got(r7)
mtlr r8
lwz r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET(r7)
-# else
+# else
lwz r7,_dl_hwcap@got(r7)
mtlr r8
lwz r7,0(r7)
-# endif
-# else
+# endif
+# else
lis r7,_dl_hwcap@ha
lwz r7,_dl_hwcap@l(r7)
-# endif
+# endif
andis. r7,r7,(PPC_FEATURE_HAS_ALTIVEC >> 16)
la r10,(_UC_VREGS)(r3)
la r9,(_UC_VREGS+16)(r3)
- beq 2f /* L(no_vec) */
+ beq 6f /* L(no_vec) */
/* address of the combined VSCR/VSAVE quadword. */
la r8,(_UC_VREGS+512)(r3)
@@ -264,9 +270,261 @@
stw r0,0(r8)
-2: /* L(no_vec): */
+6: /* L(no_vec): */
+# endif /* __CONTEXT_ENABLE_VRS */
+# endif /* __CONTEXT_ENABLE_FPRS */
+#else
+ /* New scheme for saving the VSR registers. We need to save
+ the last 32 VSR registers first.*/
+# ifdef PIC
+ mflr r8
+# ifdef HAVE_ASM_PPC_REL16
+ bcl 20,31,3f
+3: mflr r7
+ addis r7,r7,_GLOBAL_OFFSET_TABLE_-3b@ha
+ addi r7,r7,_GLOBAL_OFFSET_TABLE_-3b@l
+# else
+ bl _GLOBAL_OFFSET_TABLE_@local-4
+ mflr r7
+# endif
+# ifdef SHARED
+ lwz r7,_rtld_global_ro@got(r7)
+ mtlr r8
+ lwz r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET(r7)
+# else
+ lwz r7,_dl_hwcap@got(r7)
+ mtlr r8
+ lwz r7,0(r7)
+# endif
+# else
+ lis r7,_dl_hwcap@ha
+ lwz r7,_dl_hwcap@l(r7)
# endif
+/* We check for VSX hwcap here. It is assumed that a processor that
+ has VSX registers also has Altivec capability. */
+ andi. r7,r7,PPC_FEATURE_HAS_VSX
+ la r6,(_UC_VSREGS)(r3)
+ beq 5f /* L(no_vs) */
+
+ la r10,(_UC_VREGS)(r3)
+ la r9,(_UC_VREGS+16)(r3)
+/* address of the combined VSCR/VSAVE quadword. */
+ la r8,(_UC_VREGS+512)(r3)
+
+/* Save the vector registers */
+ stvx v0,0,r10
+ stvx v1,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+/* We need to get the Vector Status and Control Register early to avoid
+ store order problems later with the VSAVE register that shares the
+ same quadword. */
+ mfvscr v0
+
+ stvx v2,0,r10
+ stvx v3,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ stvx v0,0,r8
+
+ stvx v4,0,r10
+ stvx v5,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ stvx v6,0,r10
+ stvx v7,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ stvx v8,0,r10
+ stvx v9,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ stvx v10,0,r10
+ stvx v11,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ stvx v12,0,r10
+ stvx v13,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ stvx v14,0,r10
+ stvx v15,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ stvx v16,0,r10
+ stvx v17,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ stvx v18,0,r10
+ stvx v19,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ stvx v20,0,r10
+ stvx v21,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ stvx v22,0,r10
+ stvx v23,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ stvx v24,0,r10
+ stvx v25,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ stvx v26,0,r10
+ stvx v27,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ stvx v28,0,r10
+ stvx v29,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ mfvscr v0
+ stvx v30,0,r10
+ stvx v31,0,r9
+ stw r0,0(r8)
+
+#ifdef HAVE_ASM_PPC_VSX
+ /* Only proceed with this if binutils can handle .machine "power7". */
+
+ /* Proceeding to the FP registers and the doubleword 1
+ of the first 32 VSR registers. */
+ la r7,(_UC_FREGS)(r3)
+ la r6,(_UC_VSREGS)(r3)
+ /* Save fp0 and fp1 into vs32. */
+ xxmrghd vs32,vs0,vs1
+ /* Save vs0[1] and vs1[1] into vs33. */
+ xxmrgld vs33,vs0,vs1
+ /* Save f0 and f1. */
+ stxvd2x vs32,0,r7
+ /* Save vs0[1] and vs1[1]. */
+ stxvd2x vs33,0,r6
+ addi r7,r7,16
+ addi r6,r6,16
+
+ xxmrghd vs34,vs2,vs3
+ xxmrgld vs35,vs2,vs3
+ stxvd2x vs34,0,r7
+ stxvd2x vs35,0,r6
+ addi r7,r7,16
+ addi r6,r6,16
+
+ xxmrghd vs36,vs4,vs5
+ xxmrgld vs37,vs4,vs5
+ stxvd2x vs36,0,r7
+ stxvd2x vs37,0,r6
+ addi r7,r7,16
+ addi r6,r6,16
+
+ xxmrghd vs38,vs6,vs7
+ xxmrgld vs39,vs6,vs7
+ stxvd2x vs38,0,r7
+ stxvd2x vs39,0,r6
+ addi r7,r7,16
+ addi r6,r6,16
+
+ xxmrghd vs40,vs8,vs9
+ xxmrgld vs41,vs8,vs9
+ stxvd2x vs40,0,r7
+ stxvd2x vs41,0,r6
+ addi r7,r7,16
+ addi r6,r6,16
+
+ xxmrghd vs42,vs10,vs11
+ xxmrgld vs43,vs10,vs11
+ stxvd2x vs42,0,r7
+ stxvd2x vs43,0,r6
+ addi r7,r7,16
+ addi r6,r6,16
+
+ xxmrghd vs44,vs12,vs13
+ xxmrgld vs45,vs12,vs13
+ stxvd2x vs44,0,r7
+ stxvd2x vs45,0,r6
+ addi r7,r7,16
+ addi r6,r6,16
+
+ xxmrghd vs46,vs14,vs15
+ xxmrgld vs47,vs14,vs15
+ stxvd2x vs46,0,r7
+ stxvd2x vs47,0,r6
+ addi r7,r7,16
+ addi r6,r6,16
+
+ xxmrghd vs48,vs16,vs17
+ xxmrgld vs49,vs16,vs17
+ stxvd2x vs48,0,r7
+ stxvd2x vs49,0,r6
+ addi r7,r7,16
+ addi r6,r6,16
+
+ xxmrghd vs50,vs18,vs19
+ xxmrgld vs51,vs18,vs19
+ stxvd2x vs50,0,r7
+ stxvd2x vs51,0,r6
+ addi r7,r7,16
+ addi r6,r6,16
+
+ xxmrghd vs52,vs20,vs21
+ xxmrgld vs53,vs20,vs21
+ stxvd2x vs52,0,r7
+ stxvd2x vs53,0,r6
+ addi r7,r7,16
+ addi r6,r6,16
+
+ xxmrghd vs54,vs22,vs23
+ xxmrgld vs55,vs22,vs23
+ stxvd2x vs54,0,r7
+ stxvd2x vs55,0,r6
+ addi r7,r7,16
+ addi r6,r6,16
+
+ xxmrghd vs56,vs24,vs25
+ xxmrgld vs57,vs24,vs25
+ stxvd2x vs56,0,r7
+ stxvd2x vs57,0,r6
+ addi r7,r7,16
+ addi r6,r6,16
+
+ xxmrghd vs58,vs26,vs27
+ xxmrgld vs59,vs26,vs27
+ stxvd2x vs58,0,r7
+ stxvd2x vs59,0,r6
+ addi r7,r7,16
+ addi r6,r6,16
+
+ xxmrghd vs60,vs28,vs29
+ xxmrgld vs61,vs28,vs29
+ stxvd2x vs60,0,r7
+ stxvd2x vs61,0,r6
+ addi r7,r7,16
+ addi r6,r6,16
+
+ xxmrghd vs62,vs30,vs31
+ xxmrgld vs63,vs30,vs31
+ stxvd2x vs62,0,r7
+ stxvd2x vs63,0,r6
+
+#else
+# warning "Binutils does not support VSX instructions."
#endif
+5:/*L(no_vs): */
+#endif /* __CONTEXT_ENABLE_VSRS */
+
/* We need to set up parms and call sigprocmask which will clobber
volatile registers. So before the call we need to retrieve the
original ucontext ptr (parm1) from stack and store the UC_REGS_PTR
Index: libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext.S
===================================================================
--- libc.orig/sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext.S 2008-07-31 16:29:47.000000000 -0500
+++ libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext.S 2008-07-31 16:30:56.000000000 -0500
@@ -29,15 +29,17 @@
#define __CONTEXT_FUNC_NAME __getcontext
#define __CONTEXT_ENABLE_FPRS 1
#define __CONTEXT_ENABLE_VRS 1
+#define __CONTEXT_ENABLE_VSRS 1
/* Size of ucontext in GLIBC_2.3.4 and later. */
#define _UC_SIZE_2_3_4 1184
+#define _UC_SIZE_2_9 1440
#ifdef __ASSUME_SWAPCONTEXT_SYSCALL
.section ".text";
ENTRY (__getcontext)
li r4,0
- li r5,_UC_SIZE_2_3_4;
+ li r5,_UC_SIZE_2_9;
DO_CALL (SYS_ify (swapcontext));
bso- cr0,1f
/* the kernel does not set the return code for the success case */
@@ -50,16 +52,42 @@
# include "getcontext-common.S"
#endif
-versioned_symbol (libc, __getcontext, getcontext, GLIBC_2_3_4)
+versioned_symbol (libc, __getcontext, getcontext, GLIBC_2_9)
+
+#if SHLIB_COMPAT (libc, GLIBC_2_3_4, GLIBC_2_9)
+ compat_text_section
+#ifdef __ASSUME_SWAPCONTEXT_SYSCALL
+ENTRY (__novsx_getcontext)
+ li r4,0
+ li r5,_UC_SIZE_2_3_4;
+ DO_CALL (SYS_ify (swapcontext));
+ bso- cr0,2f
+/* the kernel does not set the return code for the success case */
+ li r3,0
+ blr
+2:
+ b __syscall_error@local
+END (__novsx_getcontext)
+#else
+# undef __CONTEXT_ENABLE_VSRS
+# undef __CONTEXT_FUNC_NAME
+# define __CONTEXT_FUNC_NAME __novsx_getcontext
+# include "getcontext-common.S"
+
+ .previous
+#endif
+compat_symbol (libc, __novsx_getcontext, getcontext, GLIBC_2_3_4)
+#endif
#if SHLIB_COMPAT (libc, GLIBC_2_3_3, GLIBC_2_3_4)
compat_text_section
-# undef __CONTEXT_FUNC_NAME
+# undef __CONTEXT_FUNC_NAME
# define __CONTEXT_FUNC_NAME __novec_getcontext
+# undef __CONTEXT_ENABLE_VSRS
# undef __CONTEXT_ENABLE_VRS
-# include "getcontext-common.S"
+# include "getcontext-common.S"
.previous
Index: libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext-common.S
===================================================================
--- libc.orig/sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext-common.S 2008-07-31 16:29:47.000000000 -0500
+++ libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext-common.S 2008-07-31 16:33:44.000000000 -0500
@@ -24,6 +24,7 @@
and if appropriate:
__CONTEXT_ENABLE_FPRS
__CONTEXT_ENABLE_VRS
+ __CONTEXT_ENABLE_VSRS
Any archecture that implements the Vector unit is assumed to also
implement the floating unit. */
@@ -71,32 +72,36 @@
cmpwi r3,0
bne 3f /* L(error_exit) */
-#ifdef __CONTEXT_ENABLE_FPRS
-# ifdef __CONTEXT_ENABLE_VRS
-# ifdef PIC
+/* For VSR registers, we need a new scheme for restoring the
+ registers in order to perform less load operations. If the
+ processor does not have VSR, use the old scheme. */
+#ifndef __CONTEXT_ENABLE_VSRS
+# ifdef __CONTEXT_ENABLE_FPRS
+# ifdef __CONTEXT_ENABLE_VRS
+# ifdef PIC
mflr r8
-# ifdef HAVE_ASM_PPC_REL16
+# ifdef HAVE_ASM_PPC_REL16
bcl 20,31,1f
1: mflr r7
addis r7,r7,_GLOBAL_OFFSET_TABLE_-1b@ha
addi r7,r7,_GLOBAL_OFFSET_TABLE_-1b@l
-# else
+# else
bl _GLOBAL_OFFSET_TABLE_@local-4
mflr r7
-# endif
-# ifdef SHARED
+# endif
+# ifdef SHARED
lwz r7,_rtld_global_ro@got(r7)
mtlr r8
lwz r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET(r7)
-# else
+# else
lwz r7,_dl_hwcap@got(r7)
mtlr r8
lwz r7,0(r7)
-# endif
-# else
+# endif
+# else
lis r7,_dl_hwcap@ha
lwz r7,_dl_hwcap@l(r7)
-# endif
+# endif
andis. r7,r7,(PPC_FEATURE_HAS_ALTIVEC >> 16)
la r10,(_UC_VREGS)(r31)
beq 2f /* L(has_no_vec) */
@@ -195,7 +200,7 @@
lvx v11,0,r9
2: /* L(has_no_vec): */
-# endif /* __CONTEXT_ENABLE_VRS */
+# endif /* __CONTEXT_ENABLE_VRS */
/* Restore the floating-point registers */
lfd fp31,_UC_FREGS+(32*8)(r31)
lfd fp0,_UC_FREGS+(0*8)(r31)
@@ -231,7 +236,259 @@
lfd fp29,_UC_FREGS+(29*8)(r31)
lfd fp30,_UC_FREGS+(30*8)(r31)
lfd fp31,_UC_FREGS+(31*8)(r31)
-#endif /* __CONTEXT_ENABLE_FPRS */
+# endif /* __CONTEXT_ENABLE_FPRS */
+#else
+ /* New scheme for restoring the VSR registers. We need to load
+ the last 32 VSR registers first.*/
+# ifdef PIC
+ mflr r8
+# ifdef HAVE_ASM_PPC_REL16
+ bcl 20,31,5f
+5: mflr r7
+ addis r7,r7,_GLOBAL_OFFSET_TABLE_-5b@ha
+ addi r7,r7,_GLOBAL_OFFSET_TABLE_-5b@l
+# else
+ bl _GLOBAL_OFFSET_TABLE_@local-4
+ mflr r7
+# endif
+ mtlr r8
+# ifdef SHARED
+ lwz r7,_rtld_global_ro@got(r7)
+ lwz r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET(r7)
+# else
+ lwz r7,_dl_hwcap@got(r7)
+ lwz r7,0(r7)
+# endif
+# else
+ lis r7,_dl_hwcap@ha
+ lwz r7,_dl_hwcap@l(r7)
+# endif
+/* We check for VSX hwcap here. It is assumed that a processor that
+ has VSX registers also has Altivec capability. */
+ andi. r7,r7,PPC_FEATURE_HAS_VSX
+ la r6,(_UC_VSREGS)(r31)
+ beq 6f /* L(no_vs) */
+
+#ifdef HAVE_ASM_PPC_VSX
+ /* Only proceed with this if binutils can handle .machine "power7". */
+
+/* Using VMX registers as temps to minimize the number of loads for
+ restoring the FP and the doubleword 1 of VSR[0-31]. */
+ la r7,(_UC_FREGS)(r31)
+ la r6,(_UC_VSREGS)(r31)
+ /* Load f0 and f1 register state into vs32. */
+ lxvd2x vs32,0,r7
+ /* Load vs0[1] and vs1[1] register state into vs33. */
+ lxvd2x vs33,0,r6
+ /* Merge f0 and vs0[1] register state into vs0. */
+ xxmrghd vs0,vs32,vs33
+ /* Merge f1 and vs1[1] register state into vs1. */
+ xxmrgld vs1,vs32,vs33
+ addi r7,r7,16
+ addi r6,r6,16
+
+ lxvd2x vs34,0,r7
+ lxvd2x vs35,0,r6
+ xxmrghd vs2,vs34,vs35
+ xxmrghd vs3,vs34,vs35
+ addi r7,r7,16
+ addi r6,r6,16
+
+ lxvd2x vs36,0,r7
+ lxvd2x vs37,0,r6
+ xxmrghd vs4,vs36,vs37
+ xxmrghd vs5,vs36,vs37
+ addi r7,r7,16
+ addi r6,r6,16
+
+ lxvd2x vs38,0,r7
+ lxvd2x vs39,0,r6
+ xxmrghd vs6,vs38,vs39
+ xxmrghd vs7,vs38,vs39
+ addi r7,r7,16
+ addi r6,r6,16
+
+ lxvd2x vs40,0,r7
+ lxvd2x vs41,0,r6
+ xxmrghd vs8,vs40,vs41
+ xxmrghd vs9,vs40,vs41
+ addi r7,r7,16
+ addi r6,r6,16
+
+ lxvd2x vs42,0,r7
+ lxvd2x vs43,0,r6
+ xxmrghd vs10,vs42,vs43
+ xxmrghd vs11,vs42,vs43
+ addi r7,r7,16
+ addi r6,r6,16
+
+ lxvd2x vs44,0,r7
+ lxvd2x vs45,0,r6
+ xxmrghd vs12,vs44,vs45
+ xxmrghd vs13,vs44,vs45
+ addi r7,r7,16
+ addi r6,r6,16
+
+ lxvd2x vs46,0,r7
+ lxvd2x vs47,0,r6
+ xxmrghd vs14,vs46,vs47
+ xxmrghd vs15,vs46,vs47
+ addi r7,r7,16
+ addi r6,r6,16
+
+ lxvd2x vs48,0,r7
+ lxvd2x vs49,0,r6
+ xxmrghd vs16,vs48,vs49
+ xxmrghd vs17,vs48,vs49
+ addi r7,r7,16
+ addi r6,r6,16
+
+ lxvd2x vs50,0,r7
+ lxvd2x vs51,0,r6
+ xxmrghd vs18,vs50,vs51
+ xxmrghd vs19,vs50,vs51
+ addi r7,r7,16
+ addi r6,r6,16
+
+ lxvd2x vs52,0,r7
+ lxvd2x vs53,0,r6
+ xxmrghd vs20,vs52,vs53
+ xxmrghd vs21,vs52,vs53
+ addi r7,r7,16
+ addi r6,r6,16
+
+ lxvd2x vs54,0,r7
+ lxvd2x vs55,0,r6
+ xxmrghd vs22,vs54,vs55
+ xxmrghd vs23,vs54,vs55
+ addi r7,r7,16
+ addi r6,r6,16
+
+ lxvd2x vs56,0,r7
+ lxvd2x vs57,0,r6
+ xxmrghd vs24,vs56,vs57
+ xxmrghd vs25,vs56,vs57
+ addi r7,r7,16
+ addi r6,r6,16
+
+ lxvd2x vs58,0,r7
+ lxvd2x vs59,0,r6
+ xxmrghd vs26,vs58,vs59
+ xxmrghd vs27,vs58,vs59
+ addi r7,r7,16
+ addi r6,r6,16
+
+ lxvd2x vs60,0,r7
+ lxvd2x vs61,0,r6
+ xxmrghd vs28,vs60,vs61
+ xxmrghd vs29,vs60,vs61
+ addi r7,r7,16
+ addi r6,r6,16
+
+ lxvd2x vs62,0,r7
+ lxvd2x vs63,0,r6
+ xxmrghd vs30,vs62,vs63
+ xxmrghd vs31,vs62,vs63
+
+#else
+# warning "Binutils does not support VSX instructions."
+#endif
+
+/* Now we can proceed restoring the VMX registers. */
+ la r10,(_UC_VREGS)(r31)
+ lwz r0,(32*16)(r10)
+ li r9,(32*16)
+ cmpwi r0,0
+ mtspr VRSAVE,r0
+ lvx v19,r9,r10
+ la r9,(16)(r10)
+
+ lvx v0,0,r10
+ lvx v1,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ mtvscr v19
+ lvx v2,0,r10
+ lvx v3,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ lvx v4,0,r10
+ lvx v5,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ lvx v6,0,r10
+ lvx v7,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ lvx v8,0,r10
+ lvx v9,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ lvx v10,0,r10
+ lvx v11,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ lvx v12,0,r10
+ lvx v13,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ lvx v14,0,r10
+ lvx v15,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ lvx v16,0,r10
+ lvx v17,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ lvx v18,0,r10
+ lvx v19,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ lvx v20,0,r10
+ lvx v21,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ lvx v22,0,r10
+ lvx v23,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ lvx v24,0,r10
+ lvx v25,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ lvx v26,0,r10
+ lvx v27,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ lvx v28,0,r10
+ lvx v29,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ lvx v30,0,r10
+ lvx v31,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ lvx v10,0,r10
+ lvx v11,0,r9
+
+6: /* L(no_vs): */
+#endif /* __CONTEXT_ENABLE_VSRS */
/* Restore LR and CCR, and set CTR to the NIP value */
lwz r3,_UC_GREGS+(PT_LNK*4)(r31)
Index: libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext.S
===================================================================
--- libc.orig/sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext.S 2008-07-31 16:29:47.000000000 -0500
+++ libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext.S 2008-07-31 16:30:56.000000000 -0500
@@ -29,16 +29,18 @@
#define __CONTEXT_FUNC_NAME __setcontext
#define __CONTEXT_ENABLE_FPRS 1
#define __CONTEXT_ENABLE_VRS 1
+#define __CONTEXT_ENABLE_VSRS 1
/* Size of ucontext in GLIBC_2.3.4 and later. */
#define _UC_SIZE_2_3_4 1184
+#define _UC_SIZE_2_9 1440
#ifdef __ASSUME_SWAPCONTEXT_SYSCALL
.section ".text";
ENTRY (__setcontext)
mr r4,r3
li r3,0
- li r5,_UC_SIZE_2_3_4;
+ li r5,_UC_SIZE_2_9;
DO_CALL (SYS_ify (swapcontext));
bso- cr0,1f
/* the kernel does not set the return code for the success case */
@@ -51,7 +53,33 @@
# include "setcontext-common.S"
#endif
-versioned_symbol (libc, __setcontext, setcontext, GLIBC_2_3_4)
+versioned_symbol (libc, __setcontext, setcontext, GLIBC_2_9)
+
+#if SHLIB_COMPAT (libc, GLIBC_2_3_4, GLIBC_2_9)
+ compat_text_section
+#ifdef __ASSUME_SWAPCONTEXT_SYSCALL
+ENTRY (__novsx_setcontext)
+ mr r4,r3
+ li r3,0
+ li r5,_UC_SIZE_2_3_4;
+ DO_CALL (SYS_ify (swapcontext));
+ bso- cr0,2f
+/* the kernel does not set the return code for the success case */
+ li r3,0
+ blr
+2:
+ b __syscall_error@local
+END (__novsx_setcontext)
+#else
+# undef __CONTEXT_ENABLE_VSRS
+# undef __CONTEXT_FUNC_NAME
+# define __CONTEXT_FUNC_NAME __novsx_setcontext
+# include "setcontext-common.S"
+
+ .previous
+#endif
+compat_symbol (libc, __novsx_setcontext, setcontext, GLIBC_2_3_4)
+#endif
#if SHLIB_COMPAT (libc, GLIBC_2_3_3, GLIBC_2_3_4)
@@ -59,6 +87,7 @@
# undef __CONTEXT_FUNC_NAME
# define __CONTEXT_FUNC_NAME __novec_setcontext
+# undef __CONTEXT_ENABLE_VSRS
# undef __CONTEXT_ENABLE_VRS
# include "setcontext-common.S"
Index: libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext-common.S
===================================================================
--- libc.orig/sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext-common.S 2008-07-31 16:29:47.000000000 -0500
+++ libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext-common.S 2008-07-31 16:35:49.000000000 -0500
@@ -24,6 +24,7 @@
and if appropriate:
__CONTEXT_ENABLE_FPRS
__CONTEXT_ENABLE_VRS
+ __CONTEXT_ENABLE_VSRS
Any archecture that implements the Vector unit is assumed to also
implement the floating unit. */
@@ -107,7 +108,12 @@
stw r0,_UC_GREGS+(PT_MSR*4)(r3)
stw r0,_UC_GREGS+(PT_MQ*4)(r3)
-#ifdef __CONTEXT_ENABLE_FPRS
+/* For VSR registers, we need a new scheme for saving the
+ registers in order to perform less store operations. If the
+ processor does not have VSR, use the old scheme. */
+#ifndef __CONTEXT_ENABLE_VSRS
+ /* Old scheme for storing FP and VR registers. */
+# ifdef __CONTEXT_ENABLE_FPRS
/* Save the floating-point registers */
stfd fp0,_UC_FREGS+(0*8)(r3)
stfd fp1,_UC_FREGS+(1*8)(r3)
@@ -143,32 +149,32 @@
stfd fp30,_UC_FREGS+(30*8)(r3)
stfd fp31,_UC_FREGS+(31*8)(r3)
stfd fp0,_UC_FREGS+(32*8)(r3)
-
-# ifdef __CONTEXT_ENABLE_VRS
-# ifdef PIC
+
+# ifdef __CONTEXT_ENABLE_VRS
+# ifdef PIC
mflr r8
-# ifdef HAVE_ASM_PPC_REL16
+# ifdef HAVE_ASM_PPC_REL16
bcl 20,31,1f
1: mflr r7
addis r7,r7,_GLOBAL_OFFSET_TABLE_-1b@ha
addi r7,r7,_GLOBAL_OFFSET_TABLE_-1b@l
-# else
+# else
bl _GLOBAL_OFFSET_TABLE_@local-4
mflr r7
-# endif
-# ifdef SHARED
+# endif
+# ifdef SHARED
lwz r7,_rtld_global_ro@got(r7)
mtlr r8
lwz r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET(r7)
-# else
+# else
lwz r7,_dl_hwcap@got(r7)
mtlr r8
lwz r7,0(r7)
-# endif
-# else
+# endif
+# else
lis r7,_dl_hwcap@ha
lwz r7,_dl_hwcap@l(r7)
-# endif
+# endif
andis. r7,r7,(PPC_FEATURE_HAS_ALTIVEC >> 16)
la r10,(_UC_VREGS)(r3)
@@ -267,8 +273,259 @@
stw r0,0(r8)
2: /*L(no_vec):*/
-# endif /* __CONTEXT_ENABLE_VRS */
-#endif /* __CONTEXT_ENABLE_FPRS */
+# endif /* __CONTEXT_ENABLE_VRS */
+# endif /* __CONTEXT_ENABLE_FPRS */
+#else
+ /* New scheme for saving the VSR registers. We need to save
+ the last 32 VSR registers first.*/
+# ifdef PIC
+ mflr r8
+# ifdef HAVE_ASM_PPC_REL16
+ bcl 20,31,7f
+7: mflr r7
+ addis r7,r7,_GLOBAL_OFFSET_TABLE_-7b@ha
+ addi r7,r7,_GLOBAL_OFFSET_TABLE_-7b@l
+# else
+ bl _GLOBAL_OFFSET_TABLE_@local-4
+ mflr r7
+# endif
+# ifdef SHARED
+ lwz r7,_rtld_global_ro@got(r7)
+ mtlr r8
+ lwz r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET(r7)
+# else
+ lwz r7,_dl_hwcap@got(r7)
+ mtlr r8
+ lwz r7,0(r7)
+# endif
+# else
+ lis r7,_dl_hwcap@ha
+ lwz r7,_dl_hwcap@l(r7)
+# endif
+/* We check for VSX hwcap here. It is assumed that a processor that
+ has VSX registers also has Altivec capability. */
+ andi. r7,r7,PPC_FEATURE_HAS_VSX
+ la r6,(_UC_VSREGS)(r3)
+ beq 8f /* L(no_vs) */
+
+ la r10,(_UC_VREGS)(r3)
+ la r9,(_UC_VREGS+16)(r3)
+/* address of the combined VSCR/VSAVE quadword. */
+ la r8,(_UC_VREGS+512)(r3)
+
+/* Save the vector registers */
+ stvx v0,0,r10
+ stvx v1,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+/* We need to get the Vector Status and Control Register early to avoid
+ store order problems later with the VSAVE register that shares the
+ same quadword. */
+ mfvscr v0
+
+ stvx v2,0,r10
+ stvx v3,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ stvx v0,0,r8
+
+ stvx v4,0,r10
+ stvx v5,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ stvx v6,0,r10
+ stvx v7,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ stvx v8,0,r10
+ stvx v9,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ stvx v10,0,r10
+ stvx v11,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ stvx v12,0,r10
+ stvx v13,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ stvx v14,0,r10
+ stvx v15,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ stvx v16,0,r10
+ stvx v17,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ stvx v18,0,r10
+ stvx v19,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ stvx v20,0,r10
+ stvx v21,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ stvx v22,0,r10
+ stvx v23,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ stvx v24,0,r10
+ stvx v25,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ stvx v26,0,r10
+ stvx v27,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ stvx v28,0,r10
+ stvx v29,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ mfvscr v0
+ stvx v30,0,r10
+ stvx v31,0,r9
+ stw r0,0(r8)
+
+#ifdef HAVE_ASM_PPC_VSX
+ /* Only proceed with this if binutils can handle .machine "power7". */
+
+ /* Proceeding to the FP registers and the doubleword 1
+ of the first 32 VSR registers. */
+ la r7,(_UC_FREGS)(r3)
+ la r6,(_UC_VSREGS)(r3)
+ /* Save fp0 and fp1 into vs32. */
+ xxmrghd vs32,vs0,vs1
+ /* Save vs0[1] and vs1[1] into vs33. */
+ xxmrgld vs33,vs0,vs1
+ /* Save f0 and f1. */
+ stxvd2x vs32,0,r7
+ /* Save vs0[1] and vs1[1]. */
+ stxvd2x vs33,0,r6
+ addi r7,r7,16
+ addi r6,r6,16
+
+ xxmrghd vs34,vs2,vs3
+ xxmrgld vs35,vs2,vs3
+ stxvd2x vs34,0,r7
+ stxvd2x vs35,0,r6
+ addi r7,r7,16
+ addi r6,r6,16
+
+ xxmrghd vs36,vs4,vs5
+ xxmrgld vs37,vs4,vs5
+ stxvd2x vs36,0,r7
+ stxvd2x vs37,0,r6
+ addi r7,r7,16
+ addi r6,r6,16
+
+ xxmrghd vs38,vs6,vs7
+ xxmrgld vs39,vs6,vs7
+ stxvd2x vs38,0,r7
+ stxvd2x vs39,0,r6
+ addi r7,r7,16
+ addi r6,r6,16
+
+ xxmrghd vs40,vs8,vs9
+ xxmrgld vs41,vs8,vs9
+ stxvd2x vs40,0,r7
+ stxvd2x vs41,0,r6
+ addi r7,r7,16
+ addi r6,r6,16
+
+ xxmrghd vs42,vs10,vs11
+ xxmrgld vs43,vs10,vs11
+ stxvd2x vs42,0,r7
+ stxvd2x vs43,0,r6
+ addi r7,r7,16
+ addi r6,r6,16
+
+ xxmrghd vs44,vs12,vs13
+ xxmrgld vs45,vs12,vs13
+ stxvd2x vs44,0,r7
+ stxvd2x vs45,0,r6
+ addi r7,r7,16
+ addi r6,r6,16
+
+ xxmrghd vs46,vs14,vs15
+ xxmrgld vs47,vs14,vs15
+ stxvd2x vs46,0,r7
+ stxvd2x vs47,0,r6
+ addi r7,r7,16
+ addi r6,r6,16
+
+ xxmrghd vs48,vs16,vs17
+ xxmrgld vs49,vs16,vs17
+ stxvd2x vs48,0,r7
+ stxvd2x vs49,0,r6
+ addi r7,r7,16
+ addi r6,r6,16
+
+ xxmrghd vs50,vs18,vs19
+ xxmrgld vs51,vs18,vs19
+ stxvd2x vs50,0,r7
+ stxvd2x vs51,0,r6
+ addi r7,r7,16
+ addi r6,r6,16
+
+ xxmrghd vs52,vs20,vs21
+ xxmrgld vs53,vs20,vs21
+ stxvd2x vs52,0,r7
+ stxvd2x vs53,0,r6
+ addi r7,r7,16
+ addi r6,r6,16
+
+ xxmrghd vs54,vs22,vs23
+ xxmrgld vs55,vs22,vs23
+ stxvd2x vs54,0,r7
+ stxvd2x vs55,0,r6
+ addi r7,r7,16
+ addi r6,r6,16
+
+ xxmrghd vs56,vs24,vs25
+ xxmrgld vs57,vs24,vs25
+ stxvd2x vs56,0,r7
+ stxvd2x vs57,0,r6
+ addi r7,r7,16
+ addi r6,r6,16
+
+ xxmrghd vs58,vs26,vs27
+ xxmrgld vs59,vs26,vs27
+ stxvd2x vs58,0,r7
+ stxvd2x vs59,0,r6
+ addi r7,r7,16
+ addi r6,r6,16
+
+ xxmrghd vs60,vs28,vs29
+ xxmrgld vs61,vs28,vs29
+ stxvd2x vs60,0,r7
+ stxvd2x vs61,0,r6
+ addi r7,r7,16
+ addi r6,r6,16
+
+ xxmrghd vs62,vs30,vs31
+ xxmrgld vs63,vs30,vs31
+ stxvd2x vs62,0,r7
+ stxvd2x vs63,0,r6
+
+#else
+# warning "Binutils does not support VSX instructions."
+#endif /* HAVE_ASM_PPC_VSX */
+8:/*L(no_vs): */
+#endif /* __CONTEXT_ENABLE_VSRS */
/* Restore ucontext (parm1) from stack. */
lwz r12,_FRAME_PARM_SAVE1(r1)
@@ -297,33 +554,38 @@
cmpwi r0,0
bne 4f /* L(do_sigret) */
-#ifdef __CONTEXT_ENABLE_FPRS
-# ifdef __CONTEXT_ENABLE_VRS
+/* For VSR registers, we need a new scheme for restoring the
+ registers in order to perform less load operations. If the
+ processor does not have VSR, use the old scheme. */
+#ifndef __CONTEXT_ENABLE_VSRS
+# ifdef __CONTEXT_ENABLE_FPRS
+# ifdef __CONTEXT_ENABLE_VRS
-# ifdef PIC
+# ifdef PIC
mflr r8
-# ifdef HAVE_ASM_PPC_REL16
+# ifdef HAVE_ASM_PPC_REL16
bcl 20,31,5f
5: mflr r7
addis r7,r7,_GLOBAL_OFFSET_TABLE_-1b@ha
addi r7,r7,_GLOBAL_OFFSET_TABLE_-1b@l
-# else
+# else
bl _GLOBAL_OFFSET_TABLE_@local-4
mflr r7
-# endif
+# endif
mtlr r8
-# ifdef SHARED
+# ifdef SHARED
lwz r7,_rtld_global_ro@got(r7)
lwz r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET(r7)
-# else
+# else
lwz r7,_dl_hwcap@got(r7)
lwz r7,0(r7)
-# endif
-# else
+# endif
+# else
lis r7,_dl_hwcap@ha
lwz r7,_dl_hwcap@l(r7)
-# endif
+# endif
andis. r7,r7,(PPC_FEATURE_HAS_ALTIVEC >> 16)
+
la r10,(_UC_VREGS)(r31)
beq 6f /* L(has_no_vec) */
@@ -421,7 +683,7 @@
lvx v11,0,r9
6: /* L(has_no_vec): */
-# endif /* __CONTEXT_ENABLE_VRS */
+# endif /* __CONTEXT_ENABLE_VRS */
/* Restore the floating-point registers */
lfd fp31,_UC_FREGS+(32*8)(r31)
lfd fp0,_UC_FREGS+(0*8)(r31)
@@ -457,7 +719,258 @@
lfd fp29,_UC_FREGS+(29*8)(r31)
lfd fp30,_UC_FREGS+(30*8)(r31)
lfd fp31,_UC_FREGS+(31*8)(r31)
-#endif /* __CONTEXT_ENABLE_FPRS */
+# endif /* __CONTEXT_ENABLE_FPRS */
+#else
+ /* New scheme for restoring the VSR registers. We need to load
+ the last 32 VSR registers first.*/
+# ifdef PIC
+ mflr r8
+# ifdef HAVE_ASM_PPC_REL16
+ bcl 20,31,9f
+9: mflr r7
+ addis r7,r7,_GLOBAL_OFFSET_TABLE_-9b@ha
+ addi r7,r7,_GLOBAL_OFFSET_TABLE_-9b@l
+# else
+ bl _GLOBAL_OFFSET_TABLE_@local-4
+ mflr r7
+# endif
+ mtlr r8
+# ifdef SHARED
+ lwz r7,_rtld_global_ro@got(r7)
+ lwz r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET(r7)
+# else
+ lwz r7,_dl_hwcap@got(r7)
+ lwz r7,0(r7)
+# endif
+# else
+ lis r7,_dl_hwcap@ha
+ lwz r7,_dl_hwcap@l(r7)
+# endif
+/* We check for VSX hwcap here. It is assumed that a processor that
+ has VSX registers also has Altivec capability. */
+ andi. r7,r7,PPC_FEATURE_HAS_VSX
+ la r6,(_UC_VSREGS)(r31)
+ beq L(no_vs)
+
+#ifdef HAVE_ASM_PPC_VSX
+ /* Only proceed with this if binutils can handle .machine "power7". */
+
+/* Using VMX registers as temps to minimize the number of loads for
+ restoring the FP and the doubleword 1 of VSR[0-31]. */
+ la r7,(_UC_FREGS)(r31)
+ la r6,(_UC_VSREGS)(r31)
+ /* Load f0 and f1 register state into vs32. */
+ lxvd2x vs32,0,r7
+ /* Load vs0[1] and vs1[1] register state into vs33. */
+ lxvd2x vs33,0,r6
+ /* Merge f0 and vs0[1] register state into vs0. */
+ xxmrghd vs0,vs32,vs33
+ /* Merge f1 and vs1[1] register state into vs1. */
+ xxmrgld vs1,vs32,vs33
+ addi r7,r7,16
+ addi r6,r6,16
+
+ lxvd2x vs34,0,r7
+ lxvd2x vs35,0,r6
+ xxmrghd vs2,vs34,vs35
+ xxmrghd vs3,vs34,vs35
+ addi r7,r7,16
+ addi r6,r6,16
+
+ lxvd2x vs36,0,r7
+ lxvd2x vs37,0,r6
+ xxmrghd vs4,vs36,vs37
+ xxmrghd vs5,vs36,vs37
+ addi r7,r7,16
+ addi r6,r6,16
+
+ lxvd2x vs38,0,r7
+ lxvd2x vs39,0,r6
+ xxmrghd vs6,vs38,vs39
+ xxmrghd vs7,vs38,vs39
+ addi r7,r7,16
+ addi r6,r6,16
+
+ lxvd2x vs40,0,r7
+ lxvd2x vs41,0,r6
+ xxmrghd vs8,vs40,vs41
+ xxmrghd vs9,vs40,vs41
+ addi r7,r7,16
+ addi r6,r6,16
+
+ lxvd2x vs42,0,r7
+ lxvd2x vs43,0,r6
+ xxmrghd vs10,vs42,vs43
+ xxmrghd vs11,vs42,vs43
+ addi r7,r7,16
+ addi r6,r6,16
+
+ lxvd2x vs44,0,r7
+ lxvd2x vs45,0,r6
+ xxmrghd vs12,vs44,vs45
+ xxmrghd vs13,vs44,vs45
+ addi r7,r7,16
+ addi r6,r6,16
+
+ lxvd2x vs46,0,r7
+ lxvd2x vs47,0,r6
+ xxmrghd vs14,vs46,vs47
+ xxmrghd vs15,vs46,vs47
+ addi r7,r7,16
+ addi r6,r6,16
+
+ lxvd2x vs48,0,r7
+ lxvd2x vs49,0,r6
+ xxmrghd vs16,vs48,vs49
+ xxmrghd vs17,vs48,vs49
+ addi r7,r7,16
+ addi r6,r6,16
+
+ lxvd2x vs50,0,r7
+ lxvd2x vs51,0,r6
+ xxmrghd vs18,vs50,vs51
+ xxmrghd vs19,vs50,vs51
+ addi r7,r7,16
+ addi r6,r6,16
+
+ lxvd2x vs52,0,r7
+ lxvd2x vs53,0,r6
+ xxmrghd vs20,vs52,vs53
+ xxmrghd vs21,vs52,vs53
+ addi r7,r7,16
+ addi r6,r6,16
+
+ lxvd2x vs54,0,r7
+ lxvd2x vs55,0,r6
+ xxmrghd vs22,vs54,vs55
+ xxmrghd vs23,vs54,vs55
+ addi r7,r7,16
+ addi r6,r6,16
+
+ lxvd2x vs56,0,r7
+ lxvd2x vs57,0,r6
+ xxmrghd vs24,vs56,vs57
+ xxmrghd vs25,vs56,vs57
+ addi r7,r7,16
+ addi r6,r6,16
+
+ lxvd2x vs58,0,r7
+ lxvd2x vs59,0,r6
+ xxmrghd vs26,vs58,vs59
+ xxmrghd vs27,vs58,vs59
+ addi r7,r7,16
+ addi r6,r6,16
+
+ lxvd2x vs60,0,r7
+ lxvd2x vs61,0,r6
+ xxmrghd vs28,vs60,vs61
+ xxmrghd vs29,vs60,vs61
+ addi r7,r7,16
+ addi r6,r6,16
+
+ lxvd2x vs62,0,r7
+ lxvd2x vs63,0,r6
+ xxmrghd vs30,vs62,vs63
+ xxmrghd vs31,vs62,vs63
+
+#else
+# warning "Binutils does not support VSX instructions."
+#endif
+/* Now we can proceed restoring the VMX registers. */
+ la r10,(_UC_VREGS)(r31)
+ lwz r0,(32*16)(r10)
+ li r9,(32*16)
+ cmpwi r0,0
+ mtspr VRSAVE,r0
+ lvx v19,r9,r10
+ la r9,(16)(r10)
+
+ lvx v0,0,r10
+ lvx v1,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ mtvscr v19
+ lvx v2,0,r10
+ lvx v3,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ lvx v4,0,r10
+ lvx v5,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ lvx v6,0,r10
+ lvx v7,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ lvx v8,0,r10
+ lvx v9,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ lvx v10,0,r10
+ lvx v11,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ lvx v12,0,r10
+ lvx v13,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ lvx v14,0,r10
+ lvx v15,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ lvx v16,0,r10
+ lvx v17,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ lvx v18,0,r10
+ lvx v19,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ lvx v20,0,r10
+ lvx v21,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ lvx v22,0,r10
+ lvx v23,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ lvx v24,0,r10
+ lvx v25,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ lvx v26,0,r10
+ lvx v27,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ lvx v28,0,r10
+ lvx v29,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ lvx v30,0,r10
+ lvx v31,0,r9
+ addi r10,r10,32
+ addi r9,r9,32
+
+ lvx v10,0,r10
+ lvx v11,0,r9
+
+L(no_vs):
+#endif /* __CONTEXT_ENABLE_VSRS */
/* Restore LR and CCR, and set CTR to the NIP value */
lwz r3,_UC_GREGS+(PT_LNK*4)(r31)
Index: libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext.S
===================================================================
--- libc.orig/sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext.S 2008-07-31 16:29:47.000000000 -0500
+++ libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext.S 2008-07-31 16:30:56.000000000 -0500
@@ -29,14 +29,16 @@
#define __CONTEXT_FUNC_NAME __swapcontext
#define __CONTEXT_ENABLE_FPRS 1
#define __CONTEXT_ENABLE_VRS 1
+#define __CONTEXT_ENABLE_VSRS 1
/* Size of ucontext in GLIBC_2.3.4 and later. */
#define _UC_SIZE_2_3_4 1184
+#define _UC_SIZE_2_9 1440
#ifdef __ASSUME_SWAPCONTEXT_SYSCALL
.section ".text";
ENTRY (__swapcontext)
- li r5,_UC_SIZE_2_3_4;
+ li r5,_UC_SIZE_2_9;
DO_CALL (SYS_ify (swapcontext));
bso- cr0,1f
/* the kernel does not set the return code for the success case */
@@ -49,7 +51,31 @@
# include "swapcontext-common.S"
#endif
-versioned_symbol (libc, __swapcontext, swapcontext, GLIBC_2_3_4)
+versioned_symbol (libc, __swapcontext, swapcontext, GLIBC_2_9)
+
+#if SHLIB_COMPAT (libc, GLIBC_2_3_4, GLIBC_2_9)
+ compat_text_section
+#ifdef __ASSUME_SWAPCONTEXT_SYSCALL
+ENTRY (__novsx_swapcontext)
+ li r5,_UC_SIZE_2_3_4;
+ DO_CALL (SYS_ify (swapcontext));
+ bso- cr0,2f
+/* the kernel does not set the return code for the success case */
+ li r3,0
+ blr
+2:
+ b __syscall_error@local
+END (__novsx_swapcontext)
+#else
+# undef __CONTEXT_ENABLE_VSRS
+# undef __CONTEXT_FUNC_NAME
+# define __CONTEXT_FUNC_NAME __novsx_swapcontext
+# include "swapcontext-common.S"
+
+ .previous
+#endif
+compat_symbol (libc, __novsx_swapcontext, swapcontext, GLIBC_2_3_4)
+#endif
#if SHLIB_COMPAT (libc, GLIBC_2_3_3, GLIBC_2_3_4)
@@ -57,6 +83,7 @@
# undef __CONTEXT_FUNC_NAME
# define __CONTEXT_FUNC_NAME __novec_swapcontext
+# undef __CONTEXT_ENABLE_VSRS
# undef __CONTEXT_ENABLE_VRS
# include "swapcontext-common.S"