This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.
Re: PowerPC LE setjmp/longjmp
- From: Adhemerval Zanella <azanella@linux.vnet.ibm.com>
- To: libc-alpha@sourceware.org
- Date: Wed, 21 Aug 2013 10:05:11 -0300
- Subject: Re: PowerPC LE setjmp/longjmp
- References: <20130809045121.GZ3294@bubble.grove.modra.org>
Hi Alan,
I believe this patch is incomplete: it fails to build for PPC32 with the
following assembler errors:
../sysdeps/powerpc/powerpc32/fpu/setjmp-common.S: Assembler messages:
../sysdeps/powerpc/powerpc32/fpu/setjmp-common.S:103: Error: undefined symbol `_dl_hwcap' in operation
../sysdeps/powerpc/powerpc32/fpu/setjmp-common.S:103: Error: undefined symbol `LOWORD' in operation
../sysdeps/powerpc/powerpc32/fpu/setjmp-common.S:104: Error: undefined symbol `_dl_hwcap' in operation
../sysdeps/powerpc/powerpc32/fpu/setjmp-common.S:104: Error: undefined symbol `LOWORD' in operation
I did a quick search through your previous patches, but I couldn't find a
definition of LOWORD in any of them. I think the following fix should suffice:
diff --git a/sysdeps/powerpc/jmpbuf-offsets.h b/sysdeps/powerpc/jmpbuf-offsets.h
index f2116bd..59292bb 100644
--- a/sysdeps/powerpc/jmpbuf-offsets.h
+++ b/sysdeps/powerpc/jmpbuf-offsets.h
@@ -33,4 +33,11 @@
# define JB_SIZE ((64 + (12 * 4)) * 4)
# define JB_VRSAVE 62
# define JB_VRS 64
+# ifdef __LITTLE_ENDIAN__
+# define LOWORD 0
+# define HIWORD 4
+# else
+# define LOWORD 4
+# define HIWORD 0
+# endif
#endif
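Just to spell out where those values come from: _dl_hwcap lives in a 64-bit
slot and the asm only wants its low 32-bit word, whose byte offset inside that
slot depends on endianness. A small standalone C sketch (not part of the
patch; the hwcap value is made up) showing the offset flip:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int
main (void)
{
  /* Stand-in for the 64-bit _dl_hwcap slot; the bits the asm tests
     (e.g. PPC_FEATURE_HAS_ALTIVEC) sit in the low 32-bit word.  */
  uint64_t hwcap = 0x00000000deadbeefULL;
  unsigned char bytes[8];
  uint32_t w0;

  memcpy (bytes, &hwcap, 8);
  memcpy (&w0, bytes + 0, 4);   /* 32-bit word at byte offset 0 */

  /* Little-endian: the low word is at offset 0, so LOWORD == 0.
     Big-endian: the low word is at offset 4, so LOWORD == 4.  */
  printf ("on this machine LOWORD would be %d\n",
          w0 == 0xdeadbeefu ? 0 : 4);
  return 0;
}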
What do you think?
On 09-08-2013 01:51, Alan Modra wrote:
> Little-endian fixes for setjmp/longjmp. When writing these I noticed
> the setjmp code corrupts the non-volatile VMX registers when using an
> unaligned buffer. Anton fixed this, and also simplified it quite a
> bit.
>
> The current code uses boilerplate for the case where we want to store
> 16 bytes to an unaligned address. For that we have to do a
> read/modify/write of two aligned 16-byte quantities. In our case we
> are storing a bunch of back-to-back data (consecutive VMX registers),
> and only the start and end of the region need the read/modify/write.
>
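If I read the new code right, here is a rough byte-level C model of the
misaligned path (the function and variable names are mine, not from the patch;
it assumes the buffer really is misaligned, matching the beq+ to the aligned
path, and n >= 1):

#include <stdint.h>
#include <string.h>

/* Store n consecutive 16-byte registers to a misaligned dst using only
   aligned 16-byte stores.  Only the first and last aligned blocks need
   a read/modify/write; every other store just merges the tail of the
   previous register with the head of the current one.  */
static void
store_regs_unaligned (uint8_t *dst, const uint8_t regs[][16], unsigned n)
{
  unsigned r = (uintptr_t) dst & 15;    /* misalignment in bytes, 1..15 */
  uint8_t *adst = dst - r;              /* first aligned block */
  uint8_t prev[16], cur[16], out[16];
  unsigned i, j;

  /* Head block: rotate the existing memory so the generic merge below
     puts its first r bytes back where they were (mirrors the lvx plus
     vperm with the lvsl mask).  */
  for (j = 0; j < 16; j++)
    prev[j] = adst[(j + r) & 15];

  for (i = 0; i <= n; i++)
    {
      if (i < n)
        memcpy (cur, regs[i], 16);
      else
        /* Tail block: same rotation of the memory above the region.  */
        for (j = 0; j < 16; j++)
          cur[j] = (adst + 16 * n)[(j + r) & 15];

      /* Generic merge (mirrors vperm with the lvsr mask): last r bytes
         of prev, then first 16-r bytes of cur, written with one aligned
         16-byte store.  */
      memcpy (out, prev + 16 - r, r);
      memcpy (out + r, cur, 16 - r);
      memcpy (adst + 16 * i, out, 16);
      memcpy (prev, cur, 16);
    }
}

The net effect is what the description says: the data below v20 and above v31
survives, and the loop body only ever does plain aligned stores.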
> 2013-07-10 Anton Blanchard <anton@au1.ibm.com>
> Alistair Popple <alistair@ozlabs.au.ibm.com>
> Alan Modra <amodra@gmail.com>
>
> PR 15723
> * sysdeps/powerpc/jmpbuf-offsets.h: Comment fix.
> * sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S: Correct
> _dl_hwcap access for little-endian.
> * sysdeps/powerpc/powerpc32/fpu/setjmp-common.S: Likewise. Don't
> destroy vmx regs when saving unaligned.
> * sysdeps/powerpc/powerpc64/__longjmp-common.S: Correct CR load.
> * sysdeps/powerpc/powerpc64/setjmp-common.S: Likewise CR save. Don't
> destroy vmx regs when saving unaligned.
>
> diff --git a/sysdeps/powerpc/jmpbuf-offsets.h b/sysdeps/powerpc/jmpbuf-offsets.h
> index 64c658a..f2116bd 100644
> --- a/sysdeps/powerpc/jmpbuf-offsets.h
> +++ b/sysdeps/powerpc/jmpbuf-offsets.h
> @@ -21,12 +21,10 @@
> #define JB_LR 2 /* The address we will return to */
> #if __WORDSIZE == 64
> # define JB_GPRS 3 /* GPRs 14 through 31 are saved, 18*2 words total. */
> -# define JB_CR 21 /* Condition code registers with the VRSAVE at */
> - /* offset 172 (low half of the double word. */
> +# define JB_CR 21 /* Shared dword with VRSAVE. CR word at offset 172. */
> # define JB_FPRS 22 /* FPRs 14 through 31 are saved, 18*2 words total. */
> # define JB_SIZE (64 * 8) /* As per PPC64-VMX ABI. */
> -# define JB_VRSAVE 21 /* VRSAVE shares a double word with the CR at offset */
> - /* 168 (high half of the double word). */
> +# define JB_VRSAVE 21 /* Shared dword with CR. VRSAVE word at offset 168. */
> # define JB_VRS 40 /* VRs 20 through 31 are saved, 12*4 words total. */
> #else
> # define JB_GPRS 3 /* GPRs 14 through 31 are saved, 18 in total. */
> diff --git a/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S b/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S
> index 9d34cd9..e44161d 100644
> --- a/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S
> +++ b/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S
> @@ -43,16 +49,16 @@ ENTRY (__longjmp)
> # endif
> mtlr r6
> cfi_same_value (lr)
> - lwz r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+4(r5)
> + lwz r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+LOWORD(r5)
> # else
> lwz r5,_dl_hwcap@got(r5)
> mtlr r6
> cfi_same_value (lr)
> - lwz r5,4(r5)
> + lwz r5,LOWORD(r5)
> # endif
> # else
> - lis r5,(_dl_hwcap+4)@ha
> - lwz r5,(_dl_hwcap+4)@l(r5)
> + lis r5,(_dl_hwcap+LOWORD)@ha
> + lwz r5,(_dl_hwcap+LOWORD)@l(r5)
> # endif
> andis. r5,r5,(PPC_FEATURE_HAS_ALTIVEC >> 16)
> beq L(no_vmx)
> diff --git a/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S b/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S
> index 46ea2b0..14cb4ca 100644
> --- a/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S
> +++ b/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S
> @@ -94,14 +100,14 @@ ENTRY (__sigsetjmp)
> # else
> lwz r5,_rtld_global_ro@got(r5)
> # endif
> - lwz r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+4(r5)
> + lwz r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+LOWORD(r5)
> # else
> lwz r5,_dl_hwcap@got(r5)
> - lwz r5,4(r5)
> + lwz r5,LOWORD(r5)
> # endif
> # else
> - lis r6,(_dl_hwcap+4)@ha
> - lwz r5,(_dl_hwcap+4)@l(r6)
> + lis r6,(_dl_hwcap+LOWORD)@ha
> + lwz r5,(_dl_hwcap+LOWORD)@l(r6)
> # endif
> andis. r5,r5,(PPC_FEATURE_HAS_ALTIVEC >> 16)
> beq L(no_vmx)
> @@ -111,44 +117,43 @@ ENTRY (__sigsetjmp)
> stw r0,((JB_VRSAVE)*4)(3)
> addi r6,r5,16
> beq+ L(aligned_save_vmx)
> - lvsr v0,0,r5
> - vspltisb v1,-1 /* set v1 to all 1's */
> - vspltisb v2,0 /* set v2 to all 0's */
> - vperm v3,v2,v1,v0 /* v3 contains shift mask with num all 1 bytes on left = misalignment */
>
> + lvsr v0,0,r5
> + lvsl v1,0,r5
> + addi r6,r5,-16
>
> - /* Special case for v20 we need to preserve what is in save area below v20 before obliterating it */
> - lvx v5,0,r5
> - vperm v20,v20,v20,v0
> - vsel v5,v5,v20,v3
> - vsel v20,v20,v2,v3
> - stvx v5,0,r5
> +# define save_misaligned_vmx(savevr,prevvr,shiftvr,tmpvr,savegpr,addgpr) \
> + addi addgpr,addgpr,32; \
> + vperm tmpvr,prevvr,savevr,shiftvr; \
> + stvx tmpvr,0,savegpr
>
> -#define save_2vmx_partial(savevr,prev_savevr,hivr,shiftvr,maskvr,savegpr,addgpr) \
> - addi addgpr,addgpr,32; \
> - vperm savevr,savevr,savevr,shiftvr; \
> - vsel hivr,prev_savevr,savevr,maskvr; \
> - stvx hivr,0,savegpr;
> + /*
> + * We have to be careful not to corrupt the data below v20 and
> + * above v31. To keep things simple we just rotate both ends in
> + * the opposite direction to our main permute so we can use
> + * the common macro.
> + */
>
> - save_2vmx_partial(v21,v20,v5,v0,v3,r6,r5)
> - save_2vmx_partial(v22,v21,v5,v0,v3,r5,r6)
> - save_2vmx_partial(v23,v22,v5,v0,v3,r6,r5)
> - save_2vmx_partial(v24,v23,v5,v0,v3,r5,r6)
> - save_2vmx_partial(v25,v24,v5,v0,v3,r6,r5)
> - save_2vmx_partial(v26,v25,v5,v0,v3,r5,r6)
> - save_2vmx_partial(v27,v26,v5,v0,v3,r6,r5)
> - save_2vmx_partial(v28,v27,v5,v0,v3,r5,r6)
> - save_2vmx_partial(v29,v28,v5,v0,v3,r6,r5)
> - save_2vmx_partial(v30,v29,v5,v0,v3,r5,r6)
> + /* load and rotate data below v20 */
> + lvx v2,0,r5
> + vperm v2,v2,v2,v1
> + save_misaligned_vmx(v20,v2,v0,v3,r5,r6)
> + save_misaligned_vmx(v21,v20,v0,v3,r6,r5)
> + save_misaligned_vmx(v22,v21,v0,v3,r5,r6)
> + save_misaligned_vmx(v23,v22,v0,v3,r6,r5)
> + save_misaligned_vmx(v24,v23,v0,v3,r5,r6)
> + save_misaligned_vmx(v25,v24,v0,v3,r6,r5)
> + save_misaligned_vmx(v26,v25,v0,v3,r5,r6)
> + save_misaligned_vmx(v27,v26,v0,v3,r6,r5)
> + save_misaligned_vmx(v28,v27,v0,v3,r5,r6)
> + save_misaligned_vmx(v29,v28,v0,v3,r6,r5)
> + save_misaligned_vmx(v30,v29,v0,v3,r5,r6)
> + save_misaligned_vmx(v31,v30,v0,v3,r6,r5)
> + /* load and rotate data above v31 */
> + lvx v2,0,r6
> + vperm v2,v2,v2,v1
> + save_misaligned_vmx(v2,v31,v0,v3,r5,r6)
>
> - /* Special case for r31 we need to preserve what is in save area above v31 before obliterating it */
> - addi r5,r5,32
> - vperm v31,v31,v31,v0
> - lvx v4,0,r5
> - vsel v5,v30,v31,v3
> - stvx v5,0,r6
> - vsel v4,v31,v4,v3
> - stvx v4,0,r5
> b L(no_vmx)
>
> L(aligned_save_vmx):
> diff --git a/sysdeps/powerpc/powerpc64/__longjmp-common.S b/sysdeps/powerpc/powerpc64/__longjmp-common.S
> index 70c3704..21ff50f 100644
> --- a/sysdeps/powerpc/powerpc64/__longjmp-common.S
> +++ b/sysdeps/powerpc/powerpc64/__longjmp-common.S
> @@ -153,7 +153,7 @@ L(no_vmx):
> lfd fp21,((JB_FPRS+7)*8)(r3)
> ld r22,((JB_GPRS+8)*8)(r3)
> lfd fp22,((JB_FPRS+8)*8)(r3)
> - ld r0,(JB_CR*8)(r3)
> + lwz r0,((JB_CR*8)+4)(r3)
> ld r23,((JB_GPRS+9)*8)(r3)
> lfd fp23,((JB_FPRS+9)*8)(r3)
> ld r24,((JB_GPRS+10)*8)(r3)
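The CR changes look right to me. For reference, a C picture of the doubleword
that CR and VRSAVE share in the powerpc64 jmp_buf (the struct name is mine;
the offsets are the ones in the updated jmpbuf-offsets.h comments):

#include <stdint.h>
#include <stddef.h>
#include <assert.h>

/* Hypothetical C view of jmp_buf slot 21 on powerpc64, which holds both
   VRSAVE (bytes 168..171) and CR (bytes 172..175).  */
struct jb_cr_vrsave
{
  uint32_t vrsave;      /* JB_VRSAVE*8     == 168 */
  uint32_t cr;          /* JB_CR*8 + 4     == 172 */
};

int
main (void)
{
  assert (21 * 8 + offsetof (struct jb_cr_vrsave, vrsave) == 168);
  assert (21 * 8 + offsetof (struct jb_cr_vrsave, cr) == 172);
  return 0;
}

Addressing the CR word explicitly at (JB_CR*8)+4 with stw/lwz avoids relying
on which half of the doubleword a 64-bit std/ld puts the low 32 bits in, which
is exactly what differs between big- and little-endian.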
> diff --git a/sysdeps/powerpc/powerpc64/setjmp-common.S b/sysdeps/powerpc/powerpc64/setjmp-common.S
> index 58ec610..1c8b7cb 100644
> --- a/sysdeps/powerpc/powerpc64/setjmp-common.S
> +++ b/sysdeps/powerpc/powerpc64/setjmp-common.S
> @@ -95,7 +95,7 @@ JUMPTARGET(GLUE(__sigsetjmp,_ent)):
> mfcr r0
> std r16,((JB_GPRS+2)*8)(3)
> stfd fp16,((JB_FPRS+2)*8)(3)
> - std r0,(JB_CR*8)(3)
> + stw r0,((JB_CR*8)+4)(3)
> std r17,((JB_GPRS+3)*8)(3)
> stfd fp17,((JB_FPRS+3)*8)(3)
> std r18,((JB_GPRS+4)*8)(3)
> @@ -142,47 +142,43 @@ JUMPTARGET(GLUE(__sigsetjmp,_ent)):
> stw r0,((JB_VRSAVE)*8)(3)
> addi r6,r5,16
> beq+ L(aligned_save_vmx)
> - lvsr v0,0,r5
> - vspltisb v1,-1 /* set v1 to all 1's */
> - vspltisb v2,0 /* set v2 to all 0's */
> - vperm v3,v2,v1,v0 /* v3 contains shift mask with num all 1 bytes
> - on left = misalignment */
>
> + lvsr v0,0,r5
> + lvsl v1,0,r5
> + addi r6,r5,-16
>
> - /* Special case for v20 we need to preserve what is in save area
> - below v20 before obliterating it */
> - lvx v5,0,r5
> - vperm v20,v20,v20,v0
> - vsel v5,v5,v20,v3
> - vsel v20,v20,v2,v3
> - stvx v5,0,r5
> +# define save_misaligned_vmx(savevr,prevvr,shiftvr,tmpvr,savegpr,addgpr) \
> + addi addgpr,addgpr,32; \
> + vperm tmpvr,prevvr,savevr,shiftvr; \
> + stvx tmpvr,0,savegpr
>
> -# define save_2vmx_partial(savevr,prev_savevr,hivr,shiftvr,maskvr,savegpr,addgpr) \
> - addi addgpr,addgpr,32; \
> - vperm savevr,savevr,savevr,shiftvr; \
> - vsel hivr,prev_savevr,savevr,maskvr; \
> - stvx hivr,0,savegpr;
> + /*
> + * We have to be careful not to corrupt the data below v20 and
> + * above v31. To keep things simple we just rotate both ends in
> + * the opposite direction to our main permute so we can use
> + * the common macro.
> + */
>
> - save_2vmx_partial(v21,v20,v5,v0,v3,r6,r5)
> - save_2vmx_partial(v22,v21,v5,v0,v3,r5,r6)
> - save_2vmx_partial(v23,v22,v5,v0,v3,r6,r5)
> - save_2vmx_partial(v24,v23,v5,v0,v3,r5,r6)
> - save_2vmx_partial(v25,v24,v5,v0,v3,r6,r5)
> - save_2vmx_partial(v26,v25,v5,v0,v3,r5,r6)
> - save_2vmx_partial(v27,v26,v5,v0,v3,r6,r5)
> - save_2vmx_partial(v28,v27,v5,v0,v3,r5,r6)
> - save_2vmx_partial(v29,v28,v5,v0,v3,r6,r5)
> - save_2vmx_partial(v30,v29,v5,v0,v3,r5,r6)
> + /* load and rotate data below v20 */
> + lvx v2,0,r5
> + vperm v2,v2,v2,v1
> + save_misaligned_vmx(v20,v2,v0,v3,r5,r6)
> + save_misaligned_vmx(v21,v20,v0,v3,r6,r5)
> + save_misaligned_vmx(v22,v21,v0,v3,r5,r6)
> + save_misaligned_vmx(v23,v22,v0,v3,r6,r5)
> + save_misaligned_vmx(v24,v23,v0,v3,r5,r6)
> + save_misaligned_vmx(v25,v24,v0,v3,r6,r5)
> + save_misaligned_vmx(v26,v25,v0,v3,r5,r6)
> + save_misaligned_vmx(v27,v26,v0,v3,r6,r5)
> + save_misaligned_vmx(v28,v27,v0,v3,r5,r6)
> + save_misaligned_vmx(v29,v28,v0,v3,r6,r5)
> + save_misaligned_vmx(v30,v29,v0,v3,r5,r6)
> + save_misaligned_vmx(v31,v30,v0,v3,r6,r5)
> + /* load and rotate data above v31 */
> + lvx v2,0,r6
> + vperm v2,v2,v2,v1
> + save_misaligned_vmx(v2,v31,v0,v3,r5,r6)
>
> - /* Special case for r31 we need to preserve what is in save area
> - above v31 before obliterating it */
> - addi r5,r5,32
> - vperm v31,v31,v31,v0
> - lvx v4,0,r5
> - vsel v5,v30,v31,v3
> - stvx v5,0,r6
> - vsel v4,v31,v4,v3
> - stvx v4,0,r5
> b L(no_vmx)
>
> L(aligned_save_vmx):
>