This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.
Re: PowerPC LE setjmp/longjmp
- From: Adhemerval Zanella <azanella@linux.vnet.ibm.com>
- To: libc-alpha@sourceware.org
- Date: Wed, 21 Aug 2013 10:05:11 -0300
- Subject: Re: PowerPC LE setjmp/longjmp
- References: <20130809045121.GZ3294@bubble.grove.modra.org>
Hi Alan,
I believe this patch is incomplete: it fails to build for PPC32 with the
following assembler errors:
../sysdeps/powerpc/powerpc32/fpu/setjmp-common.S: Assembler messages:
../sysdeps/powerpc/powerpc32/fpu/setjmp-common.S:103: Error: undefined symbol `_dl_hwcap' in operation
../sysdeps/powerpc/powerpc32/fpu/setjmp-common.S:103: Error: undefined symbol `LOWORD' in operation
../sysdeps/powerpc/powerpc32/fpu/setjmp-common.S:104: Error: undefined symbol `_dl_hwcap' in operation
../sysdeps/powerpc/powerpc32/fpu/setjmp-common.S:104: Error: undefined symbol `LOWORD' in operation
I did a quick search through your previous patches, but I couldn't find a
definition of LOWORD in any of them. I think the following fix should suffice:
diff --git a/sysdeps/powerpc/jmpbuf-offsets.h b/sysdeps/powerpc/jmpbuf-offsets.h
index f2116bd..59292bb 100644
--- a/sysdeps/powerpc/jmpbuf-offsets.h
+++ b/sysdeps/powerpc/jmpbuf-offsets.h
@@ -33,4 +33,11 @@
# define JB_SIZE ((64 + (12 * 4)) * 4)
# define JB_VRSAVE 62
# define JB_VRS 64
+# ifdef __LITTLE_ENDIAN__
+# define LOWORD 0
+# define HIWORD 4
+# else
+# define LOWORD 4
+# define HIWORD 0
+# endif
#endif
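Just to spell out where those values come from: _dl_hwcap lives in a 64-bit
slot and the asm only wants its low 32-bit word, whose byte offset inside that
slot depends on endianness. A small standalone C sketch (not part of the
patch; the hwcap value is made up) showing the offset flip:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int
main (void)
{
  /* Stand-in for the 64-bit _dl_hwcap slot; the bits the asm tests
     (e.g. PPC_FEATURE_HAS_ALTIVEC) sit in the low 32-bit word.  */
  uint64_t hwcap = 0x00000000deadbeefULL;
  unsigned char bytes[8];
  uint32_t w0;

  memcpy (bytes, &hwcap, 8);
  memcpy (&w0, bytes + 0, 4);   /* 32-bit word at byte offset 0 */

  /* Little-endian: the low word is at offset 0, so LOWORD == 0.
     Big-endian: the low word is at offset 4, so LOWORD == 4.  */
  printf ("on this machine LOWORD would be %d\n",
          w0 == 0xdeadbeefu ? 0 : 4);
  return 0;
}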
What do you think?
On 09-08-2013 01:51, Alan Modra wrote:
> Little-endian fixes for setjmp/longjmp. When writing these I noticed
> the setjmp code corrupts the non-volatile VMX registers when using an
> unaligned buffer. Anton fixed this, and also simplified it quite a
> bit.
>
> The current code uses boilerplate for the case where we want to store
> 16 bytes to an unaligned address. For that we have to do a
> read/modify/write of two aligned 16-byte quantities. In our case we
> are storing a bunch of back-to-back data (consecutive VMX registers),
> and only the start and end of the region need the read/modify/write.
>
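If I read the new code right, here is a rough byte-level C model of the
misaligned path (the function and variable names are mine, not from the patch;
it assumes the buffer really is misaligned, matching the beq+ to the aligned
path, and n >= 1):

#include <stdint.h>
#include <string.h>

/* Store n consecutive 16-byte registers to a misaligned dst using only
   aligned 16-byte stores.  Only the first and last aligned blocks need
   a read/modify/write; every other store just merges the tail of the
   previous register with the head of the current one.  */
static void
store_regs_unaligned (uint8_t *dst, const uint8_t regs[][16], unsigned n)
{
  unsigned r = (uintptr_t) dst & 15;    /* misalignment in bytes, 1..15 */
  uint8_t *adst = dst - r;              /* first aligned block */
  uint8_t prev[16], cur[16], out[16];
  unsigned i, j;

  /* Head block: rotate the existing memory so the generic merge below
     puts its first r bytes back where they were (mirrors the lvx plus
     vperm with the lvsl mask).  */
  for (j = 0; j < 16; j++)
    prev[j] = adst[(j + r) & 15];

  for (i = 0; i <= n; i++)
    {
      if (i < n)
        memcpy (cur, regs[i], 16);
      else
        /* Tail block: same rotation of the memory above the region.  */
        for (j = 0; j < 16; j++)
          cur[j] = (adst + 16 * n)[(j + r) & 15];

      /* Generic merge (mirrors vperm with the lvsr mask): last r bytes
         of prev, then first 16-r bytes of cur, written with one aligned
         16-byte store.  */
      memcpy (out, prev + 16 - r, r);
      memcpy (out + r, cur, 16 - r);
      memcpy (adst + 16 * i, out, 16);
      memcpy (prev, cur, 16);
    }
}

The net effect is what the description says: the data below v20 and above v31
survives, and the loop body only ever does plain aligned stores.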
> 2013-07-10 Anton Blanchard <anton@au1.ibm.com>
> Alistair Popple <alistair@ozlabs.au.ibm.com>
> Alan Modra <amodra@gmail.com>
>
> PR 15723
> * sysdeps/powerpc/jmpbuf-offsets.h: Comment fix.
> * sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S: Correct
> _dl_hwcap access for little-endian.
> * sysdeps/powerpc/powerpc32/fpu/setjmp-common.S: Likewise. Don't
> destroy vmx regs when saving unaligned.
> * sysdeps/powerpc/powerpc64/__longjmp-common.S: Correct CR load.
> * sysdeps/powerpc/powerpc64/setjmp-common.S: Likewise CR save. Don't
> destroy vmx regs when saving unaligned.
>
> diff --git a/sysdeps/powerpc/jmpbuf-offsets.h b/sysdeps/powerpc/jmpbuf-offsets.h
> index 64c658a..f2116bd 100644
> --- a/sysdeps/powerpc/jmpbuf-offsets.h
> +++ b/sysdeps/powerpc/jmpbuf-offsets.h
> @@ -21,12 +21,10 @@
> #define JB_LR 2 /* The address we will return to */
> #if __WORDSIZE == 64
> # define JB_GPRS 3 /* GPRs 14 through 31 are saved, 18*2 words total. */
> -# define JB_CR 21 /* Condition code registers with the VRSAVE at */
> - /* offset 172 (low half of the double word. */
> +# define JB_CR 21 /* Shared dword with VRSAVE. CR word at offset 172. */
> # define JB_FPRS 22 /* FPRs 14 through 31 are saved, 18*2 words total. */
> # define JB_SIZE (64 * 8) /* As per PPC64-VMX ABI. */
> -# define JB_VRSAVE 21 /* VRSAVE shares a double word with the CR at offset */
> - /* 168 (high half of the double word). */
> +# define JB_VRSAVE 21 /* Shared dword with CR. VRSAVE word at offset 168. */
> # define JB_VRS 40 /* VRs 20 through 31 are saved, 12*4 words total. */
> #else
> # define JB_GPRS 3 /* GPRs 14 through 31 are saved, 18 in total. */
> diff --git a/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S b/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S
> index 9d34cd9..e44161d 100644
> --- a/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S
> +++ b/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S
> @@ -43,16 +49,16 @@ ENTRY (__longjmp)
> # endif
> mtlr r6
> cfi_same_value (lr)
> - lwz r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+4(r5)
> + lwz r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+LOWORD(r5)
> # else
> lwz r5,_dl_hwcap@got(r5)
> mtlr r6
> cfi_same_value (lr)
> - lwz r5,4(r5)
> + lwz r5,LOWORD(r5)
> # endif
> # else
> - lis r5,(_dl_hwcap+4)@ha
> - lwz r5,(_dl_hwcap+4)@l(r5)
> + lis r5,(_dl_hwcap+LOWORD)@ha
> + lwz r5,(_dl_hwcap+LOWORD)@l(r5)
> # endif
> andis. r5,r5,(PPC_FEATURE_HAS_ALTIVEC >> 16)
> beq L(no_vmx)
> diff --git a/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S b/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S
> index 46ea2b0..14cb4ca 100644
> --- a/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S
> +++ b/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S
> @@ -94,14 +100,14 @@ ENTRY (__sigsetjmp)
> # else
> lwz r5,_rtld_global_ro@got(r5)
> # endif
> - lwz r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+4(r5)
> + lwz r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+LOWORD(r5)
> # else
> lwz r5,_dl_hwcap@got(r5)
> - lwz r5,4(r5)
> + lwz r5,LOWORD(r5)
> # endif
> # else
> - lis r6,(_dl_hwcap+4)@ha
> - lwz r5,(_dl_hwcap+4)@l(r6)
> + lis r6,(_dl_hwcap+LOWORD)@ha
> + lwz r5,(_dl_hwcap+LOWORD)@l(r6)
> # endif
> andis. r5,r5,(PPC_FEATURE_HAS_ALTIVEC >> 16)
> beq L(no_vmx)
> @@ -111,44 +117,43 @@ ENTRY (__sigsetjmp)
> stw r0,((JB_VRSAVE)*4)(3)
> addi r6,r5,16
> beq+ L(aligned_save_vmx)
> - lvsr v0,0,r5
> - vspltisb v1,-1 /* set v1 to all 1's */
> - vspltisb v2,0 /* set v2 to all 0's */
> - vperm v3,v2,v1,v0 /* v3 contains shift mask with num all 1 bytes on left = misalignment */
>
> + lvsr v0,0,r5
> + lvsl v1,0,r5
> + addi r6,r5,-16
>
> - /* Special case for v20 we need to preserve what is in save area below v20 before obliterating it */
> - lvx v5,0,r5
> - vperm v20,v20,v20,v0
> - vsel v5,v5,v20,v3
> - vsel v20,v20,v2,v3
> - stvx v5,0,r5
> +# define save_misaligned_vmx(savevr,prevvr,shiftvr,tmpvr,savegpr,addgpr) \
> + addi addgpr,addgpr,32; \
> + vperm tmpvr,prevvr,savevr,shiftvr; \
> + stvx tmpvr,0,savegpr
>
> -#define save_2vmx_partial(savevr,prev_savevr,hivr,shiftvr,maskvr,savegpr,addgpr) \
> - addi addgpr,addgpr,32; \
> - vperm savevr,savevr,savevr,shiftvr; \
> - vsel hivr,prev_savevr,savevr,maskvr; \
> - stvx hivr,0,savegpr;
> + /*
> + * We have to be careful not to corrupt the data below v20 and
> + * above v31. To keep things simple we just rotate both ends in
> + * the opposite direction to our main permute so we can use
> + * the common macro.
> + */
>
> - save_2vmx_partial(v21,v20,v5,v0,v3,r6,r5)
> - save_2vmx_partial(v22,v21,v5,v0,v3,r5,r6)
> - save_2vmx_partial(v23,v22,v5,v0,v3,r6,r5)
> - save_2vmx_partial(v24,v23,v5,v0,v3,r5,r6)
> - save_2vmx_partial(v25,v24,v5,v0,v3,r6,r5)
> - save_2vmx_partial(v26,v25,v5,v0,v3,r5,r6)
> - save_2vmx_partial(v27,v26,v5,v0,v3,r6,r5)
> - save_2vmx_partial(v28,v27,v5,v0,v3,r5,r6)
> - save_2vmx_partial(v29,v28,v5,v0,v3,r6,r5)
> - save_2vmx_partial(v30,v29,v5,v0,v3,r5,r6)
> + /* load and rotate data below v20 */
> + lvx v2,0,r5
> + vperm v2,v2,v2,v1
> + save_misaligned_vmx(v20,v2,v0,v3,r5,r6)
> + save_misaligned_vmx(v21,v20,v0,v3,r6,r5)
> + save_misaligned_vmx(v22,v21,v0,v3,r5,r6)
> + save_misaligned_vmx(v23,v22,v0,v3,r6,r5)
> + save_misaligned_vmx(v24,v23,v0,v3,r5,r6)
> + save_misaligned_vmx(v25,v24,v0,v3,r6,r5)
> + save_misaligned_vmx(v26,v25,v0,v3,r5,r6)
> + save_misaligned_vmx(v27,v26,v0,v3,r6,r5)
> + save_misaligned_vmx(v28,v27,v0,v3,r5,r6)
> + save_misaligned_vmx(v29,v28,v0,v3,r6,r5)
> + save_misaligned_vmx(v30,v29,v0,v3,r5,r6)
> + save_misaligned_vmx(v31,v30,v0,v3,r6,r5)
> + /* load and rotate data above v31 */
> + lvx v2,0,r6
> + vperm v2,v2,v2,v1
> + save_misaligned_vmx(v2,v31,v0,v3,r5,r6)
>
> - /* Special case for r31 we need to preserve what is in save area above v31 before obliterating it */
> - addi r5,r5,32
> - vperm v31,v31,v31,v0
> - lvx v4,0,r5
> - vsel v5,v30,v31,v3
> - stvx v5,0,r6
> - vsel v4,v31,v4,v3
> - stvx v4,0,r5
> b L(no_vmx)
>
> L(aligned_save_vmx):
> diff --git a/sysdeps/powerpc/powerpc64/__longjmp-common.S b/sysdeps/powerpc/powerpc64/__longjmp-common.S
> index 70c3704..21ff50f 100644
> --- a/sysdeps/powerpc/powerpc64/__longjmp-common.S
> +++ b/sysdeps/powerpc/powerpc64/__longjmp-common.S
> @@ -153,7 +153,7 @@ L(no_vmx):
> lfd fp21,((JB_FPRS+7)*8)(r3)
> ld r22,((JB_GPRS+8)*8)(r3)
> lfd fp22,((JB_FPRS+8)*8)(r3)
> - ld r0,(JB_CR*8)(r3)
> + lwz r0,((JB_CR*8)+4)(r3)
> ld r23,((JB_GPRS+9)*8)(r3)
> lfd fp23,((JB_FPRS+9)*8)(r3)
> ld r24,((JB_GPRS+10)*8)(r3)
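The CR changes look right to me. For reference, a C picture of the doubleword
that CR and VRSAVE share in the powerpc64 jmp_buf (the struct name is mine;
the offsets are the ones in the updated jmpbuf-offsets.h comments):

#include <stdint.h>
#include <stddef.h>
#include <assert.h>

/* Hypothetical C view of jmp_buf slot 21 on powerpc64, which holds both
   VRSAVE (bytes 168..171) and CR (bytes 172..175).  */
struct jb_cr_vrsave
{
  uint32_t vrsave;      /* JB_VRSAVE*8     == 168 */
  uint32_t cr;          /* JB_CR*8 + 4     == 172 */
};

int
main (void)
{
  assert (21 * 8 + offsetof (struct jb_cr_vrsave, vrsave) == 168);
  assert (21 * 8 + offsetof (struct jb_cr_vrsave, cr) == 172);
  return 0;
}

Addressing the CR word explicitly at (JB_CR*8)+4 with stw/lwz avoids relying
on which half of the doubleword a 64-bit std/ld puts the low 32 bits in, which
is exactly what differs between big- and little-endian.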
> diff --git a/sysdeps/powerpc/powerpc64/setjmp-common.S b/sysdeps/powerpc/powerpc64/setjmp-common.S
> index 58ec610..1c8b7cb 100644
> --- a/sysdeps/powerpc/powerpc64/setjmp-common.S
> +++ b/sysdeps/powerpc/powerpc64/setjmp-common.S
> @@ -95,7 +95,7 @@ JUMPTARGET(GLUE(__sigsetjmp,_ent)):
> mfcr r0
> std r16,((JB_GPRS+2)*8)(3)
> stfd fp16,((JB_FPRS+2)*8)(3)
> - std r0,(JB_CR*8)(3)
> + stw r0,((JB_CR*8)+4)(3)
> std r17,((JB_GPRS+3)*8)(3)
> stfd fp17,((JB_FPRS+3)*8)(3)
> std r18,((JB_GPRS+4)*8)(3)
> @@ -142,47 +142,43 @@ JUMPTARGET(GLUE(__sigsetjmp,_ent)):
> stw r0,((JB_VRSAVE)*8)(3)
> addi r6,r5,16
> beq+ L(aligned_save_vmx)
> - lvsr v0,0,r5
> - vspltisb v1,-1 /* set v1 to all 1's */
> - vspltisb v2,0 /* set v2 to all 0's */
> - vperm v3,v2,v1,v0 /* v3 contains shift mask with num all 1 bytes
> - on left = misalignment */
>
> + lvsr v0,0,r5
> + lvsl v1,0,r5
> + addi r6,r5,-16
>
> - /* Special case for v20 we need to preserve what is in save area
> - below v20 before obliterating it */
> - lvx v5,0,r5
> - vperm v20,v20,v20,v0
> - vsel v5,v5,v20,v3
> - vsel v20,v20,v2,v3
> - stvx v5,0,r5
> +# define save_misaligned_vmx(savevr,prevvr,shiftvr,tmpvr,savegpr,addgpr) \
> + addi addgpr,addgpr,32; \
> + vperm tmpvr,prevvr,savevr,shiftvr; \
> + stvx tmpvr,0,savegpr
>
> -# define save_2vmx_partial(savevr,prev_savevr,hivr,shiftvr,maskvr,savegpr,addgpr) \
> - addi addgpr,addgpr,32; \
> - vperm savevr,savevr,savevr,shiftvr; \
> - vsel hivr,prev_savevr,savevr,maskvr; \
> - stvx hivr,0,savegpr;
> + /*
> + * We have to be careful not to corrupt the data below v20 and
> + * above v31. To keep things simple we just rotate both ends in
> + * the opposite direction to our main permute so we can use
> + * the common macro.
> + */
>
> - save_2vmx_partial(v21,v20,v5,v0,v3,r6,r5)
> - save_2vmx_partial(v22,v21,v5,v0,v3,r5,r6)
> - save_2vmx_partial(v23,v22,v5,v0,v3,r6,r5)
> - save_2vmx_partial(v24,v23,v5,v0,v3,r5,r6)
> - save_2vmx_partial(v25,v24,v5,v0,v3,r6,r5)
> - save_2vmx_partial(v26,v25,v5,v0,v3,r5,r6)
> - save_2vmx_partial(v27,v26,v5,v0,v3,r6,r5)
> - save_2vmx_partial(v28,v27,v5,v0,v3,r5,r6)
> - save_2vmx_partial(v29,v28,v5,v0,v3,r6,r5)
> - save_2vmx_partial(v30,v29,v5,v0,v3,r5,r6)
> + /* load and rotate data below v20 */
> + lvx v2,0,r5
> + vperm v2,v2,v2,v1
> + save_misaligned_vmx(v20,v2,v0,v3,r5,r6)
> + save_misaligned_vmx(v21,v20,v0,v3,r6,r5)
> + save_misaligned_vmx(v22,v21,v0,v3,r5,r6)
> + save_misaligned_vmx(v23,v22,v0,v3,r6,r5)
> + save_misaligned_vmx(v24,v23,v0,v3,r5,r6)
> + save_misaligned_vmx(v25,v24,v0,v3,r6,r5)
> + save_misaligned_vmx(v26,v25,v0,v3,r5,r6)
> + save_misaligned_vmx(v27,v26,v0,v3,r6,r5)
> + save_misaligned_vmx(v28,v27,v0,v3,r5,r6)
> + save_misaligned_vmx(v29,v28,v0,v3,r6,r5)
> + save_misaligned_vmx(v30,v29,v0,v3,r5,r6)
> + save_misaligned_vmx(v31,v30,v0,v3,r6,r5)
> + /* load and rotate data above v31 */
> + lvx v2,0,r6
> + vperm v2,v2,v2,v1
> + save_misaligned_vmx(v2,v31,v0,v3,r5,r6)
>
> - /* Special case for r31 we need to preserve what is in save area
> - above v31 before obliterating it */
> - addi r5,r5,32
> - vperm v31,v31,v31,v0
> - lvx v4,0,r5
> - vsel v5,v30,v31,v3
> - stvx v5,0,r6
> - vsel v4,v31,v4,v3
> - stvx v4,0,r5
> b L(no_vmx)
>
> L(aligned_save_vmx):
>