Re: PowerPC: Align power7 memcpy using VSX to quadword


Ping.

On 13-06-2014 12:27, Adhemerval Zanella wrote:
> This patch changes power7 memcpy to use VSX instructions only when
> memory is aligned to quadword (16 bytes).  This avoids unaligned kernel
> traps on non-cacheable memory (for instance, memory-mapped I/O).
>
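> Roughly, the new dispatch condition can be pictured with the following C
> sketch (the function and its names are illustrative only; the real routine
> is hand-written assembly in memcpy.S):
>
>   #include <stdint.h>
>
>   /* Illustrative sketch, not glibc code: the quadword (VSX) loop is
>      reached only when SRC and DST have the same residue modulo 16, so
>      that after the byte-wise alignment prologue both pointers end up
>      16-byte aligned.  */
>   static int
>   use_vsx_quadword_path (const void *dst, const void *src)
>   {
>     uintptr_t d = (uintptr_t) dst & 15;  /* andi.   11,3,15   */
>     uintptr_t s = (uintptr_t) src & 15;  /* clrlwi  10,4,28   */
>     return d == s;                       /* cmplw   cr6,10,11 */
>   }
>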
> Checked on ppc64be and ppc32be.
>
> --
>
> 2014-06-13  Adhemerval Zanella  <azanella@linux.vnet.ibm.com>
>
> 	* sysdeps/powerpc/powerpc64/power7/memcpy.S: Align VSX copies to 16B
> 	to avoid alignment traps in non-cacheable memory.
> 	* sysdeps/powerpc/powerpc32/power7/memcpy.S: Likewise.
>
> ---
>
> diff --git a/sysdeps/powerpc/powerpc32/power7/memcpy.S b/sysdeps/powerpc/powerpc32/power7/memcpy.S
> index 52c2a6b..e540fea 100644
> --- a/sysdeps/powerpc/powerpc32/power7/memcpy.S
> +++ b/sysdeps/powerpc/powerpc32/power7/memcpy.S
> @@ -38,8 +38,8 @@ EALIGN (memcpy, 5, 0)
>  	ble	cr1, L(copy_LT_32)  /* If move < 32 bytes use short move
>  				    code.  */
>  
> -	andi.   11,3,7	      /* Check alignment of DST.  */
> -	clrlwi  10,4,29	      /* Check alignment of SRC.  */
> +	andi.   11,3,15	      /* Check alignment of DST.  */
> +	clrlwi  10,4,28	      /* Check alignment of SRC.  */
>  	cmplw   cr6,10,11     /* SRC and DST alignments match?  */
>  	mr	12,4
>  	mr	31,5
> diff --git a/sysdeps/powerpc/powerpc64/power7/memcpy.S b/sysdeps/powerpc/powerpc64/power7/memcpy.S
> index bbfd381..18467f6 100644
> --- a/sysdeps/powerpc/powerpc64/power7/memcpy.S
> +++ b/sysdeps/powerpc/powerpc64/power7/memcpy.S
> @@ -36,16 +36,11 @@ EALIGN (memcpy, 5, 0)
>  	ble	cr1, L(copy_LT_32)  /* If move < 32 bytes use short move
>  				    code.  */
>  
> -#ifdef __LITTLE_ENDIAN__
> -/* In little-endian mode, power7 takes an alignment trap on any lxvd2x
> -   or stxvd2x crossing a 32-byte boundary, so ensure the aligned_copy
> -   loop is only used for quadword aligned copies.  */
> +/* Align copies using VSX instructions to quadword.  This avoids alignment
> +   traps when memcpy is used on non-cacheable memory (for instance,
> +   memory-mapped I/O).  */
>  	andi.	10,3,15
>  	clrldi	11,4,60
> -#else
> -	andi.	10,3,7		/* Check alignment of DST.  */
> -	clrldi	11,4,61		/* Check alignment of SRC.  */
> -#endif
>  	cmpld	cr6,10,11	/* SRC and DST alignments match?  */
>  
>  	mr	dst,3
> @@ -53,11 +48,7 @@ EALIGN (memcpy, 5, 0)
>  	beq	L(aligned_copy)
>  
>  	mtocrf	0x01,0
> -#ifdef __LITTLE_ENDIAN__
>  	clrldi	0,0,60
> -#else
> -	clrldi	0,0,61
> -#endif
>  
>  /* Get the DST and SRC aligned to 8 bytes (16 for little-endian).  */
>  1:
> @@ -79,14 +70,12 @@ EALIGN (memcpy, 5, 0)
>  	stw	6,0(dst)
>  	addi	dst,dst,4
>  8:
> -#ifdef __LITTLE_ENDIAN__
>  	bf	28,16f
>  	ld	6,0(src)
>  	addi	src,src,8
>  	std	6,0(dst)
>  	addi	dst,dst,8
>  16:
> -#endif
>  	subf	cnt,0,cnt
>  
>  /* Main aligned copy loop. Copies 128 bytes at a time. */
> @@ -298,9 +287,7 @@ L(copy_LE_8):
>  	.align	4
>  L(copy_GE_32_unaligned):
>  	clrldi	0,0,60	      /* Number of bytes until the 1st dst quadword.  */
> -#ifndef __LITTLE_ENDIAN__
>  	andi.	10,3,15	      /* Check alignment of DST (against quadwords).  */
> -#endif
>  	srdi	9,cnt,4	      /* Number of full quadwords remaining.  */
>  
>  	beq	L(copy_GE_32_unaligned_cont)
>
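As a quick sanity check on top of the glibc test suite (string/test-memcpy),
a standalone sketch along these lines exercises every SRC/DST alignment
combination modulo 16, so both the quadword-aligned VSX loop and the
fallback paths get hit.  Note it cannot reproduce the non-cacheable case
itself, which needs uncached memory-mapped I/O:

  #include <assert.h>
  #include <string.h>

  int
  main (void)
  {
    static unsigned char src[512], dst[512];
    for (int i = 0; i < 512; i++)
      src[i] = (unsigned char) i;
    /* Copy 256 bytes for every pair of source/destination residues
       modulo 16 and verify the result byte-for-byte.  */
    for (int sa = 0; sa < 16; sa++)
      for (int da = 0; da < 16; da++)
        {
          memset (dst, 0, sizeof dst);
          memcpy (dst + da, src + sa, 256);
          assert (memcmp (dst + da, src + sa, 256) == 0);
        }
    return 0;
  }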