This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: PowerPC LE strcmp and strncmp


Hi Alan,

This patch is OK; I even noticed an improvement for strncmp on ppc32.


On 09-08-2013 02:20, Alan Modra wrote:
> More little-endian support.  I leave the main strcmp loops unchanged
> (well, except for renumbering rTMP to something other than r0, since
> it's needed in an addi insn) and modify the tail for little-endian.
>
> I noticed some of the big-endian tail code was a little untidy so have
> cleaned that up too.
>
> 	* sysdeps/powerpc/powerpc64/strcmp.S (rTMP2): Define as r0.
> 	(rTMP): Define as r11.
> 	(strcmp): Add little-endian support.  Optimise tail.
> 	* sysdeps/powerpc/powerpc32/strcmp.S: Similarly.
> 	* sysdeps/powerpc/powerpc64/strncmp.S: Likewise.
> 	* sysdeps/powerpc/powerpc32/strncmp.S: Likewise.
> 	* sysdeps/powerpc/powerpc64/power4/strncmp.S: Likewise.
> 	* sysdeps/powerpc/powerpc32/power4/strncmp.S: Likewise.
> 	* sysdeps/powerpc/powerpc64/power7/strncmp.S: Likewise.
> 	* sysdeps/powerpc/powerpc32/power7/strncmp.S: Likewise.
>
> diff --git a/sysdeps/powerpc/powerpc64/strcmp.S b/sysdeps/powerpc/powerpc64/strcmp.S
> index c9d6dac..7085468 100644
> --- a/sysdeps/powerpc/powerpc64/strcmp.S
> +++ b/sysdeps/powerpc/powerpc64/strcmp.S
> @@ -25,7 +25,7 @@
>  EALIGN (strcmp, 4, 0)
>  	CALL_MCOUNT 2
>
> -#define rTMP	r0
> +#define rTMP2	r0
>  #define rRTN	r3
>  #define rSTR1	r3	/* first string arg */
>  #define rSTR2	r4	/* second string arg */
> @@ -35,6 +35,7 @@ EALIGN (strcmp, 4, 0)
>  #define r7F7F	r8	/* constant 0x7f7f7f7f7f7f7f7f */
>  #define rNEG	r9	/* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */
>  #define rBITDIF	r10	/* bits that differ in s1 & s2 words */
> +#define rTMP	r11
>
>  	dcbt	0,rSTR1
>  	or	rTMP, rSTR2, rSTR1
> @@ -58,19 +59,66 @@ L(g0):	ldu	rWORD1, 8(rSTR1)
>  	ldu	rWORD2, 8(rSTR2)
>  L(g1):	add	rTMP, rFEFE, rWORD1
>  	nor	rNEG, r7F7F, rWORD1
> -
>  	and.	rTMP, rTMP, rNEG
>  	cmpd	cr1, rWORD1, rWORD2
>  	beq+	L(g0)
> -L(endstring):
> +
>  /* OK. We've hit the end of the string. We need to be careful that
>     we don't compare two strings as different because of gunk beyond
>     the end of the strings...  */
> +#ifdef __LITTLE_ENDIAN__
> +L(endstring):
> +	addi    rTMP2, rTMP, -1
> +	beq	cr1, L(equal)
> +	andc    rTMP2, rTMP2, rTMP
> +	rldimi	rTMP2, rTMP2, 1, 0
> +	and	rWORD2, rWORD2, rTMP2	/* Mask off gunk.  */
> +	and	rWORD1, rWORD1, rTMP2
> +	cmpd	cr1, rWORD1, rWORD2
> +	beq	cr1, L(equal)
> +	xor	rBITDIF, rWORD1, rWORD2	/* rBITDIF has bits that differ.  */
> +	neg	rNEG, rBITDIF
> +	and	rNEG, rNEG, rBITDIF	/* rNEG has LS bit that differs.  */
> +	cntlzd	rNEG, rNEG		/* bitcount of the bit.  */
> +	andi.	rNEG, rNEG, 56		/* bitcount to LS byte that differs. */
> +	sld	rWORD1, rWORD1, rNEG	/* shift left to clear MS bytes.  */
> +	sld	rWORD2, rWORD2, rNEG
> +	xor.	rBITDIF, rWORD1, rWORD2
> +	sub	rRTN, rWORD1, rWORD2
> +	blt-	L(highbit)
> +	sradi	rRTN, rRTN, 63		/* must return an int.  */
> +	ori	rRTN, rRTN, 1
> +	blr
> +L(equal):
> +	li	rRTN, 0
> +	blr
> +
> +L(different):
> +	ld	rWORD1, -8(rSTR1)
> +	xor	rBITDIF, rWORD1, rWORD2	/* rBITDIF has bits that differ.  */
> +	neg	rNEG, rBITDIF
> +	and	rNEG, rNEG, rBITDIF	/* rNEG has LS bit that differs.  */
> +	cntlzd	rNEG, rNEG		/* bitcount of the bit.  */
> +	andi.	rNEG, rNEG, 56		/* bitcount to LS byte that differs. */
> +	sld	rWORD1, rWORD1, rNEG	/* shift left to clear MS bytes.  */
> +	sld	rWORD2, rWORD2, rNEG
> +	xor.	rBITDIF, rWORD1, rWORD2
> +	sub	rRTN, rWORD1, rWORD2
> +	blt-	L(highbit)
> +	sradi	rRTN, rRTN, 63
> +	ori	rRTN, rRTN, 1
> +	blr
> +L(highbit):
> +	sradi	rRTN, rWORD2, 63
> +	ori	rRTN, rRTN, 1
> +	blr
> +
> +#else
> +L(endstring):
>  	and	rTMP, r7F7F, rWORD1
>  	beq	cr1, L(equal)
>  	add	rTMP, rTMP, r7F7F
>  	xor.	rBITDIF, rWORD1, rWORD2
> -
>  	andc	rNEG, rNEG, rTMP
>  	blt-	L(highbit)
>  	cntlzd	rBITDIF, rBITDIF
> @@ -79,7 +127,7 @@ L(endstring):
>  	cmpd	cr1, rNEG, rBITDIF
>  	sub	rRTN, rWORD1, rWORD2
>  	blt-	cr1, L(equal)
> -	sradi	rRTN, rRTN, 63
> +	sradi	rRTN, rRTN, 63		/* must return an int.  */
>  	ori	rRTN, rRTN, 1
>  	blr
>  L(equal):
> @@ -95,11 +143,10 @@ L(different):
>  	ori	rRTN, rRTN, 1
>  	blr
>  L(highbit):
> -	srdi	rWORD2, rWORD2, 56
> -	srdi	rWORD1, rWORD1, 56
> -	sub	rRTN, rWORD1, rWORD2
> +	sradi	rRTN, rWORD2, 63
> +	ori	rRTN, rRTN, 1
>  	blr
> -
> +#endif
>
>  /* Oh well.  In this case, we just do a byte-by-byte comparison.  */
>  	.align 4
> diff --git a/sysdeps/powerpc/powerpc32/strcmp.S b/sysdeps/powerpc/powerpc32/strcmp.S
> index 297ca3c..91d60c9 100644
> --- a/sysdeps/powerpc/powerpc32/strcmp.S
> +++ b/sysdeps/powerpc/powerpc32/strcmp.S
> @@ -24,7 +24,7 @@
>
>  EALIGN (strcmp, 4, 0)
>
> -#define rTMP	r0
> +#define rTMP2	r0
>  #define rRTN	r3
>  #define rSTR1	r3	/* first string arg */
>  #define rSTR2	r4	/* second string arg */
> @@ -34,6 +34,7 @@ EALIGN (strcmp, 4, 0)
>  #define r7F7F	r8	/* constant 0x7f7f7f7f */
>  #define rNEG	r9	/* ~(word in s1 | 0x7f7f7f7f) */
>  #define rBITDIF	r10	/* bits that differ in s1 & s2 words */
> +#define rTMP	r11
>
>
>  	or	rTMP, rSTR2, rSTR1
> @@ -56,10 +57,45 @@ L(g1):	add	rTMP, rFEFE, rWORD1
>  	and.	rTMP, rTMP, rNEG
>  	cmpw	cr1, rWORD1, rWORD2
>  	beq+	L(g0)
> -L(endstring):
> +
>  /* OK. We've hit the end of the string. We need to be careful that
>     we don't compare two strings as different because of gunk beyond
>     the end of the strings...  */
> +#ifdef __LITTLE_ENDIAN__
> +L(endstring):
> +	addi    rTMP2, rTMP, -1
> +	andc    rTMP2, rTMP2, rTMP
> +	rlwimi	rTMP2, rTMP2, 1, 0, 30
> +	and	rWORD2, rWORD2, rTMP2		/* Mask off gunk.  */
> +	and	rWORD1, rWORD1, rTMP2
> +	rlwinm	rTMP2, rWORD2, 8, 0xffffffff	/* Byte reverse word.  */
> +	rlwinm	rTMP, rWORD1, 8, 0xffffffff
> +	rlwimi	rTMP2, rWORD2, 24, 0, 7
> +	rlwimi	rTMP, rWORD1, 24, 0, 7
> +	rlwimi	rTMP2, rWORD2, 24, 16, 23
> +	rlwimi	rTMP, rWORD1, 24, 16, 23
> +	xor.	rBITDIF, rTMP, rTMP2
> +	sub	rRTN, rTMP, rTMP2
> +	bgelr+
> +	ori	rRTN, rTMP2, 1
> +	blr
> +
> +L(different):
> +	lwz	rWORD1, -4(rSTR1)
> +	rlwinm	rTMP2, rWORD2, 8, 0xffffffff	/* Byte reverse word.  */
> +	rlwinm	rTMP, rWORD1, 8, 0xffffffff
> +	rlwimi	rTMP2, rWORD2, 24, 0, 7
> +	rlwimi	rTMP, rWORD1, 24, 0, 7
> +	rlwimi	rTMP2, rWORD2, 24, 16, 23
> +	rlwimi	rTMP, rWORD1, 24, 16, 23
> +	xor.	rBITDIF, rTMP, rTMP2
> +	sub	rRTN, rTMP, rTMP2
> +	bgelr+
> +	ori	rRTN, rTMP2, 1
> +	blr
> +
> +#else
> +L(endstring):
>  	and	rTMP, r7F7F, rWORD1
>  	beq	cr1, L(equal)
>  	add	rTMP, rTMP, r7F7F
> @@ -84,7 +120,7 @@ L(different):
>  L(highbit):
>  	ori	rRTN, rWORD2, 1
>  	blr
> -
> +#endif
>
>  /* Oh well.  In this case, we just do a byte-by-byte comparison.  */
>  	.align 4
> diff --git a/sysdeps/powerpc/powerpc64/strncmp.S b/sysdeps/powerpc/powerpc64/strncmp.S
> index 779d9f7..8f842c4 100644
> --- a/sysdeps/powerpc/powerpc64/strncmp.S
> +++ b/sysdeps/powerpc/powerpc64/strncmp.S
> @@ -25,7 +25,7 @@
>  EALIGN (strncmp, 4, 0)
>  	CALL_MCOUNT 3
>
> -#define rTMP	r0
> +#define rTMP2	r0
>  #define rRTN	r3
>  #define rSTR1	r3	/* first string arg */
>  #define rSTR2	r4	/* second string arg */
> @@ -36,6 +36,7 @@ EALIGN (strncmp, 4, 0)
>  #define r7F7F	r9	/* constant 0x7f7f7f7f7f7f7f7f */
>  #define rNEG	r10	/* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */
>  #define rBITDIF	r11	/* bits that differ in s1 & s2 words */
> +#define rTMP	r12
>
>  	dcbt	0,rSTR1
>  	or	rTMP, rSTR2, rSTR1
> @@ -77,12 +78,59 @@ L(g1):	add	rTMP, rFEFE, rWORD1
>     we don't compare two strings as different because of gunk beyond
>     the end of the strings...  */
>
> +#ifdef __LITTLE_ENDIAN__
> +L(endstring):
> +	addi    rTMP2, rTMP, -1
> +	beq	cr1, L(equal)
> +	andc    rTMP2, rTMP2, rTMP
> +	rldimi	rTMP2, rTMP2, 1, 0
> +	and	rWORD2, rWORD2, rTMP2	/* Mask off gunk.  */
> +	and	rWORD1, rWORD1, rTMP2
> +	cmpd	cr1, rWORD1, rWORD2
> +	beq	cr1, L(equal)
> +	xor	rBITDIF, rWORD1, rWORD2	/* rBITDIF has bits that differ.  */
> +	neg	rNEG, rBITDIF
> +	and	rNEG, rNEG, rBITDIF	/* rNEG has LS bit that differs.  */
> +	cntlzd	rNEG, rNEG		/* bitcount of the bit.  */
> +	andi.	rNEG, rNEG, 56		/* bitcount to LS byte that differs. */
> +	sld	rWORD1, rWORD1, rNEG	/* shift left to clear MS bytes.  */
> +	sld	rWORD2, rWORD2, rNEG
> +	xor.	rBITDIF, rWORD1, rWORD2
> +	sub	rRTN, rWORD1, rWORD2
> +	blt-	L(highbit)
> +	sradi	rRTN, rRTN, 63		/* must return an int.  */
> +	ori	rRTN, rRTN, 1
> +	blr
> +L(equal):
> +	li	rRTN, 0
> +	blr
> +
> +L(different):
> +	ld	rWORD1, -8(rSTR1)
> +	xor	rBITDIF, rWORD1, rWORD2	/* rBITDIF has bits that differ.  */
> +	neg	rNEG, rBITDIF
> +	and	rNEG, rNEG, rBITDIF	/* rNEG has LS bit that differs.  */
> +	cntlzd	rNEG, rNEG		/* bitcount of the bit.  */
> +	andi.	rNEG, rNEG, 56		/* bitcount to LS byte that differs. */
> +	sld	rWORD1, rWORD1, rNEG	/* shift left to clear MS bytes.  */
> +	sld	rWORD2, rWORD2, rNEG
> +	xor.	rBITDIF, rWORD1, rWORD2
> +	sub	rRTN, rWORD1, rWORD2
> +	blt-	L(highbit)
> +	sradi	rRTN, rRTN, 63
> +	ori	rRTN, rRTN, 1
> +	blr
> +L(highbit):
> +	sradi	rRTN, rWORD2, 63
> +	ori	rRTN, rRTN, 1
> +	blr
> +
> +#else
>  L(endstring):
>  	and	rTMP, r7F7F, rWORD1
>  	beq	cr1, L(equal)
>  	add	rTMP, rTMP, r7F7F
>  	xor.	rBITDIF, rWORD1, rWORD2
> -
>  	andc	rNEG, rNEG, rTMP
>  	blt-	L(highbit)
>  	cntlzd	rBITDIF, rBITDIF
> @@ -91,7 +139,7 @@ L(endstring):
>  	cmpd	cr1, rNEG, rBITDIF
>  	sub	rRTN, rWORD1, rWORD2
>  	blt-	cr1, L(equal)
> -	sradi	rRTN, rRTN, 63
> +	sradi	rRTN, rRTN, 63		/* must return an int.  */
>  	ori	rRTN, rRTN, 1
>  	blr
>  L(equal):
> @@ -99,7 +147,7 @@ L(equal):
>  	blr
>
>  L(different):
> -	ldu	rWORD1, -8(rSTR1)
> +	ld	rWORD1, -8(rSTR1)
>  	xor.	rBITDIF, rWORD1, rWORD2
>  	sub	rRTN, rWORD1, rWORD2
>  	blt-	L(highbit)
> @@ -107,11 +155,10 @@ L(different):
>  	ori	rRTN, rRTN, 1
>  	blr
>  L(highbit):
> -	srdi	rWORD2, rWORD2, 56
> -	srdi	rWORD1, rWORD1, 56
> -	sub	rRTN, rWORD1, rWORD2
> +	sradi	rRTN, rWORD2, 63
> +	ori	rRTN, rRTN, 1
>  	blr
> -
> +#endif
>
>  /* Oh well.  In this case, we just do a byte-by-byte comparison.  */
>  	.align 4
> diff --git a/sysdeps/powerpc/powerpc32/strncmp.S b/sysdeps/powerpc/powerpc32/strncmp.S
> index fa345d2..e36a160 100644
> --- a/sysdeps/powerpc/powerpc32/strncmp.S
> +++ b/sysdeps/powerpc/powerpc32/strncmp.S
> @@ -24,7 +24,7 @@
>
>  EALIGN (strncmp, 4, 0)
>
> -#define rTMP	r0
> +#define rTMP2	r0
>  #define rRTN	r3
>  #define rSTR1	r3	/* first string arg */
>  #define rSTR2	r4	/* second string arg */
> @@ -35,6 +35,7 @@ EALIGN (strncmp, 4, 0)
>  #define r7F7F	r9	/* constant 0x7f7f7f7f */
>  #define rNEG	r10	/* ~(word in s1 | 0x7f7f7f7f) */
>  #define rBITDIF	r11	/* bits that differ in s1 & s2 words */
> +#define rTMP	r12
>
>  	dcbt	0,rSTR1
>  	or	rTMP, rSTR2, rSTR1
> @@ -73,12 +74,45 @@ L(g1):	add	rTMP, rFEFE, rWORD1
>     we don't compare two strings as different because of gunk beyond
>     the end of the strings...  */
>
> +#ifdef __LITTLE_ENDIAN__
> +L(endstring):
> +	slwi	rTMP, rTMP, 1
> +	addi    rTMP2, rTMP, -1
> +	andc    rTMP2, rTMP2, rTMP
> +	and	rWORD2, rWORD2, rTMP2		/* Mask off gunk.  */
> +	and	rWORD1, rWORD1, rTMP2
> +	rlwinm	rTMP2, rWORD2, 8, 0xffffffff	/* Byte reverse word.  */
> +	rlwinm	rTMP, rWORD1, 8, 0xffffffff
> +	rlwimi	rTMP2, rWORD2, 24, 0, 7
> +	rlwimi	rTMP, rWORD1, 24, 0, 7
> +	rlwimi	rTMP2, rWORD2, 24, 16, 23
> +	rlwimi	rTMP, rWORD1, 24, 16, 23
> +	xor.	rBITDIF, rTMP, rTMP2
> +	sub	rRTN, rTMP, rTMP2
> +	bgelr+
> +	ori	rRTN, rTMP2, 1
> +	blr
> +
> +L(different):
> +	lwz	rWORD1, -4(rSTR1)
> +	rlwinm	rTMP2, rWORD2, 8, 0xffffffff	/* Byte reverse word.  */
> +	rlwinm	rTMP, rWORD1, 8, 0xffffffff
> +	rlwimi	rTMP2, rWORD2, 24, 0, 7
> +	rlwimi	rTMP, rWORD1, 24, 0, 7
> +	rlwimi	rTMP2, rWORD2, 24, 16, 23
> +	rlwimi	rTMP, rWORD1, 24, 16, 23
> +	xor.	rBITDIF, rTMP, rTMP2
> +	sub	rRTN, rTMP, rTMP2
> +	bgelr+
> +	ori	rRTN, rTMP2, 1
> +	blr
> +
> +#else
>  L(endstring):
>  	and	rTMP, r7F7F, rWORD1
>  	beq	cr1, L(equal)
>  	add	rTMP, rTMP, r7F7F
>  	xor.	rBITDIF, rWORD1, rWORD2
> -
>  	andc	rNEG, rNEG, rTMP
>  	blt-	L(highbit)
>  	cntlzw	rBITDIF, rBITDIF
> @@ -86,28 +120,20 @@ L(endstring):
>  	addi	rNEG, rNEG, 7
>  	cmpw	cr1, rNEG, rBITDIF
>  	sub	rRTN, rWORD1, rWORD2
> -	blt-	cr1, L(equal)
> -	srawi	rRTN, rRTN, 31
> -	ori	rRTN, rRTN, 1
> -	blr
> +	bgelr+	cr1
>  L(equal):
>  	li	rRTN, 0
>  	blr
>
>  L(different):
> -	lwzu	rWORD1, -4(rSTR1)
> +	lwz	rWORD1, -4(rSTR1)
>  	xor.	rBITDIF, rWORD1, rWORD2
>  	sub	rRTN, rWORD1, rWORD2
> -	blt-	L(highbit)
> -	srawi	rRTN, rRTN, 31
> -	ori	rRTN, rRTN, 1
> -	blr
> +	bgelr+
>  L(highbit):
> -	srwi	rWORD2, rWORD2, 24
> -	srwi	rWORD1, rWORD1, 24
> -	sub	rRTN, rWORD1, rWORD2
> +	ori	rRTN, rWORD2, 1
>  	blr
> -
> +#endif
>
>  /* Oh well.  In this case, we just do a byte-by-byte comparison.  */
>  	.align 4
> diff --git a/sysdeps/powerpc/powerpc64/power4/strncmp.S b/sysdeps/powerpc/powerpc64/power4/strncmp.S
> index 1276e16..5d136cf 100644
> --- a/sysdeps/powerpc/powerpc64/power4/strncmp.S
> +++ b/sysdeps/powerpc/powerpc64/power4/strncmp.S
> @@ -25,7 +25,7 @@
>  EALIGN (strncmp, 4, 0)
>  	CALL_MCOUNT 3
>
> -#define rTMP	r0
> +#define rTMP2	r0
>  #define rRTN	r3
>  #define rSTR1	r3	/* first string arg */
>  #define rSTR2	r4	/* second string arg */
> @@ -38,6 +38,7 @@ EALIGN (strncmp, 4, 0)
>  #define r7F7F	r9	/* constant 0x7f7f7f7f7f7f7f7f */
>  #define rNEG	r10	/* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */
>  #define rBITDIF	r11	/* bits that differ in s1 & s2 words */
> +#define rTMP	r12
>
>  	dcbt	0,rSTR1
>  	or	rTMP, rSTR2, rSTR1
> @@ -79,12 +80,59 @@ L(g1):	add	rTMP, rFEFE, rWORD1
>     we don't compare two strings as different because of gunk beyond
>     the end of the strings...  */
>
> +#ifdef __LITTLE_ENDIAN__
> +L(endstring):
> +	addi    rTMP2, rTMP, -1
> +	beq	cr1, L(equal)
> +	andc    rTMP2, rTMP2, rTMP
> +	rldimi	rTMP2, rTMP2, 1, 0
> +	and	rWORD2, rWORD2, rTMP2	/* Mask off gunk.  */
> +	and	rWORD1, rWORD1, rTMP2
> +	cmpd	cr1, rWORD1, rWORD2
> +	beq	cr1, L(equal)
> +	xor	rBITDIF, rWORD1, rWORD2	/* rBITDIF has bits that differ.  */
> +	neg	rNEG, rBITDIF
> +	and	rNEG, rNEG, rBITDIF	/* rNEG has LS bit that differs.  */
> +	cntlzd	rNEG, rNEG		/* bitcount of the bit.  */
> +	andi.	rNEG, rNEG, 56		/* bitcount to LS byte that differs. */
> +	sld	rWORD1, rWORD1, rNEG	/* shift left to clear MS bytes.  */
> +	sld	rWORD2, rWORD2, rNEG
> +	xor.	rBITDIF, rWORD1, rWORD2
> +	sub	rRTN, rWORD1, rWORD2
> +	blt-	L(highbit)
> +	sradi	rRTN, rRTN, 63		/* must return an int.  */
> +	ori	rRTN, rRTN, 1
> +	blr
> +L(equal):
> +	li	rRTN, 0
> +	blr
> +
> +L(different):
> +	ld	rWORD1, -8(rSTR1)
> +	xor	rBITDIF, rWORD1, rWORD2	/* rBITDIF has bits that differ.  */
> +	neg	rNEG, rBITDIF
> +	and	rNEG, rNEG, rBITDIF	/* rNEG has LS bit that differs.  */
> +	cntlzd	rNEG, rNEG		/* bitcount of the bit.  */
> +	andi.	rNEG, rNEG, 56		/* bitcount to LS byte that differs. */
> +	sld	rWORD1, rWORD1, rNEG	/* shift left to clear MS bytes.  */
> +	sld	rWORD2, rWORD2, rNEG
> +	xor.	rBITDIF, rWORD1, rWORD2
> +	sub	rRTN, rWORD1, rWORD2
> +	blt-	L(highbit)
> +	sradi	rRTN, rRTN, 63
> +	ori	rRTN, rRTN, 1
> +	blr
> +L(highbit):
> +	sradi	rRTN, rWORD2, 63
> +	ori	rRTN, rRTN, 1
> +	blr
> +
> +#else
>  L(endstring):
>  	and	rTMP, r7F7F, rWORD1
>  	beq	cr1, L(equal)
>  	add	rTMP, rTMP, r7F7F
>  	xor.	rBITDIF, rWORD1, rWORD2
> -
>  	andc	rNEG, rNEG, rTMP
>  	blt-	L(highbit)
>  	cntlzd	rBITDIF, rBITDIF
> @@ -93,7 +141,7 @@ L(endstring):
>  	cmpd	cr1, rNEG, rBITDIF
>  	sub	rRTN, rWORD1, rWORD2
>  	blt-	cr1, L(equal)
> -	sradi	rRTN, rRTN, 63
> +	sradi	rRTN, rRTN, 63		/* must return an int.  */
>  	ori	rRTN, rRTN, 1
>  	blr
>  L(equal):
> @@ -101,7 +149,7 @@ L(equal):
>  	blr
>
>  L(different):
> -	ldu	rWORD1, -8(rSTR1)
> +	ld	rWORD1, -8(rSTR1)
>  	xor.	rBITDIF, rWORD1, rWORD2
>  	sub	rRTN, rWORD1, rWORD2
>  	blt-	L(highbit)
> @@ -109,11 +157,10 @@ L(different):
>  	ori	rRTN, rRTN, 1
>  	blr
>  L(highbit):
> -	srdi	rWORD2, rWORD2, 56
> -	srdi	rWORD1, rWORD1, 56
> -	sub	rRTN, rWORD1, rWORD2
> +	sradi	rRTN, rWORD2, 63
> +	ori	rRTN, rRTN, 1
>  	blr
> -
> +#endif
>
>  /* Oh well.  In this case, we just do a byte-by-byte comparison.  */
>  	.align 4
> diff --git a/sysdeps/powerpc/powerpc32/power4/strncmp.S b/sysdeps/powerpc/powerpc32/power4/strncmp.S
> index 724d908..89b961e 100644
> --- a/sysdeps/powerpc/powerpc32/power4/strncmp.S
> +++ b/sysdeps/powerpc/powerpc32/power4/strncmp.S
> @@ -24,7 +24,7 @@
>
>  EALIGN (strncmp, 4, 0)
>
> -#define rTMP	r0
> +#define rTMP2	r0
>  #define rRTN	r3
>  #define rSTR1	r3	/* first string arg */
>  #define rSTR2	r4	/* second string arg */
> @@ -37,6 +37,7 @@ EALIGN (strncmp, 4, 0)
>  #define r7F7F	r9	/* constant 0x7f7f7f7f */
>  #define rNEG	r10	/* ~(word in s1 | 0x7f7f7f7f) */
>  #define rBITDIF	r11	/* bits that differ in s1 & s2 words */
> +#define rTMP	r12
>
>  	dcbt	0,rSTR1
>  	or	rTMP, rSTR2, rSTR1
> @@ -75,12 +76,45 @@ L(g1):	add	rTMP, rFEFE, rWORD1
>     we don't compare two strings as different because of gunk beyond
>     the end of the strings...  */
>
> +#ifdef __LITTLE_ENDIAN__
> +L(endstring):
> +	slwi	rTMP, rTMP, 1
> +	addi    rTMP2, rTMP, -1
> +	andc    rTMP2, rTMP2, rTMP
> +	and	rWORD2, rWORD2, rTMP2		/* Mask off gunk.  */
> +	and	rWORD1, rWORD1, rTMP2
> +	rlwinm	rTMP2, rWORD2, 8, 0xffffffff	/* Byte reverse word.  */
> +	rlwinm	rTMP, rWORD1, 8, 0xffffffff
> +	rldimi	rTMP2, rWORD2, 24, 32
> +	rldimi	rTMP, rWORD1, 24, 32
> +	rlwimi	rTMP2, rWORD2, 24, 16, 23
> +	rlwimi	rTMP, rWORD1, 24, 16, 23
> +	xor.	rBITDIF, rTMP, rTMP2
> +	sub	rRTN, rTMP, rTMP2
> +	bgelr+
> +	ori	rRTN, rTMP2, 1
> +	blr
> +
> +L(different):
> +	lwz	rWORD1, -4(rSTR1)
> +	rlwinm	rTMP2, rWORD2, 8, 0xffffffff	/* Byte reverse word.  */
> +	rlwinm	rTMP, rWORD1, 8, 0xffffffff
> +	rldimi	rTMP2, rWORD2, 24, 32
> +	rldimi	rTMP, rWORD1, 24, 32
> +	rlwimi	rTMP2, rWORD2, 24, 16, 23
> +	rlwimi	rTMP, rWORD1, 24, 16, 23
> +	xor.	rBITDIF, rTMP, rTMP2
> +	sub	rRTN, rTMP, rTMP2
> +	bgelr+
> +	ori	rRTN, rTMP2, 1
> +	blr
> +
> +#else
>  L(endstring):
>  	and	rTMP, r7F7F, rWORD1
>  	beq	cr1, L(equal)
>  	add	rTMP, rTMP, r7F7F
>  	xor.	rBITDIF, rWORD1, rWORD2
> -
>  	andc	rNEG, rNEG, rTMP
>  	blt-	L(highbit)
>  	cntlzw	rBITDIF, rBITDIF
> @@ -88,28 +122,20 @@ L(endstring):
>  	addi	rNEG, rNEG, 7
>  	cmpw	cr1, rNEG, rBITDIF
>  	sub	rRTN, rWORD1, rWORD2
> -	blt-	cr1, L(equal)
> -	srawi	rRTN, rRTN, 31
> -	ori	rRTN, rRTN, 1
> -	blr
> +	bgelr+	cr1
>  L(equal):
>  	li	rRTN, 0
>  	blr
>
>  L(different):
> -	lwzu	rWORD1, -4(rSTR1)
> +	lwz	rWORD1, -4(rSTR1)
>  	xor.	rBITDIF, rWORD1, rWORD2
>  	sub	rRTN, rWORD1, rWORD2
> -	blt-	L(highbit)
> -	srawi	rRTN, rRTN, 31
> -	ori	rRTN, rRTN, 1
> -	blr
> +	bgelr+
>  L(highbit):
> -	srwi	rWORD2, rWORD2, 24
> -	srwi	rWORD1, rWORD1, 24
> -	sub	rRTN, rWORD1, rWORD2
> +	ori	rRTN, rWORD2, 1
>  	blr
> -
> +#endif
>
>  /* Oh well.  In this case, we just do a byte-by-byte comparison.  */
>  	.align 4
> diff --git a/sysdeps/powerpc/powerpc64/power7/strncmp.S b/sysdeps/powerpc/powerpc64/power7/strncmp.S
> index 77ecad5..e618b01 100644
> --- a/sysdeps/powerpc/powerpc64/power7/strncmp.S
> +++ b/sysdeps/powerpc/powerpc64/power7/strncmp.S
> @@ -27,7 +27,7 @@
>  EALIGN (strncmp,5,0)
>  	CALL_MCOUNT 3
>
> -#define rTMP	r0
> +#define rTMP2	r0
>  #define rRTN	r3
>  #define rSTR1	r3	/* first string arg */
>  #define rSTR2	r4	/* second string arg */
> @@ -40,6 +40,7 @@ EALIGN (strncmp,5,0)
>  #define r7F7F	r9	/* constant 0x7f7f7f7f7f7f7f7f */
>  #define rNEG	r10	/* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */
>  #define rBITDIF	r11	/* bits that differ in s1 & s2 words */
> +#define rTMP	r12
>
>  	dcbt	0,rSTR1
>  	nop
> @@ -83,12 +84,57 @@ L(g1):	add	rTMP,rFEFE,rWORD1
>     we don't compare two strings as different because of gunk beyond
>     the end of the strings...  */
>
> +#ifdef __LITTLE_ENDIAN__
> +L(endstring):
> +	addi    rTMP2, rTMP, -1
> +	beq	cr1, L(equal)
> +	andc    rTMP2, rTMP2, rTMP
> +	rldimi	rTMP2, rTMP2, 1, 0
> +	and	rWORD2, rWORD2, rTMP2	/* Mask off gunk.  */
> +	and	rWORD1, rWORD1, rTMP2
> +	cmpd	cr1, rWORD1, rWORD2
> +	beq	cr1, L(equal)
> +	cmpb	rBITDIF, rWORD1, rWORD2	/* 0xff on equal bytes.  */
> +	addi	rNEG, rBITDIF, 1
> +	orc	rNEG, rNEG, rBITDIF	/* 0's below LS differing byte.  */
> +	sldi	rNEG, rNEG, 8		/* 1's above LS differing byte.  */
> +	andc	rWORD1, rWORD1, rNEG	/* mask off MS bytes.  */
> +	andc	rWORD2, rWORD2, rNEG
> +	xor.	rBITDIF, rWORD1, rWORD2
> +	sub	rRTN, rWORD1, rWORD2
> +	blt	L(highbit)
> +	sradi	rRTN, rRTN, 63		/* must return an int.  */
> +	ori	rRTN, rRTN, 1
> +	blr
> +L(equal):
> +	li	rRTN, 0
> +	blr
> +
> +L(different):
> +	ld	rWORD1, -8(rSTR1)
> +	cmpb	rBITDIF, rWORD1, rWORD2	/* 0xff on equal bytes.  */
> +	addi	rNEG, rBITDIF, 1
> +	orc	rNEG, rNEG, rBITDIF	/* 0's below LS differing byte.  */
> +	sldi	rNEG, rNEG, 8		/* 1's above LS differing byte.  */
> +	andc	rWORD1, rWORD1, rNEG	/* mask off MS bytes.  */
> +	andc	rWORD2, rWORD2, rNEG
> +	xor.	rBITDIF, rWORD1, rWORD2
> +	sub	rRTN, rWORD1, rWORD2
> +	blt	L(highbit)
> +	sradi	rRTN, rRTN, 63
> +	ori	rRTN, rRTN, 1
> +	blr
> +L(highbit):
> +	sradi	rRTN, rWORD2, 63
> +	ori	rRTN, rRTN, 1
> +	blr
> +
> +#else
>  L(endstring):
>  	and	rTMP,r7F7F,rWORD1
>  	beq	cr1,L(equal)
>  	add	rTMP,rTMP,r7F7F
>  	xor.	rBITDIF,rWORD1,rWORD2
> -
>  	andc	rNEG,rNEG,rTMP
>  	blt	L(highbit)
>  	cntlzd	rBITDIF,rBITDIF
> @@ -97,7 +143,7 @@ L(endstring):
>  	cmpd	cr1,rNEG,rBITDIF
>  	sub	rRTN,rWORD1,rWORD2
>  	blt	cr1,L(equal)
> -	sradi	rRTN,rRTN,63
> +	sradi	rRTN,rRTN,63		/* must return an int.  */
>  	ori	rRTN,rRTN,1
>  	blr
>  L(equal):
> @@ -105,7 +151,7 @@ L(equal):
>  	blr
>
>  L(different):
> -	ldu	rWORD1,-8(rSTR1)
> +	ld	rWORD1,-8(rSTR1)
>  	xor.	rBITDIF,rWORD1,rWORD2
>  	sub	rRTN,rWORD1,rWORD2
>  	blt	L(highbit)
> @@ -113,11 +159,10 @@ L(different):
>  	ori	rRTN,rRTN,1
>  	blr
>  L(highbit):
> -	srdi	rWORD2,rWORD2,56
> -	srdi	rWORD1,rWORD1,56
> -	sub	rRTN,rWORD1,rWORD2
> +	sradi	rRTN,rWORD2,63
> +	ori	rRTN,rRTN,1
>  	blr
> -
> +#endif
>
>  /* Oh well.  In this case, we just do a byte-by-byte comparison.  */
>  	.align	4
> diff --git a/sysdeps/powerpc/powerpc32/power7/strncmp.S b/sysdeps/powerpc/powerpc32/power7/strncmp.S
> index fdae44d..10c9d25 100644
> --- a/sysdeps/powerpc/powerpc32/power7/strncmp.S
> +++ b/sysdeps/powerpc/powerpc32/power7/strncmp.S
> @@ -26,7 +26,7 @@
>
>  EALIGN (strncmp,5,0)
>
> -#define rTMP	r0
> +#define rTMP2	r0
>  #define rRTN	r3
>  #define rSTR1	r3	/* first string arg */
>  #define rSTR2	r4	/* second string arg */
> @@ -39,6 +39,7 @@ EALIGN (strncmp,5,0)
>  #define r7F7F	r9	/* constant 0x7f7f7f7f */
>  #define rNEG	r10	/* ~(word in s1 | 0x7f7f7f7f) */
>  #define rBITDIF	r11	/* bits that differ in s1 & s2 words */
> +#define rTMP	r12
>
>  	dcbt	0,rSTR1
>  	nop
> @@ -78,13 +79,45 @@ L(g1):	add	rTMP,rFEFE,rWORD1
>  /* OK. We've hit the end of the string. We need to be careful that
>     we don't compare two strings as different because of gunk beyond
>     the end of the strings...  */
> +#ifdef __LITTLE_ENDIAN__
> +L(endstring):
> +	slwi	rTMP, rTMP, 1
> +	addi    rTMP2, rTMP, -1
> +	andc    rTMP2, rTMP2, rTMP
> +	and	rWORD2, rWORD2, rTMP2		/* Mask off gunk.  */
> +	and	rWORD1, rWORD1, rTMP2
> +	rlwinm	rTMP2, rWORD2, 8, 0xffffffff	/* Byte reverse word.  */
> +	rlwinm	rTMP, rWORD1, 8, 0xffffffff
> +	rldimi	rTMP2, rWORD2, 24, 32
> +	rldimi	rTMP, rWORD1, 24, 32
> +	rlwimi	rTMP2, rWORD2, 24, 16, 23
> +	rlwimi	rTMP, rWORD1, 24, 16, 23
> +	xor.	rBITDIF, rTMP, rTMP2
> +	sub	rRTN, rTMP, rTMP2
> +	bgelr
> +	ori	rRTN, rTMP2, 1
> +	blr
> +
> +L(different):
> +	lwz	rWORD1, -4(rSTR1)
> +	rlwinm	rTMP2, rWORD2, 8, 0xffffffff	/* Byte reverse word.  */
> +	rlwinm	rTMP, rWORD1, 8, 0xffffffff
> +	rldimi	rTMP2, rWORD2, 24, 32
> +	rldimi	rTMP, rWORD1, 24, 32
> +	rlwimi	rTMP2, rWORD2, 24, 16, 23
> +	rlwimi	rTMP, rWORD1, 24, 16, 23
> +	xor.	rBITDIF, rTMP, rTMP2
> +	sub	rRTN, rTMP, rTMP2
> +	bgelr
> +	ori	rRTN, rTMP2, 1
> +	blr
>
> +#else
>  L(endstring):
>  	and	rTMP,r7F7F,rWORD1
>  	beq	cr1,L(equal)
>  	add	rTMP,rTMP,r7F7F
>  	xor.	rBITDIF,rWORD1,rWORD2
> -
>  	andc	rNEG,rNEG,rTMP
>  	blt	L(highbit)
>  	cntlzw	rBITDIF,rBITDIF
> @@ -92,28 +125,20 @@ L(endstring):
>  	addi	rNEG,rNEG,7
>  	cmpw	cr1,rNEG,rBITDIF
>  	sub	rRTN,rWORD1,rWORD2
> -	blt	cr1,L(equal)
> -	srawi	rRTN,rRTN,31
> -	ori	rRTN,rRTN,1
> -	blr
> +	bgelr	cr1
>  L(equal):
>  	li	rRTN,0
>  	blr
>
>  L(different):
> -	lwzu	rWORD1,-4(rSTR1)
> +	lwz	rWORD1,-4(rSTR1)
>  	xor.	rBITDIF,rWORD1,rWORD2
>  	sub	rRTN,rWORD1,rWORD2
> -	blt	L(highbit)
> -	srawi	rRTN,rRTN,31
> -	ori	rRTN,rRTN,1
> -	blr
> +	bgelr
>  L(highbit):
> -	srwi	rWORD2,rWORD2,24
> -	srwi	rWORD1,rWORD1,24
> -	sub	rRTN,rWORD1,rWORD2
> +	ori	rRTN, rWORD2, 1
>  	blr
> -
> +#endif
>
>  /* Oh well. In this case, we just do a byte-by-byte comparison.  */
>  	.align	4
>


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]