This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
Re: PowerPC LE strcmp and strncmp
- From: Adhemerval Zanella <azanella at linux dot vnet dot ibm dot com>
- To: libc-alpha at sourceware dot org
- Date: Thu, 22 Aug 2013 11:35:58 -0300
- Subject: Re: PowerPC LE strcmp and strncmp
- References: <20130809052006 dot GJ3294 at bubble dot grove dot modra dot org>
Hi Alan,
This patch is OK; I even noticed an improvement for strncmp on ppc32.
On 09-08-2013 02:20, Alan Modra wrote:
> More little-endian support. I leave the main strcmp loops unchanged
> (well, except for renumbering rTMP to something other than r0, since
> it's needed as the source of an addi insn, where r0 reads as the constant 0) and modify the tail for little-endian.
>
> I noticed some of the big-endian tail code was a little untidy so have
> cleaned that up too.
>
> * sysdeps/powerpc/powerpc64/strcmp.S (rTMP2): Define as r0.
> (rTMP): Define as r11.
> (strcmp): Add little-endian support. Optimise tail.
> * sysdeps/powerpc/powerpc32/strcmp.S: Similarly.
> * sysdeps/powerpc/powerpc64/strncmp.S: Likewise.
> * sysdeps/powerpc/powerpc32/strncmp.S: Likewise.
> * sysdeps/powerpc/powerpc64/power4/strncmp.S: Likewise.
> * sysdeps/powerpc/powerpc32/power4/strncmp.S: Likewise.
> * sysdeps/powerpc/powerpc64/power7/strncmp.S: Likewise.
> * sysdeps/powerpc/powerpc32/power7/strncmp.S: Likewise.
>
> diff --git a/sysdeps/powerpc/powerpc64/strcmp.S b/sysdeps/powerpc/powerpc64/strcmp.S
> index c9d6dac..7085468 100644
> --- a/sysdeps/powerpc/powerpc64/strcmp.S
> +++ b/sysdeps/powerpc/powerpc64/strcmp.S
> @@ -25,7 +25,7 @@
> EALIGN (strcmp, 4, 0)
> CALL_MCOUNT 2
>
> -#define rTMP r0
> +#define rTMP2 r0
> #define rRTN r3
> #define rSTR1 r3 /* first string arg */
> #define rSTR2 r4 /* second string arg */
> @@ -35,6 +35,7 @@ EALIGN (strcmp, 4, 0)
> #define r7F7F r8 /* constant 0x7f7f7f7f7f7f7f7f */
> #define rNEG r9 /* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */
> #define rBITDIF r10 /* bits that differ in s1 & s2 words */
> +#define rTMP r11
>
> dcbt 0,rSTR1
> or rTMP, rSTR2, rSTR1
> @@ -58,19 +59,66 @@ L(g0): ldu rWORD1, 8(rSTR1)
> ldu rWORD2, 8(rSTR2)
> L(g1): add rTMP, rFEFE, rWORD1
> nor rNEG, r7F7F, rWORD1
> -
> and. rTMP, rTMP, rNEG
> cmpd cr1, rWORD1, rWORD2
> beq+ L(g0)
> -L(endstring):
> +
> /* OK. We've hit the end of the string. We need to be careful that
> we don't compare two strings as different because of gunk beyond
> the end of the strings... */
> +#ifdef __LITTLE_ENDIAN__
> +L(endstring):
> + addi rTMP2, rTMP, -1
> + beq cr1, L(equal)
> + andc rTMP2, rTMP2, rTMP
> + rldimi rTMP2, rTMP2, 1, 0
> + and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */
> + and rWORD1, rWORD1, rTMP2
> + cmpd cr1, rWORD1, rWORD2
> + beq cr1, L(equal)
> + xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */
> + neg rNEG, rBITDIF
> + and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */
> + cntlzd rNEG, rNEG /* bitcount of the bit. */
> + andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */
> + sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */
> + sld rWORD2, rWORD2, rNEG
> + xor. rBITDIF, rWORD1, rWORD2
> + sub rRTN, rWORD1, rWORD2
> + blt- L(highbit)
> + sradi rRTN, rRTN, 63 /* must return an int. */
> + ori rRTN, rRTN, 1
> + blr
> +L(equal):
> + li rRTN, 0
> + blr
> +
> +L(different):
> + ld rWORD1, -8(rSTR1)
> + xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */
> + neg rNEG, rBITDIF
> + and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */
> + cntlzd rNEG, rNEG /* bitcount of the bit. */
> + andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */
> + sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */
> + sld rWORD2, rWORD2, rNEG
> + xor. rBITDIF, rWORD1, rWORD2
> + sub rRTN, rWORD1, rWORD2
> + blt- L(highbit)
> + sradi rRTN, rRTN, 63
> + ori rRTN, rRTN, 1
> + blr
> +L(highbit):
> + sradi rRTN, rWORD2, 63
> + ori rRTN, rRTN, 1
> + blr
> +
> +#else
> +L(endstring):
> and rTMP, r7F7F, rWORD1
> beq cr1, L(equal)
> add rTMP, rTMP, r7F7F
> xor. rBITDIF, rWORD1, rWORD2
> -
> andc rNEG, rNEG, rTMP
> blt- L(highbit)
> cntlzd rBITDIF, rBITDIF
> @@ -79,7 +127,7 @@ L(endstring):
> cmpd cr1, rNEG, rBITDIF
> sub rRTN, rWORD1, rWORD2
> blt- cr1, L(equal)
> - sradi rRTN, rRTN, 63
> + sradi rRTN, rRTN, 63 /* must return an int. */
> ori rRTN, rRTN, 1
> blr
> L(equal):
> @@ -95,11 +143,10 @@ L(different):
> ori rRTN, rRTN, 1
> blr
> L(highbit):
> - srdi rWORD2, rWORD2, 56
> - srdi rWORD1, rWORD1, 56
> - sub rRTN, rWORD1, rWORD2
> + sradi rRTN, rWORD2, 63
> + ori rRTN, rRTN, 1
> blr
> -
> +#endif
>
> /* Oh well. In this case, we just do a byte-by-byte comparison. */
> .align 4
> diff --git a/sysdeps/powerpc/powerpc32/strcmp.S b/sysdeps/powerpc/powerpc32/strcmp.S
> index 297ca3c..91d60c9 100644
> --- a/sysdeps/powerpc/powerpc32/strcmp.S
> +++ b/sysdeps/powerpc/powerpc32/strcmp.S
> @@ -24,7 +24,7 @@
>
> EALIGN (strcmp, 4, 0)
>
> -#define rTMP r0
> +#define rTMP2 r0
> #define rRTN r3
> #define rSTR1 r3 /* first string arg */
> #define rSTR2 r4 /* second string arg */
> @@ -34,6 +34,7 @@ EALIGN (strcmp, 4, 0)
> #define r7F7F r8 /* constant 0x7f7f7f7f */
> #define rNEG r9 /* ~(word in s1 | 0x7f7f7f7f) */
> #define rBITDIF r10 /* bits that differ in s1 & s2 words */
> +#define rTMP r11
>
>
> or rTMP, rSTR2, rSTR1
> @@ -56,10 +57,45 @@ L(g1): add rTMP, rFEFE, rWORD1
> and. rTMP, rTMP, rNEG
> cmpw cr1, rWORD1, rWORD2
> beq+ L(g0)
> -L(endstring):
> +
> /* OK. We've hit the end of the string. We need to be careful that
> we don't compare two strings as different because of gunk beyond
> the end of the strings... */
> +#ifdef __LITTLE_ENDIAN__
> +L(endstring):
> + addi rTMP2, rTMP, -1
> + andc rTMP2, rTMP2, rTMP
> + rlwimi rTMP2, rTMP2, 1, 0, 30
> + and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */
> + and rWORD1, rWORD1, rTMP2
> + rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */
> + rlwinm rTMP, rWORD1, 8, 0xffffffff
> + rlwimi rTMP2, rWORD2, 24, 0, 7
> + rlwimi rTMP, rWORD1, 24, 0, 7
> + rlwimi rTMP2, rWORD2, 24, 16, 23
> + rlwimi rTMP, rWORD1, 24, 16, 23
> + xor. rBITDIF, rTMP, rTMP2
> + sub rRTN, rTMP, rTMP2
> + bgelr+
> + ori rRTN, rTMP2, 1
> + blr
> +
> +L(different):
> + lwz rWORD1, -4(rSTR1)
> + rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */
> + rlwinm rTMP, rWORD1, 8, 0xffffffff
> + rlwimi rTMP2, rWORD2, 24, 0, 7
> + rlwimi rTMP, rWORD1, 24, 0, 7
> + rlwimi rTMP2, rWORD2, 24, 16, 23
> + rlwimi rTMP, rWORD1, 24, 16, 23
> + xor. rBITDIF, rTMP, rTMP2
> + sub rRTN, rTMP, rTMP2
> + bgelr+
> + ori rRTN, rTMP2, 1
> + blr
> +
> +#else
> +L(endstring):
> and rTMP, r7F7F, rWORD1
> beq cr1, L(equal)
> add rTMP, rTMP, r7F7F
> @@ -84,7 +120,7 @@ L(different):
> L(highbit):
> ori rRTN, rWORD2, 1
> blr
> -
> +#endif
>
> /* Oh well. In this case, we just do a byte-by-byte comparison. */
> .align 4
> diff --git a/sysdeps/powerpc/powerpc64/strncmp.S b/sysdeps/powerpc/powerpc64/strncmp.S
> index 779d9f7..8f842c4 100644
> --- a/sysdeps/powerpc/powerpc64/strncmp.S
> +++ b/sysdeps/powerpc/powerpc64/strncmp.S
> @@ -25,7 +25,7 @@
> EALIGN (strncmp, 4, 0)
> CALL_MCOUNT 3
>
> -#define rTMP r0
> +#define rTMP2 r0
> #define rRTN r3
> #define rSTR1 r3 /* first string arg */
> #define rSTR2 r4 /* second string arg */
> @@ -36,6 +36,7 @@ EALIGN (strncmp, 4, 0)
> #define r7F7F r9 /* constant 0x7f7f7f7f7f7f7f7f */
> #define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */
> #define rBITDIF r11 /* bits that differ in s1 & s2 words */
> +#define rTMP r12
>
> dcbt 0,rSTR1
> or rTMP, rSTR2, rSTR1
> @@ -77,12 +78,59 @@ L(g1): add rTMP, rFEFE, rWORD1
> we don't compare two strings as different because of gunk beyond
> the end of the strings... */
>
> +#ifdef __LITTLE_ENDIAN__
> +L(endstring):
> + addi rTMP2, rTMP, -1
> + beq cr1, L(equal)
> + andc rTMP2, rTMP2, rTMP
> + rldimi rTMP2, rTMP2, 1, 0
> + and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */
> + and rWORD1, rWORD1, rTMP2
> + cmpd cr1, rWORD1, rWORD2
> + beq cr1, L(equal)
> + xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */
> + neg rNEG, rBITDIF
> + and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */
> + cntlzd rNEG, rNEG /* bitcount of the bit. */
> + andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */
> + sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */
> + sld rWORD2, rWORD2, rNEG
> + xor. rBITDIF, rWORD1, rWORD2
> + sub rRTN, rWORD1, rWORD2
> + blt- L(highbit)
> + sradi rRTN, rRTN, 63 /* must return an int. */
> + ori rRTN, rRTN, 1
> + blr
> +L(equal):
> + li rRTN, 0
> + blr
> +
> +L(different):
> + ld rWORD1, -8(rSTR1)
> + xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */
> + neg rNEG, rBITDIF
> + and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */
> + cntlzd rNEG, rNEG /* bitcount of the bit. */
> + andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */
> + sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */
> + sld rWORD2, rWORD2, rNEG
> + xor. rBITDIF, rWORD1, rWORD2
> + sub rRTN, rWORD1, rWORD2
> + blt- L(highbit)
> + sradi rRTN, rRTN, 63
> + ori rRTN, rRTN, 1
> + blr
> +L(highbit):
> + sradi rRTN, rWORD2, 63
> + ori rRTN, rRTN, 1
> + blr
> +
> +#else
> L(endstring):
> and rTMP, r7F7F, rWORD1
> beq cr1, L(equal)
> add rTMP, rTMP, r7F7F
> xor. rBITDIF, rWORD1, rWORD2
> -
> andc rNEG, rNEG, rTMP
> blt- L(highbit)
> cntlzd rBITDIF, rBITDIF
> @@ -91,7 +139,7 @@ L(endstring):
> cmpd cr1, rNEG, rBITDIF
> sub rRTN, rWORD1, rWORD2
> blt- cr1, L(equal)
> - sradi rRTN, rRTN, 63
> + sradi rRTN, rRTN, 63 /* must return an int. */
> ori rRTN, rRTN, 1
> blr
> L(equal):
> @@ -99,7 +147,7 @@ L(equal):
> blr
>
> L(different):
> - ldu rWORD1, -8(rSTR1)
> + ld rWORD1, -8(rSTR1)
> xor. rBITDIF, rWORD1, rWORD2
> sub rRTN, rWORD1, rWORD2
> blt- L(highbit)
> @@ -107,11 +155,10 @@ L(different):
> ori rRTN, rRTN, 1
> blr
> L(highbit):
> - srdi rWORD2, rWORD2, 56
> - srdi rWORD1, rWORD1, 56
> - sub rRTN, rWORD1, rWORD2
> + sradi rRTN, rWORD2, 63
> + ori rRTN, rRTN, 1
> blr
> -
> +#endif
>
> /* Oh well. In this case, we just do a byte-by-byte comparison. */
> .align 4
> diff --git a/sysdeps/powerpc/powerpc32/strncmp.S b/sysdeps/powerpc/powerpc32/strncmp.S
> index fa345d2..e36a160 100644
> --- a/sysdeps/powerpc/powerpc32/strncmp.S
> +++ b/sysdeps/powerpc/powerpc32/strncmp.S
> @@ -24,7 +24,7 @@
>
> EALIGN (strncmp, 4, 0)
>
> -#define rTMP r0
> +#define rTMP2 r0
> #define rRTN r3
> #define rSTR1 r3 /* first string arg */
> #define rSTR2 r4 /* second string arg */
> @@ -35,6 +35,7 @@ EALIGN (strncmp, 4, 0)
> #define r7F7F r9 /* constant 0x7f7f7f7f */
> #define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f) */
> #define rBITDIF r11 /* bits that differ in s1 & s2 words */
> +#define rTMP r12
>
> dcbt 0,rSTR1
> or rTMP, rSTR2, rSTR1
> @@ -73,12 +74,45 @@ L(g1): add rTMP, rFEFE, rWORD1
> we don't compare two strings as different because of gunk beyond
> the end of the strings... */
>
> +#ifdef __LITTLE_ENDIAN__
> +L(endstring):
> + slwi rTMP, rTMP, 1
> + addi rTMP2, rTMP, -1
> + andc rTMP2, rTMP2, rTMP
> + and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */
> + and rWORD1, rWORD1, rTMP2
> + rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */
> + rlwinm rTMP, rWORD1, 8, 0xffffffff
> + rlwimi rTMP2, rWORD2, 24, 0, 7
> + rlwimi rTMP, rWORD1, 24, 0, 7
> + rlwimi rTMP2, rWORD2, 24, 16, 23
> + rlwimi rTMP, rWORD1, 24, 16, 23
> + xor. rBITDIF, rTMP, rTMP2
> + sub rRTN, rTMP, rTMP2
> + bgelr+
> + ori rRTN, rTMP2, 1
> + blr
> +
> +L(different):
> + lwz rWORD1, -4(rSTR1)
> + rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */
> + rlwinm rTMP, rWORD1, 8, 0xffffffff
> + rlwimi rTMP2, rWORD2, 24, 0, 7
> + rlwimi rTMP, rWORD1, 24, 0, 7
> + rlwimi rTMP2, rWORD2, 24, 16, 23
> + rlwimi rTMP, rWORD1, 24, 16, 23
> + xor. rBITDIF, rTMP, rTMP2
> + sub rRTN, rTMP, rTMP2
> + bgelr+
> + ori rRTN, rTMP2, 1
> + blr
> +
> +#else
> L(endstring):
> and rTMP, r7F7F, rWORD1
> beq cr1, L(equal)
> add rTMP, rTMP, r7F7F
> xor. rBITDIF, rWORD1, rWORD2
> -
> andc rNEG, rNEG, rTMP
> blt- L(highbit)
> cntlzw rBITDIF, rBITDIF
> @@ -86,28 +120,20 @@ L(endstring):
> addi rNEG, rNEG, 7
> cmpw cr1, rNEG, rBITDIF
> sub rRTN, rWORD1, rWORD2
> - blt- cr1, L(equal)
> - srawi rRTN, rRTN, 31
> - ori rRTN, rRTN, 1
> - blr
> + bgelr+ cr1
> L(equal):
> li rRTN, 0
> blr
>
> L(different):
> - lwzu rWORD1, -4(rSTR1)
> + lwz rWORD1, -4(rSTR1)
> xor. rBITDIF, rWORD1, rWORD2
> sub rRTN, rWORD1, rWORD2
> - blt- L(highbit)
> - srawi rRTN, rRTN, 31
> - ori rRTN, rRTN, 1
> - blr
> + bgelr+
> L(highbit):
> - srwi rWORD2, rWORD2, 24
> - srwi rWORD1, rWORD1, 24
> - sub rRTN, rWORD1, rWORD2
> + ori rRTN, rWORD2, 1
> blr
> -
> +#endif
>
> /* Oh well. In this case, we just do a byte-by-byte comparison. */
> .align 4
> diff --git a/sysdeps/powerpc/powerpc64/power4/strncmp.S b/sysdeps/powerpc/powerpc64/power4/strncmp.S
> index 1276e16..5d136cf 100644
> --- a/sysdeps/powerpc/powerpc64/power4/strncmp.S
> +++ b/sysdeps/powerpc/powerpc64/power4/strncmp.S
> @@ -25,7 +25,7 @@
> EALIGN (strncmp, 4, 0)
> CALL_MCOUNT 3
>
> -#define rTMP r0
> +#define rTMP2 r0
> #define rRTN r3
> #define rSTR1 r3 /* first string arg */
> #define rSTR2 r4 /* second string arg */
> @@ -38,6 +38,7 @@ EALIGN (strncmp, 4, 0)
> #define r7F7F r9 /* constant 0x7f7f7f7f7f7f7f7f */
> #define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */
> #define rBITDIF r11 /* bits that differ in s1 & s2 words */
> +#define rTMP r12
>
> dcbt 0,rSTR1
> or rTMP, rSTR2, rSTR1
> @@ -79,12 +80,59 @@ L(g1): add rTMP, rFEFE, rWORD1
> we don't compare two strings as different because of gunk beyond
> the end of the strings... */
>
> +#ifdef __LITTLE_ENDIAN__
> +L(endstring):
> + addi rTMP2, rTMP, -1
> + beq cr1, L(equal)
> + andc rTMP2, rTMP2, rTMP
> + rldimi rTMP2, rTMP2, 1, 0
> + and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */
> + and rWORD1, rWORD1, rTMP2
> + cmpd cr1, rWORD1, rWORD2
> + beq cr1, L(equal)
> + xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */
> + neg rNEG, rBITDIF
> + and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */
> + cntlzd rNEG, rNEG /* bitcount of the bit. */
> + andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */
> + sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */
> + sld rWORD2, rWORD2, rNEG
> + xor. rBITDIF, rWORD1, rWORD2
> + sub rRTN, rWORD1, rWORD2
> + blt- L(highbit)
> + sradi rRTN, rRTN, 63 /* must return an int. */
> + ori rRTN, rRTN, 1
> + blr
> +L(equal):
> + li rRTN, 0
> + blr
> +
> +L(different):
> + ld rWORD1, -8(rSTR1)
> + xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */
> + neg rNEG, rBITDIF
> + and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */
> + cntlzd rNEG, rNEG /* bitcount of the bit. */
> + andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */
> + sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */
> + sld rWORD2, rWORD2, rNEG
> + xor. rBITDIF, rWORD1, rWORD2
> + sub rRTN, rWORD1, rWORD2
> + blt- L(highbit)
> + sradi rRTN, rRTN, 63
> + ori rRTN, rRTN, 1
> + blr
> +L(highbit):
> + sradi rRTN, rWORD2, 63
> + ori rRTN, rRTN, 1
> + blr
> +
> +#else
> L(endstring):
> and rTMP, r7F7F, rWORD1
> beq cr1, L(equal)
> add rTMP, rTMP, r7F7F
> xor. rBITDIF, rWORD1, rWORD2
> -
> andc rNEG, rNEG, rTMP
> blt- L(highbit)
> cntlzd rBITDIF, rBITDIF
> @@ -93,7 +141,7 @@ L(endstring):
> cmpd cr1, rNEG, rBITDIF
> sub rRTN, rWORD1, rWORD2
> blt- cr1, L(equal)
> - sradi rRTN, rRTN, 63
> + sradi rRTN, rRTN, 63 /* must return an int. */
> ori rRTN, rRTN, 1
> blr
> L(equal):
> @@ -101,7 +149,7 @@ L(equal):
> blr
>
> L(different):
> - ldu rWORD1, -8(rSTR1)
> + ld rWORD1, -8(rSTR1)
> xor. rBITDIF, rWORD1, rWORD2
> sub rRTN, rWORD1, rWORD2
> blt- L(highbit)
> @@ -109,11 +157,10 @@ L(different):
> ori rRTN, rRTN, 1
> blr
> L(highbit):
> - srdi rWORD2, rWORD2, 56
> - srdi rWORD1, rWORD1, 56
> - sub rRTN, rWORD1, rWORD2
> + sradi rRTN, rWORD2, 63
> + ori rRTN, rRTN, 1
> blr
> -
> +#endif
>
> /* Oh well. In this case, we just do a byte-by-byte comparison. */
> .align 4
> diff --git a/sysdeps/powerpc/powerpc32/power4/strncmp.S b/sysdeps/powerpc/powerpc32/power4/strncmp.S
> index 724d908..89b961e 100644
> --- a/sysdeps/powerpc/powerpc32/power4/strncmp.S
> +++ b/sysdeps/powerpc/powerpc32/power4/strncmp.S
> @@ -24,7 +24,7 @@
>
> EALIGN (strncmp, 4, 0)
>
> -#define rTMP r0
> +#define rTMP2 r0
> #define rRTN r3
> #define rSTR1 r3 /* first string arg */
> #define rSTR2 r4 /* second string arg */
> @@ -37,6 +37,7 @@ EALIGN (strncmp, 4, 0)
> #define r7F7F r9 /* constant 0x7f7f7f7f */
> #define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f) */
> #define rBITDIF r11 /* bits that differ in s1 & s2 words */
> +#define rTMP r12
>
> dcbt 0,rSTR1
> or rTMP, rSTR2, rSTR1
> @@ -75,12 +76,45 @@ L(g1): add rTMP, rFEFE, rWORD1
> we don't compare two strings as different because of gunk beyond
> the end of the strings... */
>
> +#ifdef __LITTLE_ENDIAN__
> +L(endstring):
> + slwi rTMP, rTMP, 1
> + addi rTMP2, rTMP, -1
> + andc rTMP2, rTMP2, rTMP
> + and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */
> + and rWORD1, rWORD1, rTMP2
> + rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */
> + rlwinm rTMP, rWORD1, 8, 0xffffffff
> + rldimi rTMP2, rWORD2, 24, 32
> + rldimi rTMP, rWORD1, 24, 32
> + rlwimi rTMP2, rWORD2, 24, 16, 23
> + rlwimi rTMP, rWORD1, 24, 16, 23
> + xor. rBITDIF, rTMP, rTMP2
> + sub rRTN, rTMP, rTMP2
> + bgelr+
> + ori rRTN, rTMP2, 1
> + blr
> +
> +L(different):
> + lwz rWORD1, -4(rSTR1)
> + rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */
> + rlwinm rTMP, rWORD1, 8, 0xffffffff
> + rldimi rTMP2, rWORD2, 24, 32
> + rldimi rTMP, rWORD1, 24, 32
> + rlwimi rTMP2, rWORD2, 24, 16, 23
> + rlwimi rTMP, rWORD1, 24, 16, 23
> + xor. rBITDIF, rTMP, rTMP2
> + sub rRTN, rTMP, rTMP2
> + bgelr+
> + ori rRTN, rTMP2, 1
> + blr
> +
> +#else
> L(endstring):
> and rTMP, r7F7F, rWORD1
> beq cr1, L(equal)
> add rTMP, rTMP, r7F7F
> xor. rBITDIF, rWORD1, rWORD2
> -
> andc rNEG, rNEG, rTMP
> blt- L(highbit)
> cntlzw rBITDIF, rBITDIF
> @@ -88,28 +122,20 @@ L(endstring):
> addi rNEG, rNEG, 7
> cmpw cr1, rNEG, rBITDIF
> sub rRTN, rWORD1, rWORD2
> - blt- cr1, L(equal)
> - srawi rRTN, rRTN, 31
> - ori rRTN, rRTN, 1
> - blr
> + bgelr+ cr1
> L(equal):
> li rRTN, 0
> blr
>
> L(different):
> - lwzu rWORD1, -4(rSTR1)
> + lwz rWORD1, -4(rSTR1)
> xor. rBITDIF, rWORD1, rWORD2
> sub rRTN, rWORD1, rWORD2
> - blt- L(highbit)
> - srawi rRTN, rRTN, 31
> - ori rRTN, rRTN, 1
> - blr
> + bgelr+
> L(highbit):
> - srwi rWORD2, rWORD2, 24
> - srwi rWORD1, rWORD1, 24
> - sub rRTN, rWORD1, rWORD2
> + ori rRTN, rWORD2, 1
> blr
> -
> +#endif
>
> /* Oh well. In this case, we just do a byte-by-byte comparison. */
> .align 4
> diff --git a/sysdeps/powerpc/powerpc64/power7/strncmp.S b/sysdeps/powerpc/powerpc64/power7/strncmp.S
> index 77ecad5..e618b01 100644
> --- a/sysdeps/powerpc/powerpc64/power7/strncmp.S
> +++ b/sysdeps/powerpc/powerpc64/power7/strncmp.S
> @@ -27,7 +27,7 @@
> EALIGN (strncmp,5,0)
> CALL_MCOUNT 3
>
> -#define rTMP r0
> +#define rTMP2 r0
> #define rRTN r3
> #define rSTR1 r3 /* first string arg */
> #define rSTR2 r4 /* second string arg */
> @@ -40,6 +40,7 @@ EALIGN (strncmp,5,0)
> #define r7F7F r9 /* constant 0x7f7f7f7f7f7f7f7f */
> #define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */
> #define rBITDIF r11 /* bits that differ in s1 & s2 words */
> +#define rTMP r12
>
> dcbt 0,rSTR1
> nop
> @@ -83,12 +84,57 @@ L(g1): add rTMP,rFEFE,rWORD1
> we don't compare two strings as different because of gunk beyond
> the end of the strings... */
>
> +#ifdef __LITTLE_ENDIAN__
> +L(endstring):
> + addi rTMP2, rTMP, -1
> + beq cr1, L(equal)
> + andc rTMP2, rTMP2, rTMP
> + rldimi rTMP2, rTMP2, 1, 0
> + and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */
> + and rWORD1, rWORD1, rTMP2
> + cmpd cr1, rWORD1, rWORD2
> + beq cr1, L(equal)
> + cmpb rBITDIF, rWORD1, rWORD2 /* 0xff on equal bytes. */
> + addi rNEG, rBITDIF, 1
> + orc rNEG, rNEG, rBITDIF /* 0's below LS differing byte. */
> + sldi rNEG, rNEG, 8 /* 1's above LS differing byte. */
> + andc rWORD1, rWORD1, rNEG /* mask off MS bytes. */
> + andc rWORD2, rWORD2, rNEG
> + xor. rBITDIF, rWORD1, rWORD2
> + sub rRTN, rWORD1, rWORD2
> + blt L(highbit)
> + sradi rRTN, rRTN, 63 /* must return an int. */
> + ori rRTN, rRTN, 1
> + blr
> +L(equal):
> + li rRTN, 0
> + blr
> +
> +L(different):
> + ld rWORD1, -8(rSTR1)
> + cmpb rBITDIF, rWORD1, rWORD2 /* 0xff on equal bytes. */
> + addi rNEG, rBITDIF, 1
> + orc rNEG, rNEG, rBITDIF /* 0's below LS differing byte. */
> + sldi rNEG, rNEG, 8 /* 1's above LS differing byte. */
> + andc rWORD1, rWORD1, rNEG /* mask off MS bytes. */
> + andc rWORD2, rWORD2, rNEG
> + xor. rBITDIF, rWORD1, rWORD2
> + sub rRTN, rWORD1, rWORD2
> + blt L(highbit)
> + sradi rRTN, rRTN, 63
> + ori rRTN, rRTN, 1
> + blr
> +L(highbit):
> + sradi rRTN, rWORD2, 63
> + ori rRTN, rRTN, 1
> + blr
> +
> +#else
> L(endstring):
> and rTMP,r7F7F,rWORD1
> beq cr1,L(equal)
> add rTMP,rTMP,r7F7F
> xor. rBITDIF,rWORD1,rWORD2
> -
> andc rNEG,rNEG,rTMP
> blt L(highbit)
> cntlzd rBITDIF,rBITDIF
> @@ -97,7 +143,7 @@ L(endstring):
> cmpd cr1,rNEG,rBITDIF
> sub rRTN,rWORD1,rWORD2
> blt cr1,L(equal)
> - sradi rRTN,rRTN,63
> + sradi rRTN,rRTN,63 /* must return an int. */
> ori rRTN,rRTN,1
> blr
> L(equal):
> @@ -105,7 +151,7 @@ L(equal):
> blr
>
> L(different):
> - ldu rWORD1,-8(rSTR1)
> + ld rWORD1,-8(rSTR1)
> xor. rBITDIF,rWORD1,rWORD2
> sub rRTN,rWORD1,rWORD2
> blt L(highbit)
> @@ -113,11 +159,10 @@ L(different):
> ori rRTN,rRTN,1
> blr
> L(highbit):
> - srdi rWORD2,rWORD2,56
> - srdi rWORD1,rWORD1,56
> - sub rRTN,rWORD1,rWORD2
> + sradi rRTN,rWORD2,63
> + ori rRTN,rRTN,1
> blr
> -
> +#endif
>
> /* Oh well. In this case, we just do a byte-by-byte comparison. */
> .align 4
> diff --git a/sysdeps/powerpc/powerpc32/power7/strncmp.S b/sysdeps/powerpc/powerpc32/power7/strncmp.S
> index fdae44d..10c9d25 100644
> --- a/sysdeps/powerpc/powerpc32/power7/strncmp.S
> +++ b/sysdeps/powerpc/powerpc32/power7/strncmp.S
> @@ -26,7 +26,7 @@
>
> EALIGN (strncmp,5,0)
>
> -#define rTMP r0
> +#define rTMP2 r0
> #define rRTN r3
> #define rSTR1 r3 /* first string arg */
> #define rSTR2 r4 /* second string arg */
> @@ -39,6 +39,7 @@ EALIGN (strncmp,5,0)
> #define r7F7F r9 /* constant 0x7f7f7f7f */
> #define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f) */
> #define rBITDIF r11 /* bits that differ in s1 & s2 words */
> +#define rTMP r12
>
> dcbt 0,rSTR1
> nop
> @@ -78,13 +79,45 @@ L(g1): add rTMP,rFEFE,rWORD1
> /* OK. We've hit the end of the string. We need to be careful that
> we don't compare two strings as different because of gunk beyond
> the end of the strings... */
> +#ifdef __LITTLE_ENDIAN__
> +L(endstring):
> + slwi rTMP, rTMP, 1
> + addi rTMP2, rTMP, -1
> + andc rTMP2, rTMP2, rTMP
> + and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */
> + and rWORD1, rWORD1, rTMP2
> + rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */
> + rlwinm rTMP, rWORD1, 8, 0xffffffff
> + rldimi rTMP2, rWORD2, 24, 32
> + rldimi rTMP, rWORD1, 24, 32
> + rlwimi rTMP2, rWORD2, 24, 16, 23
> + rlwimi rTMP, rWORD1, 24, 16, 23
> + xor. rBITDIF, rTMP, rTMP2
> + sub rRTN, rTMP, rTMP2
> + bgelr
> + ori rRTN, rTMP2, 1
> + blr
> +
> +L(different):
> + lwz rWORD1, -4(rSTR1)
> + rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */
> + rlwinm rTMP, rWORD1, 8, 0xffffffff
> + rldimi rTMP2, rWORD2, 24, 32
> + rldimi rTMP, rWORD1, 24, 32
> + rlwimi rTMP2, rWORD2, 24, 16, 23
> + rlwimi rTMP, rWORD1, 24, 16, 23
> + xor. rBITDIF, rTMP, rTMP2
> + sub rRTN, rTMP, rTMP2
> + bgelr
> + ori rRTN, rTMP2, 1
> + blr
>
> +#else
> L(endstring):
> and rTMP,r7F7F,rWORD1
> beq cr1,L(equal)
> add rTMP,rTMP,r7F7F
> xor. rBITDIF,rWORD1,rWORD2
> -
> andc rNEG,rNEG,rTMP
> blt L(highbit)
> cntlzw rBITDIF,rBITDIF
> @@ -92,28 +125,20 @@ L(endstring):
> addi rNEG,rNEG,7
> cmpw cr1,rNEG,rBITDIF
> sub rRTN,rWORD1,rWORD2
> - blt cr1,L(equal)
> - srawi rRTN,rRTN,31
> - ori rRTN,rRTN,1
> - blr
> + bgelr cr1
> L(equal):
> li rRTN,0
> blr
>
> L(different):
> - lwzu rWORD1,-4(rSTR1)
> + lwz rWORD1,-4(rSTR1)
> xor. rBITDIF,rWORD1,rWORD2
> sub rRTN,rWORD1,rWORD2
> - blt L(highbit)
> - srawi rRTN,rRTN,31
> - ori rRTN,rRTN,1
> - blr
> + bgelr
> L(highbit):
> - srwi rWORD2,rWORD2,24
> - srwi rWORD1,rWORD1,24
> - sub rRTN,rWORD1,rWORD2
> + ori rRTN, rWORD2, 1
> blr
> -
> +#endif
>
> /* Oh well. In this case, we just do a byte-by-byte comparison. */
> .align 4
>