[PATCH] PPC64 strcmp and strcpy

Steven Munroe sjmunroe@us.ibm.com
Tue Apr 15 14:11:00 GMT 2003


Completed the 64-bit conversion of strcmp.S and strcpy.S for PPC64. In the
process, removed some of the extra extsw's added by Jakub, as they are
unnecessary. These changes pass Jakub's new string tests. Additional
optimizations are pending for the unaligned case.

2003-04-15  Steven Munroe  <sjmunroe@us.ibm.com>
        * sysdeps/powerpc/powerpc64/strcmp.S: Convert to full 64-bit.
        * sysdeps/powerpc/powerpc64/strcpy.S: Convert to full 64-bit.


-- 
Steven Munroe
sjmunroe@us.ibm.com
Linux on PowerPC-64 Development
GLIBC for PowerPC-64 Development
-------------- next part --------------
diff -urN libc23-cvstip-20030414/sysdeps/powerpc/powerpc64/strcmp.S libc23/sysdeps/powerpc/powerpc64/strcmp.S
--- libc23-cvstip-20030414/sysdeps/powerpc/powerpc64/strcmp.S	2003-04-11 17:11:12.000000000 -0500
+++ libc23/sysdeps/powerpc/powerpc64/strcmp.S	2003-04-14 15:58:40.000000000 -0500
@@ -1,5 +1,5 @@
 /* Optimized strcmp implementation for PowerPC64.
-   Copyright (C) 1997, 1999, 2000, 2002 Free Software Foundation, Inc.
+   Copyright (C) 1997, 1999, 2000, 2002, 2003 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -31,42 +31,50 @@
 #define rRTN	r3
 #define rSTR1	r3	/* first string arg */
 #define rSTR2	r4	/* second string arg */
+/* Note:  The Bounded pointer support in this code is broken.  This code
+   was inherited from PPC32 and that support was never completed.
+   Current PPC gcc does not support -fbounds-check or -fbounded-pointers.
+   These artifacts are left in the code as a reminder in case we need
+   bounded pointer support in the future.  */
 #if __BOUNDED_POINTERS__
 # define rHIGH1	r11
 # define rHIGH2 r12
 #endif
 #define rWORD1	r5	/* current word in s1 */
 #define rWORD2	r6	/* current word in s2 */
-#define rFEFE	r7	/* constant 0xfefefeff (-0x01010101) */
-#define r7F7F	r8	/* constant 0x7f7f7f7f */
-#define rNEG	r9	/* ~(word in s1 | 0x7f7f7f7f) */
+#define rFEFE	r7	/* constant 0xfefefefefefefeff (-0x0101010101010101) */
+#define r7F7F	r8	/* constant 0x7f7f7f7f7f7f7f7f */
+#define rNEG	r9	/* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */
 #define rBITDIF	r10	/* bits that differ in s1 & s2 words */
 
 	CHECK_BOUNDS_LOW (rSTR1, rTMP, rHIGH1)
 	CHECK_BOUNDS_LOW (rSTR2, rTMP, rHIGH2)
 
+	dcbt	0,rSTR1
 	or	rTMP, rSTR2, rSTR1
-	clrldi.	rTMP, rTMP, 62
+	dcbt	0,rSTR2
+	clrldi.	rTMP, rTMP, 61
 	lis	rFEFE, -0x101
 	bne	L(unaligned)
 
-	lwz	rWORD1, 0(rSTR1)
-	lwz	rWORD2, 0(rSTR2)
+	ld	rWORD1, 0(rSTR1)
+	ld	rWORD2, 0(rSTR2)
 	lis	r7F7F, 0x7f7f
 	addi	rFEFE, rFEFE, -0x101
-	clrldi	rFEFE,rFEFE,32 /* clear upper 32 */
 	addi	r7F7F, r7F7F, 0x7f7f
+	sldi	rTMP, rFEFE, 32
+	insrdi	r7F7F, r7F7F, 32, 0
+	add	rFEFE, rFEFE, rTMP
 	b	L(g1)
 
-L(g0):	lwzu	rWORD1, 4(rSTR1)
+L(g0):	ldu	rWORD1, 8(rSTR1)
 	bne	cr1, L(different)
-	lwzu	rWORD2, 4(rSTR2)
+	ldu	rWORD2, 8(rSTR2)
 L(g1):	add	rTMP, rFEFE, rWORD1
 	nor	rNEG, r7F7F, rWORD1
 
-	clrldi	rNEG,rNEG,32 /* clear upper 32 */
 	and.	rTMP, rTMP, rNEG
-	cmpw	cr1, rWORD1, rWORD2
+	cmpd	cr1, rWORD1, rWORD2
 	beq+	L(g0)
 L(endstring):
 /* OK. We've hit the end of the string. We need to be careful that
@@ -77,32 +85,34 @@
 	add	rTMP, rTMP, r7F7F
 	xor.	rBITDIF, rWORD1, rWORD2
 
-	extsw.	rBITDIF,rBITDIF /* propagate sign for blt */
 	andc	rNEG, rNEG, rTMP
 	blt-	L(highbit)
-	cntlzw	rBITDIF, rBITDIF
-	cntlzw	rNEG, rNEG
+	cntlzd	rBITDIF, rBITDIF
+	cntlzd	rNEG, rNEG
 	addi	rNEG, rNEG, 7
-	cmpw	cr1, rNEG, rBITDIF
+	cmpd	cr1, rNEG, rBITDIF
 	sub	rRTN, rWORD1, rWORD2
-	extsw	rRTN, rRTN
-	bgelr+	cr1
+	blt-	cr1, L(equal)
+	sradi	rRTN, rRTN, 63
+	ori	rRTN, rRTN, 1
+	blr
 L(equal):
 	li	rRTN, 0
 	/* GKM FIXME: check high bounds.  */
 	blr
 
 L(different):
-	lwz	rWORD1, -4(rSTR1)
+	ld	rWORD1, -8(rSTR1)
 	xor.	rBITDIF, rWORD1, rWORD2
-
-	extsw.	rBITDIF,rBITDIF /* propagate sign for bgelr */
 	sub	rRTN, rWORD1, rWORD2
-	extsw	rRTN, rRTN
-	bgelr+
+	blt-	L(highbit)
+	sradi	rRTN, rRTN, 63
+	ori	rRTN, rRTN, 1
+	blr
 L(highbit):
-	ori	rRTN, rWORD2, 1
-	extsw	rRTN, rRTN
+	srdi	rWORD2, rWORD2, 56
+	srdi	rWORD1, rWORD1, 56
+	sub	rRTN, rWORD1, rWORD2
 	/* GKM FIXME: check high bounds.  */
 	blr
 
@@ -119,20 +129,18 @@
 	lbzu	rWORD2, 1(rSTR2)
 L(u1):	cmpwi	cr1, rWORD1, 0
 	beq-	cr1, L(u3)
-	cmpw	rWORD1, rWORD2
+	cmpd	rWORD1, rWORD2
 	bne-	L(u3)
 	lbzu	rWORD1, 1(rSTR1)
 	lbzu	rWORD2, 1(rSTR2)
-	cmpwi	cr1, rWORD1, 0
-	cmpw	rWORD1, rWORD2
+	cmpdi	cr1, rWORD1, 0
+	cmpd	rWORD1, rWORD2
 	bne+	cr1, L(u0)
 L(u3):	sub	rRTN, rWORD1, rWORD2
-	extsw	rRTN, rRTN
 	/* GKM FIXME: check high bounds.  */
 	blr
 L(u4):	lbz	rWORD1, -1(rSTR1)
 	sub	rRTN, rWORD1, rWORD2
-	extsw	rRTN, rRTN
 	/* GKM FIXME: check high bounds.  */
 	blr
 END (BP_SYM (strcmp))
-------------- next part --------------
diff -urN libc23-cvstip-20030414/sysdeps/powerpc/powerpc64/strcpy.S libc23/sysdeps/powerpc/powerpc64/strcpy.S
--- libc23-cvstip-20030414/sysdeps/powerpc/powerpc64/strcpy.S	2002-09-17 18:50:02.000000000 -0500
+++ libc23/sysdeps/powerpc/powerpc64/strcpy.S	2003-04-14 15:58:47.000000000 -0500
@@ -1,5 +1,5 @@
 /* Optimized strcpy implementation for PowerPC64.
-   Copyright (C) 1997, 1999, 2000, 2002 Free Software Foundation, Inc.
+   Copyright (C) 1997, 1999, 2000, 2002, 2003 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -29,6 +29,11 @@
 
 #define rTMP	r0
 #define rRTN	r3	/* incoming DEST arg preserved as result */
+/* Note.  The Bounded pointer support in this code is broken.  This code
+   was inherited from PPC32 and that support was never completed.
+   Current PPC gcc does not support -fbounds-check or -fbounded-pointers.
+   These artifacts are left in the code as a reminder in case we need
+   bounded pointer support in the future.  */
 #if __BOUNDED_POINTERS__
 # define rDEST	r4	/* pointer to previous word in dest */
 # define rSRC	r5	/* pointer to previous word in src */
@@ -39,39 +44,44 @@
 # define rDEST	r5	/* pointer to previous word in dest */
 #endif
 #define rWORD	r6	/* current word from src */
-#define rFEFE	r7	/* constant 0xfefefeff (-0x01010101) */
-#define r7F7F	r8	/* constant 0x7f7f7f7f */
-#define rNEG	r9	/* ~(word in s1 | 0x7f7f7f7f) */
+#define rFEFE	r7	/* constant 0xfefefefefefefeff (-0x0101010101010101) */
+#define r7F7F	r8	/* constant 0x7f7f7f7f7f7f7f7f */
+#define rNEG	r9	/* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */
 #define rALT	r10	/* alternate word from src */
 
 	CHECK_BOUNDS_LOW (rSRC, rLOW, rHIGH)
 	CHECK_BOUNDS_LOW (rDEST, rLOW, rHIGH)
 	STORE_RETURN_BOUNDS (rLOW, rHIGH)
 
+	dcbt	0,rSRC
 	or	rTMP, rSRC, rRTN
-	clrldi.	rTMP, rTMP, 62
+	clrldi.	rTMP, rTMP, 61
 #if __BOUNDED_POINTERS__
-	addi	rDEST, rDEST, -4
+	addi	rDEST, rDEST, -8
 #else
-	addi	rDEST, rRTN, -4
+	addi	rDEST, rRTN, -8
 #endif
+	dcbtst	0,rRTN
 	bne	L(unaligned)
 
 	lis	rFEFE, -0x101
 	lis	r7F7F, 0x7f7f
-	lwz	rWORD, 0(rSRC)
+	ld	rWORD, 0(rSRC)
 	addi	rFEFE, rFEFE, -0x101
 	addi	r7F7F, r7F7F, 0x7f7f
+	sldi	rTMP, rFEFE, 32
+	insrdi	r7F7F, r7F7F, 32, 0
+	add	rFEFE, rFEFE, rTMP
 	b	L(g2)
 
-L(g0):	lwzu	rALT, 4(rSRC)
-	stwu	rWORD, 4(rDEST)
+L(g0):	ldu	rALT, 8(rSRC)
+	stdu	rWORD, 8(rDEST)
 	add	rTMP, rFEFE, rALT
 	nor	rNEG, r7F7F, rALT
 	and.	rTMP, rTMP, rNEG
 	bne-	L(g1)
-	lwzu	rWORD, 4(rSRC)
-	stwu	rALT, 4(rDEST)
+	ldu	rWORD, 8(rSRC)
+	stdu	rALT, 8(rDEST)
 L(g2):	add	rTMP, rFEFE, rWORD
 	nor	rNEG, r7F7F, rWORD
 	and.	rTMP, rTMP, rNEG
@@ -79,16 +89,29 @@
 
 	mr	rALT, rWORD
 /* We've hit the end of the string.  Do the rest byte-by-byte.  */
-L(g1):	rlwinm.	rTMP, rALT, 8, 24, 31
-	stb	rTMP, 4(rDEST)
+L(g1):
+	extrdi.	rTMP, rALT, 8, 0
+	stb	rTMP, 8(rDEST)
 	beqlr-
-	rlwinm.	rTMP, rALT, 16, 24, 31
-	stb	rTMP, 5(rDEST)
+	extrdi.	rTMP, rALT, 8, 8
+	stb	rTMP, 9(rDEST)
 	beqlr-
-	rlwinm.	rTMP, rALT, 24, 24, 31
-	stb	rTMP, 6(rDEST)
+	extrdi.	rTMP, rALT, 8, 16
+	stb	rTMP, 10(rDEST)
 	beqlr-
-	stb	rALT, 7(rDEST)
+	extrdi.	rTMP, rALT, 8, 24
+	stb	rTMP, 11(rDEST)
+	beqlr-
+	extrdi.	rTMP, rALT, 8, 32
+	stb	rTMP, 12(rDEST)
+	beqlr-
+	extrdi.	rTMP, rALT, 8, 40
+	stb	rTMP, 13(rDEST)
+	beqlr-
+	extrdi.	rTMP, rALT, 8, 48
+	stb	rTMP, 14(rDEST)
+	beqlr-
+	stb	rALT, 15(rDEST)
 	/* GKM FIXME: check high bound.  */
 	blr
 


More information about the Libc-alpha mailing list