[PATCH] PowerPC - A2 memcpy cache line size initialization
Adhemerval Zanella
azanella@linux.vnet.ibm.com
Tue Jan 24 17:21:00 GMT 2012
The PPC A2 memcpy implementation relies on a correct __cache_line_size
value, which is currently initialized in '__libc_start_main'
(sysdeps/unix/sysv/linux/powerpc/libc-start.c).
Under some conditions (a DSO constructor, for instance) the cache line
size is not yet initialized and calling memcpy may result in a
SEGFAULT.  This patch checks whether the cache line size has been
initialized and, if not, uses a simple copy algorithm without any
cache-line zeroing (dcbz) instructions.
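
To illustrate, here is a rough C sketch (not part of the patch; the
names copy_without_dcbz, early_copy and msg are made up for
illustration).  The constructor shows how memcpy can run before
__libc_start_main has set __cache_line_size, and copy_without_dcbz
mirrors the simple odd-byte-then-byte-pair loop the new assembly falls
back to:

  #include <stddef.h>
  #include <string.h>

  /* Rough equivalent of the new fallback path: no dcbz, just a plain
     byte copy (one odd byte first, then two bytes per iteration).  */
  static void *
  copy_without_dcbz (void *dst, const void *src, size_t len)
  {
    unsigned char *d = dst;
    const unsigned char *s = src;

    if (len & 1)                /* Odd length: copy one byte first.  */
      {
        *d++ = *s++;
        len--;
      }
    while (len != 0)            /* Then copy two bytes per iteration.  */
      {
        d[0] = s[0];
        d[1] = s[1];
        d += 2;
        s += 2;
        len -= 2;
      }
    return dst;
  }

  /* A DSO constructor like this runs before __libc_start_main, so on
     A2 the dcbz-based memcpy path could previously see
     __cache_line_size == 0 and fault.  Build as a shared object,
     e.g.: gcc -shared -fPIC example.c -o libexample.so  */
  static char buf[128];
  static const char msg[] = "copied from a DSO constructor";

  __attribute__ ((constructor))
  static void
  early_copy (void)
  {
    memcpy (buf, msg, sizeof msg);
    copy_without_dcbz (buf + 64, msg, sizeof msg);
  }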
---
2012-01-19 Adhemerval Zanella <azanella@linux.vnet.ibm.com>
* sysdeps/powerpc/powerpc32/a2/memcpy.S: Fix for when cache line size is
not set.
* sysdeps/powerpc/powerpc64/a2/memcpy.S: Likewise.
diff --git a/sysdeps/powerpc/powerpc32/a2/memcpy.S b/sysdeps/powerpc/powerpc32/a2/memcpy.S
index 472f7a3..1aba988 100644
--- a/sysdeps/powerpc/powerpc32/a2/memcpy.S
+++ b/sysdeps/powerpc/powerpc32/a2/memcpy.S
@@ -1,5 +1,5 @@
/* Optimized memcpy implementation for PowerPC A2.
- Copyright (C) 2010 Free Software Foundation, Inc.
+ Copyright (C) 2010, 2012 Free Software Foundation, Inc.
Contributed by Michael Brutman <brutman@us.ibm.com>.
This file is part of the GNU C Library.
@@ -128,8 +128,29 @@ L(dst_aligned):
cmplwi cr5, r9, 0
bne+ cr5,L(cachelineset)
- li r9,64
+/* __cache_line_size not set: generic byte copy without much optimization */
+ andi. r0,r5,1 /* If length is odd copy one byte. */
+ beq L(cachelinenotset_align)
+ lbz r7,0(r4) /* Read one byte from source. */
+ addi r5,r5,-1 /* Update length. */
+ addi r4,r4,1 /* Update source pointer address. */
+ stb r7,0(r6) /* Store one byte on dest. */
+ addi r6,r6,1 /* Update dest pointer address. */
+L(cachelinenotset_align):
+ cmpwi cr7,r5,0 /* If length is 0 return. */
+ beqlr cr7
+ ori r2,r2,0 /* Force a new dispatch group. */
+L(cachelinenotset_loop):
+ addic. r5,r5,-2 /* Update length. */
+ lbz r7,0(r4) /* Load 2 bytes from source. */
+ lbz r8,1(r4)
+ addi r4,r4,2 /* Update source pointer address. */
+ stb r7,0(r6) /* Store 2 bytes on dest. */
+ stb r8,1(r6)
+ addi r6,r6,2 /* Update dest pointer address. */
+ bne L(cachelinenotset_loop)
+ blr
L(cachelineset):
diff --git a/sysdeps/powerpc/powerpc64/a2/memcpy.S b/sysdeps/powerpc/powerpc64/a2/memcpy.S
index ac95171..f0c17ce 100644
--- a/sysdeps/powerpc/powerpc64/a2/memcpy.S
+++ b/sysdeps/powerpc/powerpc64/a2/memcpy.S
@@ -1,5 +1,5 @@
/* Optimized memcpy implementation for PowerPC A2.
- Copyright (C) 2010 Free Software Foundation, Inc.
+ Copyright (C) 2010,2012 Free Software Foundation, Inc.
Contributed by Michael Brutman <brutman@us.ibm.com>.
This file is part of the GNU C Library.
@@ -117,9 +117,35 @@ EALIGN (BP_SYM (memcpy), 5, 0)
.align 4
L(dst_aligned):
+ cmpdi cr0,r9,0 /* Cache line size set? */
+ bne+ cr0,L(cachelineset)
+
+/* __cache_line_size not set: generic byte copy without much optimization */
+ clrldi. r0,r5,63 /* If length is odd copy one byte */
+ beq L(cachelinenotset_align)
+ lbz r7,0(r4) /* Read one byte from source */
+ addi r5,r5,-1 /* Update length */
+ addi r4,r4,1 /* Update source pointer address */
+ stb r7,0(r6) /* Store one byte at dest */
+ addi r6,r6,1 /* Update dest pointer address */
+L(cachelinenotset_align):
+ cmpdi cr7,r5,0 /* If length is 0 return */
+ beqlr cr7
+ ori r2,r2,0 /* Force a new dispatch group */
+L(cachelinenotset_loop):
+ addic. r5,r5,-2 /* Update length */
+ lbz r7,0(r4) /* Load 2 bytes from source */
+ lbz r8,1(r4)
+ addi r4,r4,2 /* Update source pointer address */
+ stb r7,0(r6) /* Store 2 bytes on dest */
+ stb r8,1(r6)
+ addi r6,r6,2 /* Update dest pointer address */
+ bne L(cachelinenotset_loop)
+ blr
- cmpd cr5,r5,r10 /* Less than a cacheline to go? */
+L(cachelineset):
+ cmpd cr5,r5,r10 /* Less than a cacheline to go? */
neg r7,r6 /* How far to next cacheline bdy? */
--
1.7.1