This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
[PATCH] Optimize mempcpy on sparc.
- From: David Miller <davem at davemloft dot net>
- To: libc-alpha at sourceware dot org
- Date: Thu, 29 Mar 2012 02:56:15 -0400 (EDT)
- Subject: [PATCH] Optimize mempcpy on sparc.
This turned out to be very easy on sparc, as all we need is a
small stub to set up the return value differently from how the
memcpy implementation entry point does, and then branch into
the memcpy code past that return value setup.
So, unlike on other architectures, we don't need a completely separate
copy of the memcpy implementation emitted just to get mempcpy
optimized. All the code is shared.
Committed to master.
* sysdeps/sparc/sparc32/memcpy.S: Implement mempcpy using a stub
that branches into memcpy.
* sysdeps/sparc/sparc64/memcpy.S: Likewise.
* sysdeps/sparc/sparc64/multiarch/memcpy-niagara1.S: Likewise.
* sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S: Likewise.
* sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S: Likewise.
* sysdeps/sparc/sparc64/multiarch/memcpy.S: Add mempcpy multiarch
bits.
* sysdeps/sparc/sparc64/rtld-memcpy.c: Include generic mempcpy
implementation too.
* sysdeps/sparc/mempcpy.S: New file.
---
ChangeLog | 12 ++++
sysdeps/sparc/mempcpy.S | 1 +
sysdeps/sparc/sparc32/memcpy.S | 14 ++++-
sysdeps/sparc/sparc64/memcpy.S | 23 ++++++---
sysdeps/sparc/sparc64/multiarch/memcpy-niagara1.S | 10 +++-
sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S | 10 +++-
sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S | 6 ++
sysdeps/sparc/sparc64/multiarch/memcpy.S | 60 +++++++++++++++++++++
sysdeps/sparc/sparc64/rtld-memcpy.c | 1 +
9 files changed, 124 insertions(+), 13 deletions(-)
create mode 100644 sysdeps/sparc/mempcpy.S
diff --git a/ChangeLog b/ChangeLog
index 555c395..23b7290 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,17 @@
2012-03-28 David S. Miller <davem@davemloft.net>
+ * sysdeps/sparc/sparc32/memcpy.S: Implement mempcpy using a stub
+ that branches into memcpy.
+ * sysdeps/sparc/sparc64/memcpy.S: Likewise.
+ * sysdeps/sparc/sparc64/multiarch/memcpy-niagara1.S: Likewise.
+ * sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S: Likewise.
+ * sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S: Likewise.
+ * sysdeps/sparc/sparc64/multiarch/memcpy.S: Add mempcpy multiarch
+ bits.
+ * sysdeps/sparc/sparc64/rtld-memcpy.c: Include generic mempcpy
+ implementation too.
+ * sysdeps/sparc/mempcpy.S: New file.
+
* sysdeps/sparc/sparc64/multiarch/memcpy.S: Provide a hidden def to
the IFUNC routine in the libc case.
* sysdeps/sparc/sparc64/multiarch/memcpy.S: Likewise.
diff --git a/sysdeps/sparc/mempcpy.S b/sysdeps/sparc/mempcpy.S
new file mode 100644
index 0000000..4c98013
--- /dev/null
+++ b/sysdeps/sparc/mempcpy.S
@@ -0,0 +1 @@
+/* mempcpy is in memcpy.S */
diff --git a/sysdeps/sparc/sparc32/memcpy.S b/sysdeps/sparc/sparc32/memcpy.S
index a36f1d6..82fa6d1 100644
--- a/sysdeps/sparc/sparc32/memcpy.S
+++ b/sysdeps/sparc/sparc32/memcpy.S
@@ -104,11 +104,17 @@
std %t2, [%dst + offset + offset2 + 0x08];
.text
- .align 4
+ENTRY(__mempcpy)
+ add %o0, %o2, %g1
+ ba 101f
+ st %g1, [%sp + 64]
+END(__mempcpy)
+ .align 4
ENTRY(memcpy) /* %o0=dst %o1=src %o2=len */
- sub %o0, %o1, %o4
st %o0, [%sp + 64]
+101:
+ sub %o0, %o1, %o4
9: andcc %o4, 3, %o5
0: bne 86f
cmp %o2, 15
@@ -641,3 +647,7 @@ ENTRY(memcpy) /* %o0=dst %o1=src %o2=len */
END(memcpy)
libc_hidden_builtin_def (memcpy)
+
+libc_hidden_def (__mempcpy)
+weak_alias (__mempcpy, mempcpy)
+libc_hidden_builtin_def (mempcpy)
diff --git a/sysdeps/sparc/sparc64/memcpy.S b/sysdeps/sparc/sparc64/memcpy.S
index a77c4e4..668ebec 100644
--- a/sysdeps/sparc/sparc64/memcpy.S
+++ b/sysdeps/sparc/sparc64/memcpy.S
@@ -374,19 +374,24 @@ ENTRY(__memcpy_large)
mov %g4, %o0
END(__memcpy_large)
+ENTRY(__mempcpy)
+ ba,pt %xcc, 210f
+ add %o0, %o2, %g4
+END(__mempcpy)
+
.align 32
ENTRY(memcpy)
+ mov %o0, %g4 /* IEU0 Group */
210:
#ifndef USE_BPR
- srl %o2, 0, %o2 /* IEU1 Group */
+ srl %o2, 0, %o2 /* IEU1 */
#endif
brz,pn %o2, 209b /* CTI Group */
- mov %o0, %g4 /* IEU0 */
-218: cmp %o2, 15 /* IEU1 Group */
- bleu,pn %xcc, 208b /* CTI */
- cmp %o2, (64 * 6) /* IEU1 Group */
- bgeu,pn %xcc, 200b /* CTI */
- andcc %o0, 7, %g2 /* IEU1 Group */
+218: cmp %o2, 15 /* IEU1 */
+ bleu,pn %xcc, 208b /* CTI Group */
+ cmp %o2, (64 * 6) /* IEU1 */
+ bgeu,pn %xcc, 200b /* CTI Group */
+ andcc %o0, 7, %g2 /* IEU1 */
sub %o0, %o1, %g5 /* IEU0 */
andcc %g5, 3, %o5 /* IEU1 Group */
bne,pn %xcc, 212f /* CTI */
@@ -569,3 +574,7 @@ ENTRY(memcpy)
END(memcpy)
libc_hidden_builtin_def (memcpy)
+
+libc_hidden_def (__mempcpy)
+weak_alias (__mempcpy, mempcpy)
+libc_hidden_builtin_def (mempcpy)
diff --git a/sysdeps/sparc/sparc64/multiarch/memcpy-niagara1.S b/sysdeps/sparc/sparc64/multiarch/memcpy-niagara1.S
index 8bada0e..6ba1b0c 100644
--- a/sysdeps/sparc/sparc64/multiarch/memcpy-niagara1.S
+++ b/sysdeps/sparc/sparc64/multiarch/memcpy-niagara1.S
@@ -43,13 +43,19 @@
.text
+ENTRY(__mempcpy_niagara1)
+ ba,pt %XCC, 101f
+ add %o0, %o2, %g5
+END(__mempcpy_niagara1)
+
.align 32
ENTRY(__memcpy_niagara1)
+100: /* %o0=dst, %o1=src, %o2=len */
+ mov %o0, %g5
+101:
# ifndef USE_BPR
srl %o2, 0, %o2
# endif
-100: /* %o0=dst, %o1=src, %o2=len */
- mov %o0, %g5
cmp %o2, 0
be,pn %XCC, 85f
218: or %o0, %o1, %o3
diff --git a/sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S b/sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S
index ccbb025..0e9442d 100644
--- a/sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S
+++ b/sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S
@@ -145,13 +145,19 @@
.text
+ENTRY(__mempcpy_niagara2)
+ ba,pt %XCC, 101f
+ add %o0, %o2, %g5
+END(__mempcpy_niagara2)
+
.align 32
ENTRY(__memcpy_niagara2)
+100: /* %o0=dst, %o1=src, %o2=len */
+ mov %o0, %g5
+101:
# ifndef USE_BPR
srl %o2, 0, %o2
# endif
-100: /* %o0=dst, %o1=src, %o2=len */
- mov %o0, %g5
cmp %o2, 0
be,pn %XCC, 85f
218: or %o0, %o1, %o3
diff --git a/sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S b/sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S
index 7e21665..0784ba9 100644
--- a/sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S
+++ b/sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S
@@ -38,6 +38,11 @@
.text
+ENTRY(__mempcpy_ultra3)
+ ba,pt %XCC, 101f
+ add %o0, %o2, %g5
+END(__mempcpy_ultra3)
+
/* Special/non-trivial issues of this code:
*
* 1) %o5 is preserved from VISEntryHalf to VISExitHalf
@@ -57,6 +62,7 @@ ENTRY(__memcpy_ultra3)
100: /* %o0=dst, %o1=src, %o2=len */
mov %o0, %g5
+101:
cmp %o2, 0
be,pn %XCC, out
218: or %o0, %o1, %o3
diff --git a/sysdeps/sparc/sparc64/multiarch/memcpy.S b/sysdeps/sparc/sparc64/multiarch/memcpy.S
index 0f3751e..20c72d9 100644
--- a/sysdeps/sparc/sparc64/multiarch/memcpy.S
+++ b/sysdeps/sparc/sparc64/multiarch/memcpy.S
@@ -72,12 +72,72 @@ ENTRY(memcpy)
mov %o1, %o0
END(memcpy)
+ENTRY(__mempcpy)
+ .type __mempcpy, @gnu_indirect_function
+# ifdef SHARED
+ SETUP_PIC_REG_LEAF(o3, o5)
+# endif
+ andcc %o0, HWCAP_SPARC_N2, %g0
+ be 1f
+ andcc %o0, HWCAP_SPARC_BLKINIT, %g0
+# ifdef SHARED
+ sethi %gdop_hix22(__mempcpy_niagara2), %o1
+ xor %o1, %gdop_lox10(__mempcpy_niagara2), %o1
+# else
+ set __mempcpy_niagara2, %o1
+# endif
+ ba 10f
+ nop
+1: be 1f
+ andcc %o0, HWCAP_SPARC_ULTRA3, %g0
+# ifdef SHARED
+ sethi %gdop_hix22(__mempcpy_niagara1), %o1
+ xor %o1, %gdop_lox10(__mempcpy_niagara1), %o1
+# else
+ set __mempcpy_niagara1, %o1
+# endif
+ ba 10f
+ nop
+1: be 9f
+ nop
+# ifdef SHARED
+ sethi %gdop_hix22(__mempcpy_ultra3), %o1
+ xor %o1, %gdop_lox10(__mempcpy_ultra3), %o1
+# else
+ set __mempcpy_ultra3, %o1
+# endif
+ ba 10f
+ nop
+9:
+# ifdef SHARED
+ sethi %gdop_hix22(__mempcpy_ultra1), %o1
+ xor %o1, %gdop_lox10(__mempcpy_ultra1), %o1
+# else
+ set __mempcpy_ultra1, %o1
+# endif
+10:
+# ifdef SHARED
+ add %o3, %o1, %o1
+# endif
+ retl
+ mov %o1, %o0
+END(__mempcpy)
+
libc_hidden_builtin_def (memcpy)
+libc_hidden_def (__mempcpy)
+weak_alias (__mempcpy, mempcpy)
+libc_hidden_builtin_def (mempcpy)
+
#undef libc_hidden_builtin_def
#define libc_hidden_builtin_def(name)
+#undef weak_alias
+#define weak_alias(x, y)
+#undef libc_hidden_def
+#define libc_hidden_def(name)
#define memcpy __memcpy_ultra1
+#define __mempcpy __mempcpy_ultra1
#endif
diff --git a/sysdeps/sparc/sparc64/rtld-memcpy.c b/sysdeps/sparc/sparc64/rtld-memcpy.c
index 5e50e6e..b1b0647 100644
--- a/sysdeps/sparc/sparc64/rtld-memcpy.c
+++ b/sysdeps/sparc/sparc64/rtld-memcpy.c
@@ -1 +1,2 @@
#include <string/memcpy.c>
+#include <string/mempcpy.c>
--
1.7.9.1