This is the mail archive of the
glibc-cvs@sourceware.org
mailing list for the glibc project.
GNU C Library master sources branch master updated. glibc-2.23-500-ga024b39
- From: wilco at sourceware dot org
- To: glibc-cvs at sourceware dot org
- Date: 22 Jun 2016 12:40:56 -0000
- Subject: GNU C Library master sources branch master updated. glibc-2.23-500-ga024b39
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".
The branch, master, has been updated
via a024b39a4e31a049391b459234f6b3575c9fc107 (commit)
from a3b473373ee43a292f5ec68a7fda6b9cfb26a9b0 (commit)
The revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.
- Log -----------------------------------------------------------------
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=a024b39a4e31a049391b459234f6b3575c9fc107
commit a024b39a4e31a049391b459234f6b3575c9fc107
Author: Wilco Dijkstra <wdijkstr@arm.com>
Date: Wed Jun 22 13:24:24 2016 +0100
This patch further tunes memcpy - avoid one branch for sizes 1-3,
add a prefetch and improve small copies that are exact powers of 2.
* sysdeps/aarch64/memcpy.S (memcpy):
Further tuning for performance.
diff --git a/ChangeLog b/ChangeLog
index e418cc0..cb8bdd9 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2016-06-22 Wilco Dijkstra <wdijkstr@arm.com>
+
+ * sysdeps/aarch64/memcpy.S (memcpy):
+ Further tuning for performance.
+
2016-06-21 Florian Weimer <fweimer@redhat.com>
[BZ #20284]
diff --git a/sysdeps/aarch64/memcpy.S b/sysdeps/aarch64/memcpy.S
index c256828..de73f0f 100644
--- a/sysdeps/aarch64/memcpy.S
+++ b/sysdeps/aarch64/memcpy.S
@@ -35,6 +35,7 @@
#define A_h x7
#define A_hw w7
#define B_l x8
+#define B_lw w8
#define B_h x9
#define C_l x10
#define C_h x11
@@ -70,21 +71,40 @@ END (memmove)
libc_hidden_builtin_def (memmove)
ENTRY (memcpy)
+ prfm PLDL1KEEP, [src]
add srcend, src, count
add dstend, dstin, count
+ cmp count, 16
+ b.ls L(copy16)
cmp count, 96
b.hi L(copy_long)
- cmp count, 16
- b.hs L(copy_medium)
+ /* Medium copies: 17..96 bytes. */
+ sub tmp1, count, 1
+ ldp A_l, A_h, [src]
+ tbnz tmp1, 6, L(copy96)
+ ldp D_l, D_h, [srcend, -16]
+ tbz tmp1, 5, 1f
+ ldp B_l, B_h, [src, 16]
+ ldp C_l, C_h, [srcend, -32]
+ stp B_l, B_h, [dstin, 16]
+ stp C_l, C_h, [dstend, -32]
+1:
+ stp A_l, A_h, [dstin]
+ stp D_l, D_h, [dstend, -16]
+ ret
+
+ .p2align 4
/* Small copies: 0..16 bytes. */
L(copy16):
- tbz count, 3, 1f
+ cmp count, 8
+ b.lo 1f
ldr A_l, [src]
ldr A_h, [srcend, -8]
str A_l, [dstin]
str A_h, [dstend, -8]
ret
+ .p2align 4
1:
tbz count, 2, 1f
ldr A_lw, [src]
@@ -92,33 +112,21 @@ L(copy16):
str A_lw, [dstin]
str A_hw, [dstend, -4]
ret
- .p2align 4
+
+ /* Copy 0..3 bytes. Use a branchless sequence that copies the same
+ byte 3 times if count==1, or the 2nd byte twice if count==2. */
1:
cbz count, 2f
+ lsr tmp1, count, 1
ldrb A_lw, [src]
- tbz count, 1, 1f
- ldrh A_hw, [srcend, -2]
- strh A_hw, [dstend, -2]
-1: strb A_lw, [dstin]
+ ldrb A_hw, [srcend, -1]
+ ldrb B_lw, [src, tmp1]
+ strb A_lw, [dstin]
+ strb B_lw, [dstin, tmp1]
+ strb A_hw, [dstend, -1]
2: ret
.p2align 4
- /* Medium copies: 17..96 bytes. */
-L(copy_medium):
- ldp A_l, A_h, [src]
- tbnz count, 6, L(copy96)
- ldp D_l, D_h, [srcend, -16]
- tbz count, 5, 1f
- ldp B_l, B_h, [src, 16]
- ldp C_l, C_h, [srcend, -32]
- stp B_l, B_h, [dstin, 16]
- stp C_l, C_h, [dstend, -32]
-1:
- stp A_l, A_h, [dstin]
- stp D_l, D_h, [dstend, -16]
- ret
-
- .p2align 4
/* Copy 64..96 bytes. Copy 64 bytes from the start and
32 bytes from the end. */
L(copy96):
-----------------------------------------------------------------------
Summary of changes:
ChangeLog | 5 ++++
sysdeps/aarch64/memcpy.S | 56 ++++++++++++++++++++++++++-------------------
2 files changed, 37 insertions(+), 24 deletions(-)
hooks/post-receive
--
GNU C Library master sources