This is the mail archive of the glibc-cvs@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

GNU C Library master sources branch master updated. glibc-2.23-500-ga024b39


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".

The branch, master has been updated
       via  a024b39a4e31a049391b459234f6b3575c9fc107 (commit)
      from  a3b473373ee43a292f5ec68a7fda6b9cfb26a9b0 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=a024b39a4e31a049391b459234f6b3575c9fc107

commit a024b39a4e31a049391b459234f6b3575c9fc107
Author: Wilco Dijkstra <wdijkstr@arm.com>
Date:   Wed Jun 22 13:24:24 2016 +0100

    This patch further tunes memcpy - avoid one branch for sizes 1-3,
    add a prefetch and improve small copies that are exact powers of 2.
    
            * sysdeps/aarch64/memcpy.S (memcpy):
            Further tuning for performance.

diff --git a/ChangeLog b/ChangeLog
index e418cc0..cb8bdd9 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2016-06-22  Wilco Dijkstra  <wdijkstr@arm.com>
+
+	* sysdeps/aarch64/memcpy.S (memcpy):
+	Further tuning for performance.
+
 2016-06-21  Florian Weimer  <fweimer@redhat.com>
 
 	[BZ #20284]
diff --git a/sysdeps/aarch64/memcpy.S b/sysdeps/aarch64/memcpy.S
index c256828..de73f0f 100644
--- a/sysdeps/aarch64/memcpy.S
+++ b/sysdeps/aarch64/memcpy.S
@@ -35,6 +35,7 @@
 #define A_h	x7
 #define A_hw	w7
 #define B_l	x8
+#define B_lw	w8
 #define B_h	x9
 #define C_l	x10
 #define C_h	x11
@@ -70,21 +71,40 @@ END (memmove)
 libc_hidden_builtin_def (memmove)
 ENTRY (memcpy)
 
+	prfm	PLDL1KEEP, [src]
 	add	srcend, src, count
 	add	dstend, dstin, count
+	cmp	count, 16
+	b.ls	L(copy16)
 	cmp	count, 96
 	b.hi	L(copy_long)
-	cmp	count, 16
-	b.hs	L(copy_medium)
 
+	/* Medium copies: 17..96 bytes.  */
+	sub	tmp1, count, 1
+	ldp	A_l, A_h, [src]
+	tbnz	tmp1, 6, L(copy96)
+	ldp	D_l, D_h, [srcend, -16]
+	tbz	tmp1, 5, 1f
+	ldp	B_l, B_h, [src, 16]
+	ldp	C_l, C_h, [srcend, -32]
+	stp	B_l, B_h, [dstin, 16]
+	stp	C_l, C_h, [dstend, -32]
+1:
+	stp	A_l, A_h, [dstin]
+	stp	D_l, D_h, [dstend, -16]
+	ret
+
+	.p2align 4
 	/* Small copies: 0..16 bytes.  */
 L(copy16):
-	tbz	count, 3, 1f
+	cmp	count, 8
+	b.lo	1f
 	ldr	A_l, [src]
 	ldr	A_h, [srcend, -8]
 	str	A_l, [dstin]
 	str	A_h, [dstend, -8]
 	ret
+	.p2align 4
 1:
 	tbz	count, 2, 1f
 	ldr	A_lw, [src]
@@ -92,33 +112,21 @@ L(copy16):
 	str	A_lw, [dstin]
 	str	A_hw, [dstend, -4]
 	ret
-	.p2align 4
+
+	/* Copy 0..3 bytes.  Use a branchless sequence that copies the same
+	   byte 3 times if count==1, or the 2nd byte twice if count==2.  */
 1:
 	cbz	count, 2f
+	lsr	tmp1, count, 1
 	ldrb	A_lw, [src]
-	tbz	count, 1, 1f
-	ldrh	A_hw, [srcend, -2]
-	strh	A_hw, [dstend, -2]
-1:	strb	A_lw, [dstin]
+	ldrb	A_hw, [srcend, -1]
+	ldrb	B_lw, [src, tmp1]
+	strb	A_lw, [dstin]
+	strb	B_lw, [dstin, tmp1]
+	strb	A_hw, [dstend, -1]
 2:	ret
 
 	.p2align 4
-	/* Medium copies: 17..96 bytes.  */
-L(copy_medium):
-	ldp	A_l, A_h, [src]
-	tbnz	count, 6, L(copy96)
-	ldp	D_l, D_h, [srcend, -16]
-	tbz	count, 5, 1f
-	ldp	B_l, B_h, [src, 16]
-	ldp	C_l, C_h, [srcend, -32]
-	stp	B_l, B_h, [dstin, 16]
-	stp	C_l, C_h, [dstend, -32]
-1:
-	stp	A_l, A_h, [dstin]
-	stp	D_l, D_h, [dstend, -16]
-	ret
-
-	.p2align 4
 	/* Copy 64..96 bytes.  Copy 64 bytes from the start and
 	   32 bytes from the end.  */
 L(copy96):

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog                |    5 ++++
 sysdeps/aarch64/memcpy.S |   56 ++++++++++++++++++++++++++-------------------
 2 files changed, 37 insertions(+), 24 deletions(-)


hooks/post-receive
-- 
GNU C Library master sources


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]