[PATCH 2/3] aarch64: Remove non-temporal load/stores from oryon-1's memcpy

Andrew Pinski quic_apinski@quicinc.com
Fri Nov 15 03:03:19 GMT 2024


The hardware architects now recommend against using non-temporal
loads/stores (ldnp/stnp) in memcpy.  This patch removes the
non-temporal copy path, which was taken for copies of 32768 bytes
or more.  I also found no difference in memcpy speed with or
without the non-temporal loads/stores.

Signed-off-by: Andrew Pinski <quic_apinski@quicinc.com>
---
 sysdeps/aarch64/multiarch/memcpy_oryon1.S | 40 -----------------------
 1 file changed, 40 deletions(-)

diff --git a/sysdeps/aarch64/multiarch/memcpy_oryon1.S b/sysdeps/aarch64/multiarch/memcpy_oryon1.S
index 4efc43df28..6cae97dc96 100644
--- a/sysdeps/aarch64/multiarch/memcpy_oryon1.S
+++ b/sysdeps/aarch64/multiarch/memcpy_oryon1.S
@@ -160,46 +160,6 @@ L(copy96):
 	.p2align 6
 L(copy_long):
 
-	/* On oryon1 cores, large memcpy's are helped by using ldnp/stnp.
-	   This loop is identical to the one below it but using ldnp/stnp
-	   instructions.  For loops that are less than 32768 bytes,
-	   the ldnp/stnp instructions will not help and will cause a slow
-	   down so only use the ldnp/stnp loop for the largest sizes.  */
-
-	cmp	count, #32768
-	b.lo	L(copy_long_without_nontemp)
-	and	tmp1, dstin, 15
-	bic	dst, dstin, 15
-	ldnp	D_l, D_h, [src]
-	sub	src, src, tmp1
-	add	count, count, tmp1	/* Count is now 16 too large.  */
-	ldnp	A_l, A_h, [src, 16]
-	stnp	D_l, D_h, [dstin]
-	ldnp	B_l, B_h, [src, 32]
-	ldnp	C_l, C_h, [src, 48]
-	ldnp	D_l, D_h, [src, 64]
-	add	src, src, #64
-	subs	count, count, 128 + 16	/* Test and readjust count.  */
-
-L(nontemp_loop64):
-	tbz	src, #6, 1f
-1:
-	stnp	A_l, A_h, [dst, 16]
-	ldnp	A_l, A_h, [src, 16]
-	stnp	B_l, B_h, [dst, 32]
-	ldnp	B_l, B_h, [src, 32]
-	stnp	C_l, C_h, [dst, 48]
-	ldnp	C_l, C_h, [src, 48]
-	stnp	D_l, D_h, [dst, 64]
-	ldnp	D_l, D_h, [src, 64]
-	add	src, src, #64
-	add	dst, dst, #64
-	subs	count, count, 64
-	b.hi	L(nontemp_loop64)
-	b	L(last64)
-
-L(copy_long_without_nontemp):
-
 	and	tmp1, dstin, 15
 	bic	dst, dstin, 15
 	ldp	D_l, D_h, [src]
-- 
2.43.0



More information about the Libc-alpha mailing list