This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
[PATCH v2] aarch64: thunderx2 memcpy branches reordering
- From: Anton Youdkevitch <anton dot youdkevitch at bell-sw dot com>
- To: libc-alpha at sourceware dot org
- Date: Wed, 20 Mar 2019 19:29:32 +0300
- Subject: [PATCH v2] aarch64: thunderx2 memcpy branches reordering
Reordered the branches in the load-and-merge chunk
so that the most probable case (continuing the copy
loop) falls through and the tail handling follows it.
ChangeLog:
* sysdeps/aarch64/multiarch/memcpy_thunderx2.S:
Reorder branches in the load-and-merge chunk.
diff --git a/sysdeps/aarch64/multiarch/memcpy_thunderx2.S b/sysdeps/aarch64/multiarch/memcpy_thunderx2.S
index b2215c1..f637300 100644
--- a/sysdeps/aarch64/multiarch/memcpy_thunderx2.S
+++ b/sysdeps/aarch64/multiarch/memcpy_thunderx2.S
@@ -382,7 +382,8 @@ L(bytes_0_to_3):
strb A_lw, [dstin]
strb B_lw, [dstin, tmp1]
strb A_hw, [dstend, -1]
-L(end): ret
+L(end):
+ ret
.p2align 4
@@ -557,17 +558,9 @@ L(ext_size_ ## shft):;\
ext A_v.16b, C_v.16b, D_v.16b, 16-shft;\
ext B_v.16b, D_v.16b, E_v.16b, 16-shft;\
subs count, count, 32;\
- b.ge 2f;\
+ b.lt 2f;\
1:;\
stp A_q, B_q, [dst], #32;\
- ext H_v.16b, E_v.16b, F_v.16b, 16-shft;\
- ext I_v.16b, F_v.16b, G_v.16b, 16-shft;\
- stp H_q, I_q, [dst], #16;\
- add dst, dst, tmp1;\
- str G_q, [dst], #16;\
- b L(copy_long_check32);\
-2:;\
- stp A_q, B_q, [dst], #32;\
prfm pldl1strm, [src, MEMCPY_PREFETCH_LDR];\
ldp D_q, J_q, [src], #32;\
ext H_v.16b, E_v.16b, F_v.16b, 16-shft;\
@@ -579,8 +572,15 @@ L(ext_size_ ## shft):;\
ext B_v.16b, D_v.16b, J_v.16b, 16-shft;\
mov E_v.16b, J_v.16b;\
subs count, count, 64;\
- b.ge 2b;\
- b 1b;\
+ b.ge 1b;\
+2:;\
+ stp A_q, B_q, [dst], #32;\
+ ext H_v.16b, E_v.16b, F_v.16b, 16-shft;\
+ ext I_v.16b, F_v.16b, G_v.16b, 16-shft;\
+ stp H_q, I_q, [dst], #16;\
+ add dst, dst, tmp1;\
+ str G_q, [dst], #16;\
+ b L(copy_long_check32);\
EXT_CHUNK(1)
EXT_CHUNK(2)