[PATCH] aarch64: fix strcpy and strnlen for big-endian
Lexi Shao
shaolexi@huawei.com
Fri May 15 02:12:28 GMT 2020
This patch fixes the optimized implementation of strcpy and strnlen
on a big-endian arm64 machine.
The optimized method uses neon, which can process 128bit with one
instruction. On a big-endian machine, the bit order should be reversed
for the whole 128-bit double word. But with the instruction
rev64 datav.16b, datav.16b
it reverses the 64 bits in each of the two halves rather than reversing the whole 128 bits.
There is no such instruction as rev128 to reverse the 128bits, but we
can fix this by loading the data registers accordingly.
Fixes 0237b61526e7("aarch64: Optimized implementation of strcpy") and
2911cb68ed3d("aarch64: Optimized implementation of strnlen").
Signed-off-by: Lexi Shao <shaolexi@huawei.com>
---
sysdeps/aarch64/strcpy.S | 5 +++++
sysdeps/aarch64/strnlen.S | 5 +++++
2 files changed, 10 insertions(+)
diff --git a/sysdeps/aarch64/strcpy.S b/sysdeps/aarch64/strcpy.S
index 52c21c9..08859dd 100644
--- a/sysdeps/aarch64/strcpy.S
+++ b/sysdeps/aarch64/strcpy.S
@@ -234,8 +234,13 @@ L(entry_no_page_cross):
#endif
/* locate the zero byte */
cmeq datav.16b, datav.16b, #0
+#ifdef __AARCH64EB__
+ mov data1, datav.d[1]
+ mov data2, datav.d[0]
+#else
mov data1, datav.d[0]
mov data2, datav.d[1]
+#endif
cmp data1, 0
csel data1, data1, data2, ne
mov pos, 8
diff --git a/sysdeps/aarch64/strnlen.S b/sysdeps/aarch64/strnlen.S
index 5981247..086a5c7 100644
--- a/sysdeps/aarch64/strnlen.S
+++ b/sysdeps/aarch64/strnlen.S
@@ -154,8 +154,13 @@ L(loop_end):
byte. */
cmeq datav.16b, datav.16b, #0
+#ifdef __AARCH64EB__
+ mov data1, datav.d[1]
+ mov data2, datav.d[0]
+#else
mov data1, datav.d[0]
mov data2, datav.d[1]
+#endif
cmp data1, 0
csel data1, data1, data2, ne
sub len, src, srcin
--
2.12.3
More information about the Libc-alpha
mailing list