[AArch64] Adjust writeback in non-zero memset

author Wilco Dijkstra <wdijkstr@arm.com>

Tue, 20 Nov 2018 12:37:00 +0000 (12:37 +0000)

committer Wilco Dijkstra <wdijkstr@arm.com>

Tue, 20 Nov 2018 12:37:00 +0000 (12:37 +0000)
author Wilco Dijkstra <wdijkstr@arm.com>
Tue, 20 Nov 2018 12:37:00 +0000 (12:37 +0000)
committer Wilco Dijkstra <wdijkstr@arm.com>
Tue, 20 Nov 2018 12:37:00 +0000 (12:37 +0000)
diff --git a/ChangeLog b/ChangeLog

index d340866c43df25d5fada04cdbcfda2ef7381e90e..be2344248f6f5f92e0564fd976022717546a8185 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+2018-11-20  Wilco Dijkstra  <wdijkstr@arm.com>
+
+       * sysdeps/aarch64/memset.S (MEMSET): Improve non-zero memset loop.
+
  2018-11-20  Joseph Myers  <joseph@codesourcery.com>
  
         * conform/conformtest.py (ElementTest.run): Use unique identifiers
diff --git a/sysdeps/aarch64/memset.S b/sysdeps/aarch64/memset.S

index 4a454593618f78e22c55520d56737fab5d8f63a4..9738cf5fd55a1d937fb3392cec46f37b4d5fb51d 100644 (file)
--- a/sysdeps/aarch64/memset.S
+++ b/sysdeps/aarch64/memset.S
@@ -89,10 +89,10 @@ L(set_long):
         b.eq    L(try_zva)
  L(no_zva):
         sub     count, dstend, dst      /* Count is 16 too large.  */
-       add     dst, dst, 16
+       sub     dst, dst, 16            /* Dst is biased by -32.  */
         sub     count, count, 64 + 16   /* Adjust count and bias for loop.  */
-1:     stp     q0, q0, [dst], 64
-       stp     q0, q0, [dst, -32]
+1:     stp     q0, q0, [dst, 32]
+       stp     q0, q0, [dst, 64]!
  L(tail64):
         subs    count, count, 64
         b.hi    1b
@@ -183,6 +183,7 @@ L(zva_other):
         subs    count, count, zva_len
         b.hs    3b
  4:     add     count, count, zva_len
+       sub     dst, dst, 32            /* Bias dst for tail loop.  */
         b       L(tail64)
  #endif
author	Wilco Dijkstra <wdijkstr@arm.com>
	Tue, 20 Nov 2018 12:37:00 +0000 (12:37 +0000)
committer	Wilco Dijkstra <wdijkstr@arm.com>
	Tue, 20 Nov 2018 12:37:00 +0000 (12:37 +0000)
ChangeLog		patch | blob | blame | history
sysdeps/aarch64/memset.S		patch | blob | blame | history