This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
[PATCHv3 1/2] aarch64: Hoist ZVA check out of the memset function
- From: Siddhesh Poyarekar <siddhesh at sourceware dot org>
- To: libc-alpha at sourceware dot org
- Cc: adhemerval dot zanella at linaro dot org, szabolcs dot nagy at arm dot com
- Date: Thu, 5 Oct 2017 22:46:34 +0530
- Subject: [PATCHv3 1/2] aarch64: Hoist ZVA check out of the memset function
- Authentication-results: sourceware.org; auth=none
The DZP bit in the dczid_el0 register does not change dynamically, so
it is safe to read once during program startup. Hoist the zva check
into an ifunc resolver and store the result into a static variable,
which can be read in case of non-standard zva sizes. This effectively
adds 3 ifunc variants for memset - one for cases where zva is
disabled, one for 64 byte zva and another for 128 byte zva. I have
retained the older memset as __memset_generic for internal libc.so use
so that the change impact is minimal. We should eventually have a
discussion on what is more expensive, reading dczid_el0 on every
memset invocation or the indirection due to PLT.
The gains due to this are significant for falkor, with run time
reductions as high as 42% in some cases. Likewise for mustang,
although the numbers are slightly lower. Here's a sample from the
falkor tests:
Function: memset
Variant: walk
simple_memset __memset_nozva __memset_zva_64 __memset_generic
========================================================================================================================
length=256, char=0: 35936.10 (-706.66%) 2429.88 ( 45.46%) 2571.85 ( 42.27%) 4454.92
length=257, char=0: 36209.50 (-710.17%) 2436.12 ( 45.49%) 2564.25 ( 42.63%) 4469.36
length=258, char=0: 36507.90 (-710.21%) 2522.06 ( 44.03%) 2578.89 ( 42.77%) 4505.99
length=259, char=0: 36764.30 (-711.99%) 2611.61 ( 42.32%) 2593.52 ( 42.72%) 4527.69
length=260, char=0: 36943.30 (-712.62%) 2639.06 ( 41.95%) 2608.24 ( 42.63%) 4546.19
length=261, char=0: 37287.50 (-717.27%) 2623.07 ( 42.51%) 2623.17 ( 42.51%) 4562.47
length=262, char=0: 37573.70 (-722.44%) 2665.51 ( 41.66%) 2637.28 ( 42.27%) 4568.56
length=263, char=0: 37833.70 (-724.30%) 2692.70 ( 41.33%) 2668.38 ( 41.86%) 4589.79
length=264, char=0: 38136.00 (-727.49%) 2737.30 ( 40.61%) 2685.48 ( 41.73%) 4608.66
length=265, char=0: 38403.10 (-730.30%) 2778.70 ( 39.92%) 2695.10 ( 41.73%) 4625.23
length=266, char=0: 38684.50 (-729.88%) 2822.16 ( 39.46%) 2692.91 ( 42.23%) 4661.47
length=267, char=0: 38954.10 (-732.30%) 2867.41 ( 38.73%) 2706.28 ( 42.18%) 4680.31
length=268, char=0: 39155.00 (-733.08%) 2968.76 ( 36.84%) 2721.89 ( 42.09%) 4700.03
length=269, char=0: 39559.30 (-737.49%) 3057.49 ( 35.27%) 2737.61 ( 42.04%) 4723.54
length=270, char=0: 39813.80 (-742.51%) 3073.64 ( 34.96%) 2751.70 ( 41.77%) 4725.60
length=271, char=0: 40070.60 (-744.40%) 3103.55 ( 34.60%) 2784.25 ( 41.33%) 4745.43
length=512, char=0: 137515.00 (-1275.48%) 8971.95 ( 10.26%) 7168.66 ( 28.30%) 9997.61
length=513, char=0: 138015.00 (-1284.40%) 8987.07 ( 9.85%) 7242.59 ( 27.35%) 9969.29
length=514, char=0: 138556.00 (-1286.76%) 9200.17 ( 7.92%) 7211.49 ( 27.82%) 9991.38
length=515, char=0: 139182.00 (-1277.21%) 9223.64 ( 8.73%) 7232.78 ( 28.43%) 10106.10
length=516, char=0: 139512.00 (-1288.41%) 9306.80 ( 7.38%) 7312.15 ( 27.23%) 10048.30
length=517, char=0: 140117.00 (-1292.65%) 9429.22 ( 6.28%) 7273.52 ( 27.71%) 10061.20
length=518, char=0: 140706.00 (-1294.63%) 9463.83 ( 6.20%) 7292.57 ( 27.72%) 10089.10
length=519, char=0: 141221.00 (-1289.12%) 9548.99 ( 6.07%) 7312.75 ( 28.07%) 10166.20
length=520, char=0: 141696.00 (-1297.00%) 9713.49 ( 4.27%) 7386.44 ( 27.21%) 10147.00
length=521, char=0: 142309.00 (-1298.82%) 9888.41 ( 2.80%) 7361.91 ( 27.64%) 10173.50
length=522, char=0: 142878.00 (-1292.34%) 9909.30 ( 3.43%) 7381.22 ( 28.07%) 10261.70
length=523, char=0: 143327.00 (-1300.69%) 9918.78 ( 3.07%) 7462.93 ( 27.07%) 10232.60
length=524, char=0: 143776.00 (-1301.67%) 10055.40 ( 1.97%) 7428.56 ( 27.58%) 10257.50
length=525, char=0: 144429.00 (-1296.79%) 10090.80 ( 2.41%) 7449.84 ( 27.95%) 10340.10
length=526, char=0: 144976.00 (-1305.05%) 10178.80 ( 1.35%) 7530.66 ( 27.02%) 10318.20
length=527, char=0: 145551.00 (-1306.63%) 10314.40 ( 0.32%) 7498.48 ( 27.53%) 10347.50
length=1024, char=0: 537600.00 (-2116.32%) 34541.10 (-42.40%) 22541.00 ( 7.07%) 24256.40
length=1025, char=0: 538490.00 (-2117.66%) 34560.10 (-42.33%) 22574.10 ( 7.03%) 24281.90
length=1026, char=0: 539596.00 (-2118.30%) 34869.20 (-43.35%) 22615.10 ( 7.03%) 24324.70
length=1027, char=0: 540544.00 (-2118.30%) 35020.60 (-43.72%) 22654.60 ( 7.03%) 24367.50
length=1028, char=0: 541355.00 (-2119.44%) 35407.20 (-45.16%) 22702.00 ( 6.93%) 24391.50
length=1029, char=0: 542678.00 (-2121.52%) 35806.10 (-46.58%) 22751.10 ( 6.87%) 24428.20
length=1030, char=0: 543843.00 (-2122.73%) 35761.20 (-46.16%) 22771.20 ( 6.93%) 24467.30
length=1031, char=0: 544725.00 (-2123.15%) 35927.70 (-46.63%) 22814.10 ( 6.89%) 24502.40
length=1032, char=0: 545744.00 (-2124.10%) 35882.10 (-46.23%) 22844.50 ( 6.90%) 24537.70
length=1033, char=0: 546968.00 (-2125.25%) 36080.00 (-46.79%) 22885.20 ( 6.90%) 24580.10
length=1034, char=0: 548042.00 (-2126.35%) 36208.30 (-47.09%) 22922.90 ( 6.88%) 24616.20
length=1035, char=0: 549066.00 (-2127.30%) 36398.80 (-47.65%) 22961.30 ( 6.86%) 24651.60
length=1036, char=0: 550138.00 (-2127.95%) 36558.40 (-48.05%) 23008.70 ( 6.82%) 24692.60
length=1037, char=0: 551170.00 (-2129.86%) 36732.90 (-48.61%) 23043.40 ( 6.77%) 24717.70
length=1038, char=0: 552268.00 (-2130.95%) 36722.80 (-48.35%) 23078.80 ( 6.77%) 24754.80
length=1039, char=0: 553270.00 (-2131.58%) 36891.60 (-48.80%) 23116.80 ( 6.76%) 24792.80
* sysdeps/aarch64/memset.S (do_no_zva): New macro.
(do_zva_64): Likewise.
(do_zva_128): Likewise.
(__memset): Rename to MEMSET macro.
(MEMSET): Use the new macros.
* sysdeps/aarch64/multiarch/Makefile (sysdep_routines):
Add memset_generic, memset_nozva, memset_zva_64 and
memset_zva_128.
* sysdeps/aarch64/multiarch/ifunc-impl-list.c
(__libc_ifunc_impl_list): Add memset ifuncs.
* sysdeps/aarch64/multiarch/init-arch.h (INIT_ARCH): New
local variable zva_size.
* sysdeps/aarch64/multiarch/memset.c: New file.
* sysdeps/aarch64/multiarch/memset_generic.S: New file.
* sysdeps/aarch64/multiarch/memset_nozva.S: New file.
* sysdeps/aarch64/multiarch/memset_zva_64.S: New file.
* sysdeps/aarch64/multiarch/memset_zva_128.S: New file.
* sysdeps/aarch64/multiarch/rtld-memset.S: New file.
* sysdeps/unix/sysv/linux/aarch64/cpu-features.c
(DCZID_DZP_MASK): New macro.
(DCZID_BS_MASK): Likewise.
(init_cpu_features): Read and set zva_size.
* sysdeps/unix/sysv/linux/aarch64/cpu-features.h
(struct cpu_features): New member zva_size.
---
sysdeps/aarch64/memset.S | 200 +++++++++++++++----------
sysdeps/aarch64/multiarch/Makefile | 3 +-
sysdeps/aarch64/multiarch/ifunc-impl-list.c | 5 +
sysdeps/aarch64/multiarch/init-arch.h | 8 +-
sysdeps/aarch64/multiarch/memset.c | 43 ++++++
sysdeps/aarch64/multiarch/memset_generic.S | 28 ++++
sysdeps/aarch64/multiarch/memset_nozva.S | 23 +++
sysdeps/aarch64/multiarch/memset_zva_128.S | 24 +++
sysdeps/aarch64/multiarch/memset_zva_64.S | 24 +++
sysdeps/aarch64/multiarch/rtld-memset.S | 24 +++
sysdeps/unix/sysv/linux/aarch64/cpu-features.c | 10 ++
sysdeps/unix/sysv/linux/aarch64/cpu-features.h | 1 +
12 files changed, 307 insertions(+), 86 deletions(-)
create mode 100644 sysdeps/aarch64/multiarch/memset.c
create mode 100644 sysdeps/aarch64/multiarch/memset_generic.S
create mode 100644 sysdeps/aarch64/multiarch/memset_nozva.S
create mode 100644 sysdeps/aarch64/multiarch/memset_zva_128.S
create mode 100644 sysdeps/aarch64/multiarch/memset_zva_64.S
create mode 100644 sysdeps/aarch64/multiarch/rtld-memset.S
diff --git a/sysdeps/aarch64/memset.S b/sysdeps/aarch64/memset.S
index 110fd22..9fea4c2 100644
--- a/sysdeps/aarch64/memset.S
+++ b/sysdeps/aarch64/memset.S
@@ -37,7 +37,105 @@
#define zva_len x7
#define zva_lenw w7
-ENTRY_ALIGN (__memset, 6)
+/* Macros that do the critical loops for either no zva or zva of 64 bytes, 128
+ bytes and higher sizes. */
+
+/* No ZVA. */
+.macro do_no_zva
+ sub count, dstend, dst /* Count is 16 too large. */
+ add dst, dst, 16
+ sub count, count, 64 + 16 /* Adjust count and bias for loop. */
+1: stp q0, q0, [dst], 64
+ stp q0, q0, [dst, -32]
+ subs count, count, 64
+ b.hi 1b
+ stp q0, q0, [dstend, -64]
+ stp q0, q0, [dstend, -32]
+ ret
+.endm
+
+/* Write the first and last 64 byte aligned block using stp rather
+ than using DC ZVA. This is faster on some cores. */
+.macro do_zva_64
+ str q0, [dst, 16]
+ stp q0, q0, [dst, 32]
+ bic dst, dst, 63
+ stp q0, q0, [dst, 64]
+ stp q0, q0, [dst, 96]
+ sub count, dstend, dst /* Count is now 128 too large. */
+ sub count, count, 128+64+64 /* Adjust count and bias for loop. */
+ add dst, dst, 128
+ nop
+1: dc zva, dst
+ add dst, dst, 64
+ subs count, count, 64
+ b.hi 1b
+ stp q0, q0, [dst, 0]
+ stp q0, q0, [dst, 32]
+ stp q0, q0, [dstend, -64]
+ stp q0, q0, [dstend, -32]
+ ret
+.endm
+
+/* ZVA size of 128 bytes. */
+.macro do_zva_128
+ str q0, [dst, 16]
+ stp q0, q0, [dst, 32]
+ stp q0, q0, [dst, 64]
+ stp q0, q0, [dst, 96]
+ bic dst, dst, 127
+ sub count, dstend, dst /* Count is now 128 too large. */
+ sub count, count, 128+128 /* Adjust count and bias for loop. */
+ add dst, dst, 128
+1: dc zva, dst
+ add dst, dst, 128
+ subs count, count, 128
+ b.hi 1b
+ stp q0, q0, [dstend, -128]
+ stp q0, q0, [dstend, -96]
+ stp q0, q0, [dstend, -64]
+ stp q0, q0, [dstend, -32]
+ ret
+.endm
+
+/* ZVA size of more than 128 bytes. */
+.macro do_zva_default
+ add tmp1, zva_len, 64 /* Max alignment bytes written. */
+ cmp count, tmp1
+ blo L(no_zva)
+
+ sub tmp2, zva_len, 1
+ add tmp1, dst, zva_len
+ add dst, dst, 16
+ subs count, tmp1, dst /* Actual alignment bytes to write. */
+ bic tmp1, tmp1, tmp2 /* Aligned dc zva start address. */
+ beq 2f
+1: stp q0, q0, [dst], 64
+ stp q0, q0, [dst, -32]
+ subs count, count, 64
+ b.hi 1b
+2: mov dst, tmp1
+ sub count, dstend, tmp1 /* Remaining bytes to write. */
+ subs count, count, zva_len
+ b.lo 4f
+3: dc zva, dst
+ add dst, dst, zva_len
+ subs count, count, zva_len
+ b.hs 3b
+4: add count, count, zva_len
+ subs count, count, 64
+ b.ls 6f
+5: stp q0, q0, [dst], 64
+ stp q0, q0, [dst, -32]
+ subs count, count, 64
+ b.hi 5b
+6: stp q0, q0, [dstend, -64]
+ stp q0, q0, [dstend, -32]
+ ret
+.endm
+
+/* Memset entry point. */
+ENTRY_ALIGN (MEMSET, 6)
DELOUSE (0)
DELOUSE (2)
@@ -89,107 +187,45 @@ L(set96):
.p2align 3
nop
L(set_long):
+#ifdef MEMSET_ZVA
and valw, valw, 255
+#endif
bic dst, dstin, 15
str q0, [dstin]
+#ifdef MEMSET_ZVA
cmp count, 256
ccmp valw, 0, 0, cs
b.eq L(try_zva)
+#endif
L(no_zva):
- sub count, dstend, dst /* Count is 16 too large. */
- add dst, dst, 16
- sub count, count, 64 + 16 /* Adjust count and bias for loop. */
-1: stp q0, q0, [dst], 64
- stp q0, q0, [dst, -32]
-L(tail64):
- subs count, count, 64
- b.hi 1b
-2: stp q0, q0, [dstend, -64]
- stp q0, q0, [dstend, -32]
- ret
+ do_no_zva
- .p2align 3
+#ifdef MEMSET_ZVA
+ .p2align 4
L(try_zva):
+# if MEMSET_ZVA == 64
+ do_zva_64
+# elif MEMSET_ZVA == 128
+ do_zva_128
+# else
mrs tmp1, dczid_el0
tbnz tmp1w, 4, L(no_zva)
and tmp1w, tmp1w, 15
cmp tmp1w, 4 /* ZVA size is 64 bytes. */
b.ne L(zva_128)
+ do_zva_64
- /* Write the first and last 64 byte aligned block using stp rather
- than using DC ZVA. This is faster on some cores.
- */
-L(zva_64):
- str q0, [dst, 16]
- stp q0, q0, [dst, 32]
- bic dst, dst, 63
- stp q0, q0, [dst, 64]
- stp q0, q0, [dst, 96]
- sub count, dstend, dst /* Count is now 128 too large. */
- sub count, count, 128+64+64 /* Adjust count and bias for loop. */
- add dst, dst, 128
- nop
-1: dc zva, dst
- add dst, dst, 64
- subs count, count, 64
- b.hi 1b
- stp q0, q0, [dst, 0]
- stp q0, q0, [dst, 32]
- stp q0, q0, [dstend, -64]
- stp q0, q0, [dstend, -32]
- ret
-
- .p2align 3
L(zva_128):
cmp tmp1w, 5 /* ZVA size is 128 bytes. */
b.ne L(zva_other)
-
- str q0, [dst, 16]
- stp q0, q0, [dst, 32]
- stp q0, q0, [dst, 64]
- stp q0, q0, [dst, 96]
- bic dst, dst, 127
- sub count, dstend, dst /* Count is now 128 too large. */
- sub count, count, 128+128 /* Adjust count and bias for loop. */
- add dst, dst, 128
-1: dc zva, dst
- add dst, dst, 128
- subs count, count, 128
- b.hi 1b
- stp q0, q0, [dstend, -128]
- stp q0, q0, [dstend, -96]
- stp q0, q0, [dstend, -64]
- stp q0, q0, [dstend, -32]
- ret
+ do_zva_128
L(zva_other):
mov tmp2w, 4
lsl zva_lenw, tmp2w, tmp1w
- add tmp1, zva_len, 64 /* Max alignment bytes written. */
- cmp count, tmp1
- blo L(no_zva)
+ do_zva_default
+# endif
+#endif
- sub tmp2, zva_len, 1
- add tmp1, dst, zva_len
- add dst, dst, 16
- subs count, tmp1, dst /* Actual alignment bytes to write. */
- bic tmp1, tmp1, tmp2 /* Aligned dc zva start address. */
- beq 2f
-1: stp q0, q0, [dst], 64
- stp q0, q0, [dst, -32]
- subs count, count, 64
- b.hi 1b
-2: mov dst, tmp1
- sub count, dstend, tmp1 /* Remaining bytes to write. */
- subs count, count, zva_len
- b.lo 4f
-3: dc zva, dst
- add dst, dst, zva_len
- subs count, count, zva_len
- b.hs 3b
-4: add count, count, zva_len
- b L(tail64)
-
-END (__memset)
-weak_alias (__memset, memset)
-libc_hidden_builtin_def (memset)
+END (MEMSET)
+libc_hidden_builtin_def (MEMSET)
diff --git a/sysdeps/aarch64/multiarch/Makefile b/sysdeps/aarch64/multiarch/Makefile
index 9aa1e79..c1e17e8 100644
--- a/sysdeps/aarch64/multiarch/Makefile
+++ b/sysdeps/aarch64/multiarch/Makefile
@@ -1,4 +1,5 @@
ifeq ($(subdir),string)
sysdep_routines += memcpy_generic memcpy_thunderx memcpy_falkor \
- memmove_falkor
+ memmove_falkor memset_generic memset_nozva memset_zva_64 \
+ memset_zva_128
endif
diff --git a/sysdeps/aarch64/multiarch/ifunc-impl-list.c b/sysdeps/aarch64/multiarch/ifunc-impl-list.c
index 2cb74d5..fb695ce 100644
--- a/sysdeps/aarch64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/aarch64/multiarch/ifunc-impl-list.c
@@ -46,6 +46,11 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_thunderx)
IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_falkor)
IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_generic))
+ IFUNC_IMPL (i, name, memset,
+ IFUNC_IMPL_ADD (array, i, memset, 1, __memset_nozva)
+ IFUNC_IMPL_ADD (array, i, memset, (zva_size == 64), __memset_zva_64)
+ IFUNC_IMPL_ADD (array, i, memset, (zva_size == 128), __memset_zva_128)
+ IFUNC_IMPL_ADD (array, i, memset, 1, __memset_generic))
return i;
}
diff --git a/sysdeps/aarch64/multiarch/init-arch.h b/sysdeps/aarch64/multiarch/init-arch.h
index 3af442c..a756dad 100644
--- a/sysdeps/aarch64/multiarch/init-arch.h
+++ b/sysdeps/aarch64/multiarch/init-arch.h
@@ -18,6 +18,8 @@
#include <ldsodefs.h>
-#define INIT_ARCH() \
- uint64_t __attribute__((unused)) midr = \
- GLRO(dl_aarch64_cpu_features).midr_el1;
+#define INIT_ARCH() \
+ uint64_t __attribute__((unused)) midr = \
+ GLRO(dl_aarch64_cpu_features).midr_el1; \
+ unsigned __attribute__((unused)) zva_size = \
+ GLRO(dl_aarch64_cpu_features).zva_size;
diff --git a/sysdeps/aarch64/multiarch/memset.c b/sysdeps/aarch64/multiarch/memset.c
new file mode 100644
index 0000000..a7e34c0
--- /dev/null
+++ b/sysdeps/aarch64/multiarch/memset.c
@@ -0,0 +1,43 @@
+/* Multiple versions of memset. AARCH64 version.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+
+#if IS_IN (libc)
+/* Redefine memset so that the compiler won't complain about the type
+ mismatch with the IFUNC selector in strong_alias, below. */
+# undef memset
+# define memset __redirect_memset
+# include <string.h>
+# include <init-arch.h>
+
+extern __typeof (__redirect_memset) __libc_memset;
+
+extern __typeof (__redirect_memset) __memset_nozva attribute_hidden;
+extern __typeof (__redirect_memset) __memset_zva_64 attribute_hidden;
+extern __typeof (__redirect_memset) __memset_zva_128 attribute_hidden;
+extern __typeof (__redirect_memset) __memset_generic attribute_hidden;
+
+libc_ifunc (__libc_memset, (zva_size == 0 ? __memset_nozva
+ : (zva_size == 64 ? __memset_zva_64
+ : (zva_size == 128 ? __memset_zva_128
+ : __memset_generic))));
+
+# undef memset
+strong_alias (__libc_memset, memset);
+#endif
diff --git a/sysdeps/aarch64/multiarch/memset_generic.S b/sysdeps/aarch64/multiarch/memset_generic.S
new file mode 100644
index 0000000..8871600
--- /dev/null
+++ b/sysdeps/aarch64/multiarch/memset_generic.S
@@ -0,0 +1,28 @@
+/* Memset for aarch64, default version for internal use.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+# define MEMSET __memset_generic
+# define MEMSET_ZVA 1
+/* Add a hidden definition for use within libc.so. */
+# ifdef SHARED
+ .globl __GI_memset; __GI_memset = __memset_generic
+# endif
+# include <sysdeps/aarch64/memset.S>
+#endif
diff --git a/sysdeps/aarch64/multiarch/memset_nozva.S b/sysdeps/aarch64/multiarch/memset_nozva.S
new file mode 100644
index 0000000..2d4fc42
--- /dev/null
+++ b/sysdeps/aarch64/multiarch/memset_nozva.S
@@ -0,0 +1,23 @@
+/* Memset for aarch64, ZVA disabled.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+# define MEMSET __memset_nozva
+# include <sysdeps/aarch64/memset.S>
+#endif
diff --git a/sysdeps/aarch64/multiarch/memset_zva_128.S b/sysdeps/aarch64/multiarch/memset_zva_128.S
new file mode 100644
index 0000000..2c68127
--- /dev/null
+++ b/sysdeps/aarch64/multiarch/memset_zva_128.S
@@ -0,0 +1,24 @@
+/* Memset for aarch64, ZVA enabled and == 128 bytes.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+# define MEMSET __memset_zva_128
+# define MEMSET_ZVA 128
+# include <sysdeps/aarch64/memset.S>
+#endif
diff --git a/sysdeps/aarch64/multiarch/memset_zva_64.S b/sysdeps/aarch64/multiarch/memset_zva_64.S
new file mode 100644
index 0000000..ff895f9
--- /dev/null
+++ b/sysdeps/aarch64/multiarch/memset_zva_64.S
@@ -0,0 +1,24 @@
+/* Memset for aarch64, ZVA enabled and == 64 bytes.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+# define MEMSET __memset_zva_64
+# define MEMSET_ZVA 64
+# include <sysdeps/aarch64/memset.S>
+#endif
diff --git a/sysdeps/aarch64/multiarch/rtld-memset.S b/sysdeps/aarch64/multiarch/rtld-memset.S
new file mode 100644
index 0000000..172df42
--- /dev/null
+++ b/sysdeps/aarch64/multiarch/rtld-memset.S
@@ -0,0 +1,24 @@
+/* Memset for aarch64, for the dynamic linker.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (rtld)
+# define MEMSET memset
+# define MEMSET_ZVA 1
+# include <sysdeps/aarch64/memset.S>
+#endif
diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
index e769eeb..092ee81 100644
--- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
+++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
@@ -20,6 +20,9 @@
#include <sys/auxv.h>
#include <elf/dl-hwcaps.h>
+#define DCZID_DZP_MASK (1 << 4)
+#define DCZID_BS_MASK (0xf)
+
#if HAVE_TUNABLES
struct cpu_list
{
@@ -72,4 +75,11 @@ init_cpu_features (struct cpu_features *cpu_features)
}
cpu_features->midr_el1 = midr;
+
+ /* Check if ZVA is enabled. */
+ unsigned dczid;
+ asm volatile ("mrs %0, dczid_el0" : "=r"(dczid));
+
+ if ((dczid & DCZID_DZP_MASK) == 0)
+ cpu_features->zva_size = 4 << (dczid & DCZID_BS_MASK);
}
diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.h b/sysdeps/unix/sysv/linux/aarch64/cpu-features.h
index 73cb53d..f2b6afd 100644
--- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.h
+++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.h
@@ -47,6 +47,7 @@
struct cpu_features
{
uint64_t midr_el1;
+ unsigned zva_size;
};
#endif /* _CPU_FEATURES_AARCH64_H */
--
2.7.4