This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCHv3 1/2] aarch64: Hoist ZVA check out of the memset function


The DZP bit in the dczid_el0 register does not change dynamically, so
it is safe to read once during program startup.  Hoist the zva check
into an ifunc resolver and store the result into a static variable,
which can be read in case of non-standard zva sizes.  This effectively
adds 3 ifunc variants for memset - one for cases where zva is
disabled, one for 64 byte zva and another for 128 byte zva.  I have
retained the older memset as __memset_generic for internal libc.so use
so that the change impact is minimal.  We should eventually have a
discussion on what is more expensive, reading dczid_el0 on every
memset invocation or the indirection due to PLT.

The gains due to this are significant for falkor, with run time
reductions as high as 42% in some cases.  Likewise for mustang,
although the numbers are slightly lower.  Here's a sample from the
falkor tests:

Function: memset
Variant: walk
                                    simple_memset	__memset_nozva	__memset_zva_64	__memset_generic
========================================================================================================================
                  length=256, char=0:     35936.10 (-706.66%)	     2429.88 ( 45.46%)	     2571.85 ( 42.27%)	     4454.92
                  length=257, char=0:     36209.50 (-710.17%)	     2436.12 ( 45.49%)	     2564.25 ( 42.63%)	     4469.36
                  length=258, char=0:     36507.90 (-710.21%)	     2522.06 ( 44.03%)	     2578.89 ( 42.77%)	     4505.99
                  length=259, char=0:     36764.30 (-711.99%)	     2611.61 ( 42.32%)	     2593.52 ( 42.72%)	     4527.69
                  length=260, char=0:     36943.30 (-712.62%)	     2639.06 ( 41.95%)	     2608.24 ( 42.63%)	     4546.19
                  length=261, char=0:     37287.50 (-717.27%)	     2623.07 ( 42.51%)	     2623.17 ( 42.51%)	     4562.47
                  length=262, char=0:     37573.70 (-722.44%)	     2665.51 ( 41.66%)	     2637.28 ( 42.27%)	     4568.56
                  length=263, char=0:     37833.70 (-724.30%)	     2692.70 ( 41.33%)	     2668.38 ( 41.86%)	     4589.79
                  length=264, char=0:     38136.00 (-727.49%)	     2737.30 ( 40.61%)	     2685.48 ( 41.73%)	     4608.66
                  length=265, char=0:     38403.10 (-730.30%)	     2778.70 ( 39.92%)	     2695.10 ( 41.73%)	     4625.23
                  length=266, char=0:     38684.50 (-729.88%)	     2822.16 ( 39.46%)	     2692.91 ( 42.23%)	     4661.47
                  length=267, char=0:     38954.10 (-732.30%)	     2867.41 ( 38.73%)	     2706.28 ( 42.18%)	     4680.31
                  length=268, char=0:     39155.00 (-733.08%)	     2968.76 ( 36.84%)	     2721.89 ( 42.09%)	     4700.03
                  length=269, char=0:     39559.30 (-737.49%)	     3057.49 ( 35.27%)	     2737.61 ( 42.04%)	     4723.54
                  length=270, char=0:     39813.80 (-742.51%)	     3073.64 ( 34.96%)	     2751.70 ( 41.77%)	     4725.60
                  length=271, char=0:     40070.60 (-744.40%)	     3103.55 ( 34.60%)	     2784.25 ( 41.33%)	     4745.43
                  length=512, char=0:    137515.00 (-1275.48%)	     8971.95 ( 10.26%)	     7168.66 ( 28.30%)	     9997.61
                  length=513, char=0:    138015.00 (-1284.40%)	     8987.07 (  9.85%)	     7242.59 ( 27.35%)	     9969.29
                  length=514, char=0:    138556.00 (-1286.76%)	     9200.17 (  7.92%)	     7211.49 ( 27.82%)	     9991.38
                  length=515, char=0:    139182.00 (-1277.21%)	     9223.64 (  8.73%)	     7232.78 ( 28.43%)	    10106.10
                  length=516, char=0:    139512.00 (-1288.41%)	     9306.80 (  7.38%)	     7312.15 ( 27.23%)	    10048.30
                  length=517, char=0:    140117.00 (-1292.65%)	     9429.22 (  6.28%)	     7273.52 ( 27.71%)	    10061.20
                  length=518, char=0:    140706.00 (-1294.63%)	     9463.83 (  6.20%)	     7292.57 ( 27.72%)	    10089.10
                  length=519, char=0:    141221.00 (-1289.12%)	     9548.99 (  6.07%)	     7312.75 ( 28.07%)	    10166.20
                  length=520, char=0:    141696.00 (-1297.00%)	     9713.49 (  4.27%)	     7386.44 ( 27.21%)	    10147.00
                  length=521, char=0:    142309.00 (-1298.82%)	     9888.41 (  2.80%)	     7361.91 ( 27.64%)	    10173.50
                  length=522, char=0:    142878.00 (-1292.34%)	     9909.30 (  3.43%)	     7381.22 ( 28.07%)	    10261.70
                  length=523, char=0:    143327.00 (-1300.69%)	     9918.78 (  3.07%)	     7462.93 ( 27.07%)	    10232.60
                  length=524, char=0:    143776.00 (-1301.67%)	    10055.40 (  1.97%)	     7428.56 ( 27.58%)	    10257.50
                  length=525, char=0:    144429.00 (-1296.79%)	    10090.80 (  2.41%)	     7449.84 ( 27.95%)	    10340.10
                  length=526, char=0:    144976.00 (-1305.05%)	    10178.80 (  1.35%)	     7530.66 ( 27.02%)	    10318.20
                  length=527, char=0:    145551.00 (-1306.63%)	    10314.40 (  0.32%)	     7498.48 ( 27.53%)	    10347.50
                 length=1024, char=0:    537600.00 (-2116.32%)	    34541.10 (-42.40%)	    22541.00 (  7.07%)	    24256.40
                 length=1025, char=0:    538490.00 (-2117.66%)	    34560.10 (-42.33%)	    22574.10 (  7.03%)	    24281.90
                 length=1026, char=0:    539596.00 (-2118.30%)	    34869.20 (-43.35%)	    22615.10 (  7.03%)	    24324.70
                 length=1027, char=0:    540544.00 (-2118.30%)	    35020.60 (-43.72%)	    22654.60 (  7.03%)	    24367.50
                 length=1028, char=0:    541355.00 (-2119.44%)	    35407.20 (-45.16%)	    22702.00 (  6.93%)	    24391.50
                 length=1029, char=0:    542678.00 (-2121.52%)	    35806.10 (-46.58%)	    22751.10 (  6.87%)	    24428.20
                 length=1030, char=0:    543843.00 (-2122.73%)	    35761.20 (-46.16%)	    22771.20 (  6.93%)	    24467.30
                 length=1031, char=0:    544725.00 (-2123.15%)	    35927.70 (-46.63%)	    22814.10 (  6.89%)	    24502.40
                 length=1032, char=0:    545744.00 (-2124.10%)	    35882.10 (-46.23%)	    22844.50 (  6.90%)	    24537.70
                 length=1033, char=0:    546968.00 (-2125.25%)	    36080.00 (-46.79%)	    22885.20 (  6.90%)	    24580.10
                 length=1034, char=0:    548042.00 (-2126.35%)	    36208.30 (-47.09%)	    22922.90 (  6.88%)	    24616.20
                 length=1035, char=0:    549066.00 (-2127.30%)	    36398.80 (-47.65%)	    22961.30 (  6.86%)	    24651.60
                 length=1036, char=0:    550138.00 (-2127.95%)	    36558.40 (-48.05%)	    23008.70 (  6.82%)	    24692.60
                 length=1037, char=0:    551170.00 (-2129.86%)	    36732.90 (-48.61%)	    23043.40 (  6.77%)	    24717.70
                 length=1038, char=0:    552268.00 (-2130.95%)	    36722.80 (-48.35%)	    23078.80 (  6.77%)	    24754.80
                 length=1039, char=0:    553270.00 (-2131.58%)	    36891.60 (-48.80%)	    23116.80 (  6.76%)	    24792.80

	* sysdeps/aarch64/memset.S (do_no_zva): New macro.
	(do_zva_64): Likewise.
	(do_zva_128): Likewise.
	(__memset): Rename to MEMSET macro.
	(MEMSET): Use the new macros.
	* sysdeps/aarch64/multiarch/Makefile (sysdep_routines):
	Add memset_generic, memset_nozva, memset_zva_64 and
	memset_zva_128.
	* sysdeps/aarch64/multiarch/ifunc-impl-list.c
	(__libc_ifunc_impl_list): Add memset ifuncs.
	* sysdeps/aarch64/multiarch/init-arch.h (INIT_ARCH): New
	local variable zva_size.
	* sysdeps/aarch64/multiarch/memset.c: New file.
	* sysdeps/aarch64/multiarch/memset_generic.S: New file.
	* sysdeps/aarch64/multiarch/memset_nozva.S: New file.
	* sysdeps/aarch64/multiarch/memset_zva_64.S: New file.
	* sysdeps/aarch64/multiarch/memset_zva_128.S: New file.
	* sysdeps/aarch64/multiarch/rtld-memset.S: New file.
	* sysdeps/unix/sysv/linux/aarch64/cpu-features.c
	(DCZID_DZP_MASK): New macro.
	(DCZID_BS_MASK): Likewise.
	(init_cpu_features): Read and set zva_size.
	* sysdeps/unix/sysv/linux/aarch64/cpu-features.h
	(struct cpu_features): New member zva_size.
---
 sysdeps/aarch64/memset.S                       | 200 +++++++++++++++----------
 sysdeps/aarch64/multiarch/Makefile             |   3 +-
 sysdeps/aarch64/multiarch/ifunc-impl-list.c    |   5 +
 sysdeps/aarch64/multiarch/init-arch.h          |   8 +-
 sysdeps/aarch64/multiarch/memset.c             |  43 ++++++
 sysdeps/aarch64/multiarch/memset_generic.S     |  28 ++++
 sysdeps/aarch64/multiarch/memset_nozva.S       |  23 +++
 sysdeps/aarch64/multiarch/memset_zva_128.S     |  24 +++
 sysdeps/aarch64/multiarch/memset_zva_64.S      |  24 +++
 sysdeps/aarch64/multiarch/rtld-memset.S        |  24 +++
 sysdeps/unix/sysv/linux/aarch64/cpu-features.c |  10 ++
 sysdeps/unix/sysv/linux/aarch64/cpu-features.h |   1 +
 12 files changed, 307 insertions(+), 86 deletions(-)
 create mode 100644 sysdeps/aarch64/multiarch/memset.c
 create mode 100644 sysdeps/aarch64/multiarch/memset_generic.S
 create mode 100644 sysdeps/aarch64/multiarch/memset_nozva.S
 create mode 100644 sysdeps/aarch64/multiarch/memset_zva_128.S
 create mode 100644 sysdeps/aarch64/multiarch/memset_zva_64.S
 create mode 100644 sysdeps/aarch64/multiarch/rtld-memset.S

diff --git a/sysdeps/aarch64/memset.S b/sysdeps/aarch64/memset.S
index 110fd22..9fea4c2 100644
--- a/sysdeps/aarch64/memset.S
+++ b/sysdeps/aarch64/memset.S
@@ -37,7 +37,105 @@
 #define zva_len x7
 #define zva_lenw w7
 
-ENTRY_ALIGN (__memset, 6)
+/* Macros that do the critical loops for either no zva or zva of 64 bytes, 128
+   bytes and higher sizes.  */
+
+/* No ZVA.  */
+.macro do_no_zva
+	sub	count, dstend, dst	/* Count is 16 too large.  */
+	add	dst, dst, 16
+	sub	count, count, 64 + 16	/* Adjust count and bias for loop.  */
+1:	stp	q0, q0, [dst], 64
+	stp	q0, q0, [dst, -32]
+	subs	count, count, 64
+	b.hi	1b
+	stp	q0, q0, [dstend, -64]
+	stp	q0, q0, [dstend, -32]
+	ret
+.endm
+
+/* Write the first and last 64 byte aligned block using stp rather
+   than using DC ZVA.  This is faster on some cores.  */
+.macro do_zva_64
+	str	q0, [dst, 16]
+	stp	q0, q0, [dst, 32]
+	bic	dst, dst, 63
+	stp	q0, q0, [dst, 64]
+	stp	q0, q0, [dst, 96]
+	sub	count, dstend, dst	/* Count is now 128 too large.	*/
+	sub	count, count, 128+64+64	/* Adjust count and bias for loop.  */
+	add	dst, dst, 128
+	nop
+1:	dc	zva, dst
+	add	dst, dst, 64
+	subs	count, count, 64
+	b.hi	1b
+	stp	q0, q0, [dst, 0]
+	stp	q0, q0, [dst, 32]
+	stp	q0, q0, [dstend, -64]
+	stp	q0, q0, [dstend, -32]
+	ret
+.endm
+
+/* ZVA size of 128 bytes.  */
+.macro do_zva_128
+	str	q0, [dst, 16]
+	stp	q0, q0, [dst, 32]
+	stp	q0, q0, [dst, 64]
+	stp	q0, q0, [dst, 96]
+	bic	dst, dst, 127
+	sub	count, dstend, dst	/* Count is now 128 too large.	*/
+	sub	count, count, 128+128	/* Adjust count and bias for loop.  */
+	add	dst, dst, 128
+1:	dc	zva, dst
+	add	dst, dst, 128
+	subs	count, count, 128
+	b.hi	1b
+	stp	q0, q0, [dstend, -128]
+	stp	q0, q0, [dstend, -96]
+	stp	q0, q0, [dstend, -64]
+	stp	q0, q0, [dstend, -32]
+	ret
+.endm
+
+/* ZVA size of more than 128 bytes.  */
+.macro do_zva_default
+	add	tmp1, zva_len, 64	/* Max alignment bytes written.	 */
+	cmp	count, tmp1
+	blo	L(no_zva)
+
+	sub	tmp2, zva_len, 1
+	add	tmp1, dst, zva_len
+	add	dst, dst, 16
+	subs	count, tmp1, dst	/* Actual alignment bytes to write.  */
+	bic	tmp1, tmp1, tmp2	/* Aligned dc zva start address.  */
+	beq	2f
+1:	stp	q0, q0, [dst], 64
+	stp	q0, q0, [dst, -32]
+	subs	count, count, 64
+	b.hi	1b
+2:	mov	dst, tmp1
+	sub	count, dstend, tmp1	/* Remaining bytes to write.  */
+	subs	count, count, zva_len
+	b.lo	4f
+3:	dc	zva, dst
+	add	dst, dst, zva_len
+	subs	count, count, zva_len
+	b.hs	3b
+4:	add	count, count, zva_len
+	subs	count, count, 64
+	b.ls	6f
+5:	stp	q0, q0, [dst], 64
+	stp	q0, q0, [dst, -32]
+	subs	count, count, 64
+	b.hi	5b
+6:	stp	q0, q0, [dstend, -64]
+	stp	q0, q0, [dstend, -32]
+	ret
+.endm
+
+/* Memset entry point.  */
+ENTRY_ALIGN (MEMSET, 6)
 
 	DELOUSE (0)
 	DELOUSE (2)
@@ -89,107 +187,45 @@ L(set96):
 	.p2align 3
 	nop
 L(set_long):
+#ifdef MEMSET_ZVA
 	and	valw, valw, 255
+#endif
 	bic	dst, dstin, 15
 	str	q0, [dstin]
+#ifdef MEMSET_ZVA
 	cmp	count, 256
 	ccmp	valw, 0, 0, cs
 	b.eq	L(try_zva)
+#endif
 L(no_zva):
-	sub	count, dstend, dst	/* Count is 16 too large.  */
-	add	dst, dst, 16
-	sub	count, count, 64 + 16	/* Adjust count and bias for loop.  */
-1:	stp	q0, q0, [dst], 64
-	stp	q0, q0, [dst, -32]
-L(tail64):
-	subs	count, count, 64
-	b.hi	1b
-2:	stp	q0, q0, [dstend, -64]
-	stp	q0, q0, [dstend, -32]
-	ret
+	do_no_zva
 
-	.p2align 3
+#ifdef MEMSET_ZVA
+	.p2align 4
 L(try_zva):
+# if MEMSET_ZVA == 64
+	do_zva_64
+# elif MEMSET_ZVA == 128
+	do_zva_128
+# else
 	mrs	tmp1, dczid_el0
 	tbnz	tmp1w, 4, L(no_zva)
 	and	tmp1w, tmp1w, 15
 	cmp	tmp1w, 4	/* ZVA size is 64 bytes.  */
 	b.ne	 L(zva_128)
+	do_zva_64
 
-	/* Write the first and last 64 byte aligned block using stp rather
-	   than using DC ZVA.  This is faster on some cores.
-	 */
-L(zva_64):
-	str	q0, [dst, 16]
-	stp	q0, q0, [dst, 32]
-	bic	dst, dst, 63
-	stp	q0, q0, [dst, 64]
-	stp	q0, q0, [dst, 96]
-	sub	count, dstend, dst	/* Count is now 128 too large.	*/
-	sub	count, count, 128+64+64	/* Adjust count and bias for loop.  */
-	add	dst, dst, 128
-	nop
-1:	dc	zva, dst
-	add	dst, dst, 64
-	subs	count, count, 64
-	b.hi	1b
-	stp	q0, q0, [dst, 0]
-	stp	q0, q0, [dst, 32]
-	stp	q0, q0, [dstend, -64]
-	stp	q0, q0, [dstend, -32]
-	ret
-
-	.p2align 3
 L(zva_128):
 	cmp	tmp1w, 5	/* ZVA size is 128 bytes.  */
 	b.ne	L(zva_other)
-
-	str	q0, [dst, 16]
-	stp	q0, q0, [dst, 32]
-	stp	q0, q0, [dst, 64]
-	stp	q0, q0, [dst, 96]
-	bic	dst, dst, 127
-	sub	count, dstend, dst	/* Count is now 128 too large.	*/
-	sub	count, count, 128+128	/* Adjust count and bias for loop.  */
-	add	dst, dst, 128
-1:	dc	zva, dst
-	add	dst, dst, 128
-	subs	count, count, 128
-	b.hi	1b
-	stp	q0, q0, [dstend, -128]
-	stp	q0, q0, [dstend, -96]
-	stp	q0, q0, [dstend, -64]
-	stp	q0, q0, [dstend, -32]
-	ret
+	do_zva_128
 
 L(zva_other):
 	mov	tmp2w, 4
 	lsl	zva_lenw, tmp2w, tmp1w
-	add	tmp1, zva_len, 64	/* Max alignment bytes written.	 */
-	cmp	count, tmp1
-	blo	L(no_zva)
+	do_zva_default
+# endif
+#endif
 
-	sub	tmp2, zva_len, 1
-	add	tmp1, dst, zva_len
-	add	dst, dst, 16
-	subs	count, tmp1, dst	/* Actual alignment bytes to write.  */
-	bic	tmp1, tmp1, tmp2	/* Aligned dc zva start address.  */
-	beq	2f
-1:	stp	q0, q0, [dst], 64
-	stp	q0, q0, [dst, -32]
-	subs	count, count, 64
-	b.hi	1b
-2:	mov	dst, tmp1
-	sub	count, dstend, tmp1	/* Remaining bytes to write.  */
-	subs	count, count, zva_len
-	b.lo	4f
-3:	dc	zva, dst
-	add	dst, dst, zva_len
-	subs	count, count, zva_len
-	b.hs	3b
-4:	add	count, count, zva_len
-	b	L(tail64)
-
-END (__memset)
-weak_alias (__memset, memset)
-libc_hidden_builtin_def (memset)
+END (MEMSET)
+libc_hidden_builtin_def (MEMSET)
diff --git a/sysdeps/aarch64/multiarch/Makefile b/sysdeps/aarch64/multiarch/Makefile
index 9aa1e79..c1e17e8 100644
--- a/sysdeps/aarch64/multiarch/Makefile
+++ b/sysdeps/aarch64/multiarch/Makefile
@@ -1,4 +1,5 @@
 ifeq ($(subdir),string)
 sysdep_routines += memcpy_generic memcpy_thunderx memcpy_falkor \
-		   memmove_falkor
+		   memmove_falkor memset_generic memset_nozva memset_zva_64 \
+		   memset_zva_128
 endif
diff --git a/sysdeps/aarch64/multiarch/ifunc-impl-list.c b/sysdeps/aarch64/multiarch/ifunc-impl-list.c
index 2cb74d5..fb695ce 100644
--- a/sysdeps/aarch64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/aarch64/multiarch/ifunc-impl-list.c
@@ -46,6 +46,11 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_thunderx)
 	      IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_falkor)
 	      IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_generic))
+  IFUNC_IMPL (i, name, memset,
+	      IFUNC_IMPL_ADD (array, i, memset, 1, __memset_nozva)
+	      IFUNC_IMPL_ADD (array, i, memset, (zva_size == 64), __memset_zva_64)
+	      IFUNC_IMPL_ADD (array, i, memset, (zva_size == 128), __memset_zva_128)
+	      IFUNC_IMPL_ADD (array, i, memset, 1, __memset_generic))
 
   return i;
 }
diff --git a/sysdeps/aarch64/multiarch/init-arch.h b/sysdeps/aarch64/multiarch/init-arch.h
index 3af442c..a756dad 100644
--- a/sysdeps/aarch64/multiarch/init-arch.h
+++ b/sysdeps/aarch64/multiarch/init-arch.h
@@ -18,6 +18,8 @@
 
 #include <ldsodefs.h>
 
-#define INIT_ARCH()				\
-  uint64_t __attribute__((unused)) midr =	\
-    GLRO(dl_aarch64_cpu_features).midr_el1;
+#define INIT_ARCH()							      \
+  uint64_t __attribute__((unused)) midr =				      \
+    GLRO(dl_aarch64_cpu_features).midr_el1;				      \
+  unsigned __attribute__((unused)) zva_size =				      \
+    GLRO(dl_aarch64_cpu_features).zva_size;
diff --git a/sysdeps/aarch64/multiarch/memset.c b/sysdeps/aarch64/multiarch/memset.c
new file mode 100644
index 0000000..a7e34c0
--- /dev/null
+++ b/sysdeps/aarch64/multiarch/memset.c
@@ -0,0 +1,43 @@
+/* Multiple versions of memset. AARCH64 version.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Define multiple versions only for the definition in libc.  */
+
+#if IS_IN (libc)
+/* Redefine memset so that the compiler won't complain about the type
+   mismatch with the IFUNC selector in strong_alias, below.  */
+# undef memset
+# define memset __redirect_memset
+# include <string.h>
+# include <init-arch.h>
+
+extern __typeof (__redirect_memset) __libc_memset;
+
+extern __typeof (__redirect_memset) __memset_nozva attribute_hidden;
+extern __typeof (__redirect_memset) __memset_zva_64 attribute_hidden;
+extern __typeof (__redirect_memset) __memset_zva_128 attribute_hidden;
+extern __typeof (__redirect_memset) __memset_generic attribute_hidden;
+
+libc_ifunc (__libc_memset, (zva_size == 0 ? __memset_nozva
+			    : (zva_size == 64 ? __memset_zva_64
+			       : (zva_size == 128 ? __memset_zva_128
+				  : __memset_generic))));
+
+# undef memset
+strong_alias (__libc_memset, memset);
+#endif
diff --git a/sysdeps/aarch64/multiarch/memset_generic.S b/sysdeps/aarch64/multiarch/memset_generic.S
new file mode 100644
index 0000000..8871600
--- /dev/null
+++ b/sysdeps/aarch64/multiarch/memset_generic.S
@@ -0,0 +1,28 @@
+/* Memset for aarch64, default version for internal use.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if IS_IN (libc)
+# define MEMSET __memset_generic
+# define MEMSET_ZVA 1
+/* Add a hidden definition for use within libc.so.  */
+# ifdef SHARED
+	.globl __GI_memset; __GI_memset = __memset_generic
+# endif
+# include <sysdeps/aarch64/memset.S>
+#endif
diff --git a/sysdeps/aarch64/multiarch/memset_nozva.S b/sysdeps/aarch64/multiarch/memset_nozva.S
new file mode 100644
index 0000000..2d4fc42
--- /dev/null
+++ b/sysdeps/aarch64/multiarch/memset_nozva.S
@@ -0,0 +1,23 @@
+/* Memset for aarch64, ZVA disabled.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if IS_IN (libc)
+# define MEMSET __memset_nozva
+# include <sysdeps/aarch64/memset.S>
+#endif
diff --git a/sysdeps/aarch64/multiarch/memset_zva_128.S b/sysdeps/aarch64/multiarch/memset_zva_128.S
new file mode 100644
index 0000000..2c68127
--- /dev/null
+++ b/sysdeps/aarch64/multiarch/memset_zva_128.S
@@ -0,0 +1,24 @@
+/* Memset for aarch64, ZVA enabled and == 128 bytes.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if IS_IN (libc)
+# define MEMSET __memset_zva_128
+# define MEMSET_ZVA 128
+# include <sysdeps/aarch64/memset.S>
+#endif
diff --git a/sysdeps/aarch64/multiarch/memset_zva_64.S b/sysdeps/aarch64/multiarch/memset_zva_64.S
new file mode 100644
index 0000000..ff895f9
--- /dev/null
+++ b/sysdeps/aarch64/multiarch/memset_zva_64.S
@@ -0,0 +1,24 @@
+/* Memset for aarch64, ZVA enabled and == 64 bytes.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if IS_IN (libc)
+# define MEMSET __memset_zva_64
+# define MEMSET_ZVA 64
+# include <sysdeps/aarch64/memset.S>
+#endif
diff --git a/sysdeps/aarch64/multiarch/rtld-memset.S b/sysdeps/aarch64/multiarch/rtld-memset.S
new file mode 100644
index 0000000..172df42
--- /dev/null
+++ b/sysdeps/aarch64/multiarch/rtld-memset.S
@@ -0,0 +1,24 @@
+/* Memset for aarch64, for the dynamic linker.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if IS_IN (rtld)
+# define MEMSET memset
+# define MEMSET_ZVA 1
+# include <sysdeps/aarch64/memset.S>
+#endif
diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
index e769eeb..092ee81 100644
--- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
+++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
@@ -20,6 +20,9 @@
 #include <sys/auxv.h>
 #include <elf/dl-hwcaps.h>
 
+#define DCZID_DZP_MASK (1 << 4)
+#define DCZID_BS_MASK (0xf)
+
 #if HAVE_TUNABLES
 struct cpu_list
 {
@@ -72,4 +75,11 @@ init_cpu_features (struct cpu_features *cpu_features)
     }
 
   cpu_features->midr_el1 = midr;
+
+  /* Check if ZVA is enabled.  */
+  unsigned dczid;
+  asm volatile ("mrs %0, dczid_el0" : "=r"(dczid));
+
+  if ((dczid & DCZID_DZP_MASK) == 0)
+    cpu_features->zva_size = 4 << (dczid & DCZID_BS_MASK);
 }
diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.h b/sysdeps/unix/sysv/linux/aarch64/cpu-features.h
index 73cb53d..f2b6afd 100644
--- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.h
+++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.h
@@ -47,6 +47,7 @@
 struct cpu_features
 {
   uint64_t midr_el1;
+  unsigned zva_size;
 };
 
 #endif /* _CPU_FEATURES_AARCH64_H  */
-- 
2.7.4


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]