[PATCH v4] benchtests: Add memset zero fill benchtest
Naohiro Tamura
naohirot@fujitsu.com
Tue Sep 14 00:38:28 GMT 2021
Memset takes 0 as the second parameter in most cases.
However, we cannot measure the zero fill performance by
bench-memset.c, bench-memset-large.c and bench-memset-walk.c
precisely.
The X86_64 micro-architecture has some zero-over-zero optimization, and
the AArch64 micro-architecture also has some optimization for the DC ZVA
instruction.
This patch provides bench-memset-zerofill.c which is suitable to
analyze the zero fill performance by comparing among 4 patterns,
zero-over-zero, zero-over-one, one-over-zero and one-over-one, from
256B to 64MB(RAM) through L1, L2 and L3 caches.
The following commands are examples to analyze a JSON output,
bench-memset-zerofill.out, by 'jq' and 'plot_strings.py'.
1) compare zero-over-zero performance
$ cat bench-memset-zerofill.out | \
jq -r '
.functions.memset."bench-variant"="zerofill-0o0" |
del(.functions.memset.results[] | select(.char1 != 0 or .char2 != 0))
' | \
plot_strings.py -l -p thru -v -
2) compare zero performance
$ cat bench-memset-zerofill.out | \
jq -r '
.functions.memset."bench-variant"="zerofill-zero" |
del(.functions.memset.results[] | select(.char2 != 0))
' | \
plot_strings.py -l -p thru -v -
3) compare nonzero performance
$ cat bench-memset-zerofill.out | \
jq -r '
.functions.memset."bench-variant"="zerofill-nonzero" |
del(.functions.memset.results[] | select(.char2 == 0))
' | \
plot_strings.py -l -p thru -v -
Reviewed-by: Lucas A. M. Magalhaes <lamm@linux.ibm.com>
Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
Reviewed-by: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
---
benchtests/Makefile | 2 +-
benchtests/bench-memset-zerofill.c | 140 +++++++++++++++++++++++++++++
2 files changed, 141 insertions(+), 1 deletion(-)
create mode 100644 benchtests/bench-memset-zerofill.c
diff --git a/benchtests/Makefile b/benchtests/Makefile
index 1530939a8ce8..21b95c736190 100644
--- a/benchtests/Makefile
+++ b/benchtests/Makefile
@@ -53,7 +53,7 @@ string-benchset := memccpy memchr memcmp memcpy memmem memmove \
strncasecmp strncat strncmp strncpy strnlen strpbrk strrchr \
strspn strstr strcpy_chk stpcpy_chk memrchr strsep strtok \
strcoll memcpy-large memcpy-random memmove-large memset-large \
- memcpy-walk memset-walk memmove-walk
+ memcpy-walk memset-walk memmove-walk memset-zerofill
# Build and run locale-dependent benchmarks only if we're building natively.
ifeq (no,$(cross-compiling))
diff --git a/benchtests/bench-memset-zerofill.c b/benchtests/bench-memset-zerofill.c
new file mode 100644
index 000000000000..0e6958ab59dd
--- /dev/null
+++ b/benchtests/bench-memset-zerofill.c
@@ -0,0 +1,140 @@
+/* Measure memset functions with zero fill data.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define TEST_MAIN
+#define TEST_NAME "memset"
+#define START_SIZE 256 // first (smallest) length used by the size loop in test_main
+#define MIN_PAGE_SIZE (getpagesize () + 64 * 1024 * 1024) // one page plus 64 MiB; also the upper bound of the size loop
+#define TIMEOUT (20 * 60) // NOTE(review): presumably seconds (20 minutes) for the test driver -- confirm
+#include "bench-string.h"
+
+#include "json-lib.h"
+// Prototype for the generic C memset that is compiled in at the end of this file.
+void *generic_memset (void *, int, size_t);
+typedef void *(*proto_t) (void *, int, size_t);
+// NOTE(review): presumably registers both functions as implementations to benchmark -- confirm IMPL in bench-string.h
+IMPL (MEMSET, 1)
+IMPL (generic_memset, 0)
+
+static void // Time IMPL filling length N at S with C2, pre-filling with C1 before every timed call.
+__attribute__((noinline, noclone))
+do_one_test (json_ctx_t *json_ctx, impl_t *impl, CHAR *s,
+ int c1 __attribute ((unused)), int c2 __attribute ((unused)),
+ size_t n)
+{
+ size_t i, iters = 32;
+ timing_t start, stop, cur, latency = 0;
+ // One untimed call of IMPL with the measured fill value before the loop.
+ CALL (impl, s, c2, n); // warm up
+
+ for (i = 0; i < iters; i++)
+ {
+ memset (s, c1, n); // alternation: untimed pre-fill with c1, so the timed call writes c2 over c1
+
+ TIMING_NOW (start);
+
+ CALL (impl, s, c2, n);
+
+ TIMING_NOW (stop);
+ TIMING_DIFF (cur, start, stop);
+ TIMING_ACCUM (latency, cur); // presumably adds this iteration's timing to latency -- macro defined elsewhere
+ }
+ // Emit latency divided by the iteration count as one JSON array element.
+ json_element_double (json_ctx, (double) latency / (double) iters);
+}
+
+static void // Emit one JSON result object for (ALIGN, C1, C2, LEN) with a timing per implementation.
+do_test (json_ctx_t *json_ctx, size_t align, int c1, int c2, size_t len)
+{
+ align &= getpagesize () - 1; // mask ALIGN with (page size - 1); assumes a power-of-two page size
+ if ((align + len) * sizeof (CHAR) > page_size) // page_size is declared outside this file's visible code
+ return; // request does not fit: emit nothing for this combination
+
+ json_element_object_begin (json_ctx);
+ json_attr_uint (json_ctx, "length", len);
+ json_attr_uint (json_ctx, "alignment", align);
+ json_attr_int (json_ctx, "char1", c1);
+ json_attr_int (json_ctx, "char2", c2);
+ json_array_begin (json_ctx, "timings");
+ // One "timings" element is produced per implementation visited by FOR_EACH_IMPL.
+ FOR_EACH_IMPL (impl, 0)
+ {
+ do_one_test (json_ctx, impl, (CHAR *) (buf1) + align, c1, c2, len);
+ alloc_bufs (); // NOTE(review): presumably resets the shared buffers between implementations -- confirm
+ }
+
+ json_array_end (json_ctx);
+ json_element_object_end (json_ctx);
+}
+
+int // Benchmark entry point: writes one JSON document to stdout covering every pattern, length and alignment.
+test_main (void)
+{
+ json_ctx_t json_ctx;
+ size_t i;
+ int c1, c2;
+
+ test_init ();
+
+ json_init (&json_ctx, 0, stdout);
+
+ json_document_begin (&json_ctx);
+ json_attr_string (&json_ctx, "timing_type", TIMING_TYPE);
+
+ json_attr_object_begin (&json_ctx, "functions");
+ json_attr_object_begin (&json_ctx, TEST_NAME);
+ json_attr_string (&json_ctx, "bench-variant", "zerofill");
+ // Record impl->name for each implementation visited by FOR_EACH_IMPL.
+ json_array_begin (&json_ctx, "ifuncs");
+ FOR_EACH_IMPL (impl, 0)
+ json_element_string (&json_ctx, impl->name);
+ json_array_end (&json_ctx);
+
+ json_array_begin (&json_ctx, "results");
+ // c1 is the value do_one_test pre-fills untimed; c2 is the value written by the timed call.
+ // To analyze zero fill performance by comparing among the following 4
+ // patterns from 256B to 64MB(RAM) through L1, L2 and L3 caches.
+ // - zero-over-zero: c1=0, c2=0
+ // - zero-over-one: c1=0, c2=1 (NOTE(review): the timed fill here is c2=1 over c1=0 -- confirm label order)
+ // - one-over-zero: c1=1, c2=0
+ // - one-over-one: c1=1, c2=1
+ for (c1 = 0; c1 < 2; c1++)
+ for (c2 = 0; c2 < 2; c2++)
+ for (i = START_SIZE; i <= MIN_PAGE_SIZE; i <<= 1) // length doubles on every step
+ {
+ do_test (&json_ctx, 0, c1, c2, i); // alignment argument 0
+ do_test (&json_ctx, 3, c1, c2, i); // alignment argument 3
+ }
+ // Close "results", the TEST_NAME object, "functions" and the document, in that order.
+ json_array_end (&json_ctx);
+ json_attr_object_end (&json_ctx);
+ json_attr_object_end (&json_ctx);
+ json_document_end (&json_ctx);
+
+ return ret; // ret is not declared in this function; NOTE(review): presumably provided by the included test headers
+}
+
+#include <support/test-driver.c>
+// Compile the C memset from string/memset.c into this benchmark under the name generic_memset.
+#define libc_hidden_builtin_def(X) // this and the next three libc-internal macros expand to nothing here
+#define libc_hidden_def(X)
+#define libc_hidden_weak(X)
+#define weak_alias(X,Y)
+#undef MEMSET
+#define MEMSET generic_memset // NOTE(review): presumably string/memset.c names its function via MEMSET -- confirm
+#include <string/memset.c>
--
2.17.1
More information about the Libc-alpha
mailing list