This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]
[PATCH] New benchmark strlen-walk

From: Siddhesh Poyarekar <siddhesh at sourceware dot org>
To: libc-alpha at sourceware dot org
Cc: Wilco dot Dijkstra at arm dot com, carlos at redhat dot com
Date: Sat, 11 Aug 2018 00:55:55 +0530
Subject: [PATCH] New benchmark strlen-walk
Hi,

This is a second take at a strlen benchmark and it takes a different
approach from the previous linked list idea.

A comment in the test provides the rationale for the benchmark; to
summarize, it focuses on testing strlen with small to medium sized
inputs, with different sizes mixed in, and walks backwards through them
to try and trick the prefetcher.  The numbers are fairly stable; I'm not
entirely happy with them, but they're close enough to make out a general
performance characteristic.

	* benchtests/bench-strlen-walk.c: New benchmark.
	* benchtests/Makefile (string-benchset): Add it.
    
CC: Wilco.Dijkstra@arm.com
CC: carlos@redhat.com

---
 benchtests/Makefile            |   2 +-
 benchtests/bench-strlen-walk.c | 217 +++++++++++++++++++++++++++++++++
 2 files changed, 218 insertions(+), 1 deletion(-)
 create mode 100644 benchtests/bench-strlen-walk.c

diff --git a/benchtests/Makefile b/benchtests/Makefile
index bcd6a9c26d..31cacef373 100644
--- a/benchtests/Makefile
+++ b/benchtests/Makefile
@@ -43,7 +43,7 @@ string-benchset := bcopy bzero memccpy memchr memcmp memcpy memmem memmove \
 		   strncasecmp strncat strncmp strncpy strnlen strpbrk strrchr \
 		   strspn strstr strcpy_chk stpcpy_chk memrchr strsep strtok \
 		   strcoll memcpy-large memcpy-random memmove-large memset-large \
-		   memcpy-walk memset-walk memmove-walk
+		   memcpy-walk memset-walk memmove-walk strlen-walk
 
 # Build and run locale-dependent benchmarks only if we're building natively.
 ifeq (no,$(cross-compiling))
diff --git a/benchtests/bench-strlen-walk.c b/benchtests/bench-strlen-walk.c
new file mode 100644
index 0000000000..1ac0ae6fdf
--- /dev/null
+++ b/benchtests/bench-strlen-walk.c
@@ -0,0 +1,217 @@
+/* Measure STRLEN functions - walk through a list of elements and measure
+   string lengths.
+   Copyright (C) 2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* RATIONALE
+   ---------
+
+   The following assumptions are made in this test about strlen usage in the
+   wild:
+
+   - Target strings are small or medium in size, rarely (if ever) very large
+   - In well written code, the target string is not in cache since strlen is
+     among the earliest operations on it.
+
+   This test measures the latency of strlen with a mix of sizes up to a maximum
+   length, for a set of maximum lengths.  The distribution of lengths in each
+   set is logarithmic, with the longest length having one occurrence, its half
+   having 2, quarter having 4 and so on.  The lengths are further randomized by
+   adding a jitter of up to 8 bytes.  This makes the small string sets
+   completely arbitrary.
+
+   Further, the buffer and its index are reallocated at every run to ensure
+   that there is no cross talk between implementations.  Finally, the list of
+   strings is walked through backwards to try and trick the hardware
+   prefetcher.  There is an issue with this access pattern too though: the
+   buffer is filled from front to back, so the most recently touched strings
+   get measured first, which introduces some cache side effects.  */
+
+#define TEST_MAIN  /* NOTE(review): presumably read by bench-string.h.  */
+#define MIN_PAGE_SIZE (getpagesize () * 4096)  /* 4096 pages, in bytes.  */
+#ifndef WIDE
+# define TEST_NAME "strlen"  /* Used as a JSON object name in test_main.  */
+#else
+# define TEST_NAME "wcslen"
+#endif
+#include "bench-string.h"
+
+#ifndef WIDE  /* WIDE not defined: measure strlen on char.  */
+# define STRLEN strlen
+# define CHAR char
+# define MAX_CHAR CHAR_MAX  /* Not referenced in the code visible here.  */
+#else  /* WIDE defined: measure wcslen on wchar_t instead.  */
+# include <wchar.h>
+# define STRLEN wcslen
+# define CHAR wchar_t
+# define MAX_CHAR WCHAR_MAX
+#endif
+
+#include "json-lib.h"
+
+typedef size_t (*proto_t) (const CHAR *);  /* Shape of the IMPL functions.  */
+
+size_t
+simple_STRLEN (const CHAR *s)  /* Reference: scan one element at a time.  */
+{
+  const CHAR *p;
+
+  for (p = s; *p; ++p);  /* Empty body: stop at the first zero element.  */
+  return p - s;  /* Number of elements before the terminator.  */
+}
+
+#ifndef WIDE  /* Only built when CHAR is char.  */
+size_t
+builtin_strlen (const CHAR *p)  /* Wraps the builtin so IMPL can register it.  */
+{
+  return __builtin_strlen (p);
+}
+IMPL (builtin_strlen, 0)
+#endif
+
+IMPL (simple_STRLEN, 0)
+IMPL (STRLEN, 1)  /* NOTE(review): 1 presumably marks the tested function.  */
+
+static unsigned char **str_index;  /* String starts; set by setup_strings.  */
+
+static void
+do_one_test (json_ctx_t *json_ctx, impl_t *impl, size_t last_str_index)
+{
+  timing_t start, stop, cur;
+  /* Time one pass over the LAST_STR_INDEX strings, walked back to front.  */
+  TIMING_NOW (start);
+  for (size_t i = last_str_index; i-- > 0;)
+    CALL (impl, (CHAR *) str_index[i]);
+  TIMING_NOW (stop);
+
+  TIMING_DIFF (cur, start, stop);
+  /* Emit the mean time per call as one JSON array element.  */
+  json_element_double (json_ctx, (double) cur / (double) last_str_index);
+}
+
+/* Split buf1 into NUL-terminated strings and record the start of each one in
+   str_index.  Return the number of strings so the index can be walked back.  */
+static size_t
+setup_strings (size_t maxlen)
+{
+  unsigned char *p = buf1;
+  size_t orig_maxlen = maxlen, i = 0;
+  int cur_cnt, cnt;
+  size_t logn = 0, m = maxlen;
+
+  while ((m >>= 1) > 0)
+    logn++;
+
+  /* Size the index as buf1_size*(2*M-1)/(M*(log2(M)+1)) for M = MAXLEN: each
+     pass has 1 string of M, 2 of M/2, 4 of M/4 and so on.  Terminators and
+     jitter only make the strings longer, so this should over-estimate.  */
+  size_t index_size = (2 * buf1_size - buf1_size / orig_maxlen) / (logn + 1);
+  index_size *= sizeof (unsigned char *);
+
+  str_index = malloc (index_size);
+  if (str_index == NULL)
+    error (1, ENOMEM, "Out of memory");  /* error adds the newline itself.  */
+
+  srand (42);  /* Fixed seed: the jitter sequence is the same on every call.  */
+  cur_cnt = cnt = 1;
+  size_t len = maxlen + rand () % 8;
+
+  while (p < buf1 + buf1_size - len - 1)
+    {
+      str_index[i++] = p;
+      memset (p, 'a', len);  /* NOTE(review): fills bytes, not CHAR units.  */
+      p[len] = '\0';
+      p += len + 1;
+      /* Once the current bucket is used up, halve the length, double the count.  */
+      if (--cnt == 0)
+	{
+	  cur_cnt <<= 1;
+	  maxlen >>= 1;
+	  if (maxlen == 0)  /* Out of lengths: restart the distribution.  */
+	    {
+	      cur_cnt = 1;
+	      maxlen = orig_maxlen;
+	    }
+	  cnt = cur_cnt;
+	}
+      len = maxlen + rand () % 8;
+    }
+
+  return i;
+}
+
+
+static void
+do_test (json_ctx_t *json_ctx, size_t len)  /* LEN: maximum string length.  */
+{
+  json_element_object_begin (json_ctx);
+  json_attr_uint (json_ctx, "length", len);
+  json_array_begin (json_ctx, "timings");  /* One element per implementation.  */
+
+  /* Rebuild everything for each implementation so that we don't have cache
+     side effects across implementations.  */
+  FOR_EACH_IMPL (impl, 0)
+    {
+      size_t i = setup_strings (len);  /* Number of strings laid out.  */
+      do_one_test (json_ctx, impl, i);
+      alloc_bufs ();  /* NOTE(review): presumably remaps buf1 — confirm.  */
+      free (str_index);  /* Allocated by setup_strings.  */
+    }
+
+  json_array_end (json_ctx);
+  json_element_object_end (json_ctx);
+}
+
+int
+test_main (void)
+{
+  json_ctx_t json_ctx;
+  size_t i;
+  /* Emit the JSON header, run do_test for each maximum size, then close.  */
+  test_init ();
+
+  json_init (&json_ctx, 0, stdout);  /* The JSON context writes to stdout.  */
+
+  json_document_begin (&json_ctx);
+  json_attr_string (&json_ctx, "timing_type", TIMING_TYPE);
+
+  json_attr_object_begin (&json_ctx, "functions");
+  json_attr_object_begin (&json_ctx, TEST_NAME);
+  json_attr_string (&json_ctx, "bench-variant", "random");
+  /* NOTE(review): "random" above, yet the file is strlen-walk — confirm.  */
+  json_array_begin (&json_ctx, "ifuncs");
+  FOR_EACH_IMPL (impl, 0)
+    json_element_string (&json_ctx, impl->name);  /* One name per impl.  */
+  json_array_end (&json_ctx);
+
+  json_array_begin (&json_ctx, "results");
+
+  /* The maximum sizes to test.  These are arbitrary.  */
+  const size_t size_ranges[] = {64, 128, 512, 2048, 8192};
+
+  for (i = 0; i < sizeof (size_ranges) / sizeof (size_t); i++)
+    do_test (&json_ctx, size_ranges[i]);  /* One result object per size.  */
+
+  json_array_end (&json_ctx);
+  json_attr_object_end (&json_ctx);
+  json_attr_object_end (&json_ctx);
+  json_document_end (&json_ctx);
+
+  return ret;  /* NOTE(review): ret is not declared in the visible code.  */
+}
+
+#include <support/test-driver.c>
-- 
2.17.1
Follow-Ups:
- Re: [PATCH] New benchmark strlen-walk
  - From: Wilco Dijkstra
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]