This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH v1.1 1/3] Copy over string performance tests - take 2


On Sun, Jun 09, 2013 at 10:09:09PM +0200, Andreas Jaeger wrote:
> On 06/04/2013 03:22 PM, Siddhesh Poyarekar wrote:
> >Hi,
> >
> >I had held back my earlier patch to start moving string performance
> >tests from string/test-* to benchtests because Ondrej expressed
> >interest in writing better tests.  We're close to a freeze and I
> >didn't see any submissions from Ondrej, so I've decided to revive this
> >patch so that we at least have a starting point for string performance
> >tests.
> >
> >This patch introduces a new feature to the benchmark suite where one
> >could run a separate set of benchmark tests to get a separate output.
> >This is useful for cases where a benchmark run needs to measure more
> >than just the number of cycles consumed per function call, which is
> >what the default bench.out is useful for.
> >
> >As an example use case, I've copied over the memcpy string performance
> >tests, with the correctness tests removed since they're not useful in
> >the benchmark.  I've also added a description to the README.
> >
> >There are also two follow-up patches that I'll post shortly:
> >
> >1) Copy over the remaining string benchmark tests - This is a fairly
> >    large, but mechanical patch.
> >2) Remove measurement-related code from the string tests
> >
> >As a result, we will end up with two distinct sets of tests - the
> >correctness tests run with make check and the benchmark tests run with
> >make bench.
> >
> >Siddhesh
> >
> >	* benchtests/Makefile: Disable parallel execution of targets.
> 
> Why did you disable the parallel execution?

We wouldn't want tests to run in parallel since that would affect
performance numbers.  I've added a comment to explain this.

Here's v1.1, with that and other changes you pointed out.

Thanks,
Siddhesh

	* benchtests/Makefile: Disable parallel execution of targets.
	(string-bench): Add memcpy.
	(benchset): New variable to store a list of benchmark sets.
	(bench-func): Renamed from bench.
	(bench-set): New target.
	(bench): Depend on bench-func and bench-set.
	* benchtests/README: Add section on benchmark sets.
	* benchtests/bench-memcpy-ifunc.c: New file.
	* benchtests/bench-memcpy.c: New file.
	* benchtests/bench-string.h: New file.

diff --git a/benchtests/Makefile b/benchtests/Makefile
index 680440f..27d83f4 100644
--- a/benchtests/Makefile
+++ b/benchtests/Makefile
@@ -23,6 +23,13 @@ subdir := benchtests
 bench := acos acosh asin asinh atan atanh cos cosh exp log modf pow rint sin \
 	 sinh tan tanh
 
+# String function benchmarks.
+string-bench := memcpy
+string-bench-ifunc := $(addsuffix -ifunc, $(string-bench))
+string-bench-all := $(string-bench) $(string-bench-ifunc)
+
+benchset := $(string-bench-all)
+
 acos-ARGLIST = double
 acos-RET = double
 LDFLAGS-bench-acos = -lm
@@ -92,10 +99,15 @@ LDFLAGS-bench-tanh = -lm
 # Rules to build and execute the benchmarks.  Do not put any benchmark
 # parameters beyond this point.
 
+# We don't want the benchmark programs to run in parallel since that could
+# affect their performance.
+.NOTPARALLEL:
+
 include ../Makeconfig
 include ../Rules
 
 binaries-bench := $(addprefix $(objpfx)bench-,$(bench))
+binaries-benchset := $(addprefix $(objpfx)bench-,$(benchset))
 
 # The default duration: 10 seconds.
 ifndef BENCH_DURATION
@@ -112,7 +124,7 @@ endif
 
 # This makes sure CPPFLAGS-nonlib and CFLAGS-nonlib are passed
 # for all these modules.
-cpp-srcs-left := $(binaries-bench:=.c)
+cpp-srcs-left := $(binaries-benchset:=.c) $(binaries-bench:=.c)
 lib := nonlib
 include $(patsubst %,$(..)cppflags-iterator.mk,$(cpp-srcs-left))
 
@@ -124,8 +136,17 @@ run-bench = $(test-wrapper-env) \
 
 bench-clean:
 	rm -f $(binaries-bench) $(addsuffix .o,$(binaries-bench))
+	rm -f $(binaries-benchset) $(addsuffix .o,$(binaries-benchset))
+
+bench: bench-set bench-func
+
+bench-set: $(binaries-benchset)
+	for run in $^; do \
+	  echo "Running $${run}"; \
+	  $(run-bench) > $${run}.out; \
+	done
 
-bench: $(binaries-bench)
+bench-func: $(binaries-bench)
 	{ for run in $^; do \
 	  echo "Running $${run}" >&2; \
 	  $(run-bench); \
@@ -135,7 +156,7 @@ bench: $(binaries-bench)
 	fi; \
 	mv -f $(objpfx)bench.out-tmp $(objpfx)bench.out
 
-$(binaries-bench): %: %.o \
+$(binaries-bench) $(binaries-benchset): %: %.o \
   $(sort $(filter $(common-objpfx)lib%,$(link-libc))) \
   $(addprefix $(csu-objpfx),start.o) $(+preinit) $(+postinit)
 	$(+link)
diff --git a/benchtests/README b/benchtests/README
index 8135069..045b7a6 100644
--- a/benchtests/README
+++ b/benchtests/README
@@ -72,3 +72,18 @@ the same file by using the `name' directive that looks something like this:
 
 See the pow-inputs file for an example of what such a partitioned input file
 would look like.
+
+Benchmark Sets:
+==============
+
+In addition to standard benchmarking of functions, one may also generate
+custom outputs for a set of functions.  This is currently used by string
+function benchmarks where the aim is to compare performance between
+implementations at various alignments and for various sizes.
+
+To add a benchset for `foo':
+
+- Add `foo' to the benchset variable.
+- Write your bench-foo.c that prints out the measurements to stdout.
+- On execution, a bench-foo.out is created in $(objpfx) with the contents of
+  stdout.
diff --git a/benchtests/bench-memcpy-ifunc.c b/benchtests/bench-memcpy-ifunc.c
new file mode 100644
index 0000000..b5a89f7
--- /dev/null
+++ b/benchtests/bench-memcpy-ifunc.c
@@ -0,0 +1,20 @@
+/* Measure IFUNC implementations of memcpy function.
+   Copyright (C) 2013 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define TEST_IFUNC 1
+#include "bench-memcpy.c"
diff --git a/benchtests/bench-memcpy.c b/benchtests/bench-memcpy.c
new file mode 100644
index 0000000..1b12671
--- /dev/null
+++ b/benchtests/bench-memcpy.c
@@ -0,0 +1,163 @@
+/* Measure memcpy functions.
+   Copyright (C) 2013 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef MEMCPY_RESULT
+# define MEMCPY_RESULT(dst, len) dst
+# define MIN_PAGE_SIZE 131072
+# define TEST_MAIN
+# define TEST_NAME "memcpy"
+# include "bench-string.h"
+
+char *simple_memcpy (char *, const char *, size_t);
+char *builtin_memcpy (char *, const char *, size_t);
+
+IMPL (simple_memcpy, 0)
+IMPL (builtin_memcpy, 0)
+IMPL (memcpy, 1)
+
+char *
+simple_memcpy (char *dst, const char *src, size_t n)
+{
+  char *ret = dst;
+  while (n--)
+    *dst++ = *src++;
+  return ret;
+}
+
+char *
+builtin_memcpy (char *dst, const char *src, size_t n)
+{
+  return __builtin_memcpy (dst, src, n);
+}
+#endif
+
+typedef char *(*proto_t) (char *, const char *, size_t);
+
+static void
+do_one_test (impl_t *impl, char *dst, const char *src,
+	     size_t len)
+{
+  if (CALL (impl, dst, src, len) != MEMCPY_RESULT (dst, len))
+    {
+      error (0, 0, "Wrong result in function %s %p %p", impl->name,
+	     CALL (impl, dst, src, len), MEMCPY_RESULT (dst, len));
+      ret = 1;
+      return;
+    }
+
+  if (memcmp (dst, src, len) != 0)
+    {
+      error (0, 0, "Wrong result in function %s dst \"%s\" src \"%s\"",
+	     impl->name, dst, src);
+      ret = 1;
+      return;
+    }
+
+  if (HP_TIMING_AVAIL)
+    {
+      hp_timing_t start __attribute ((unused));
+      hp_timing_t stop __attribute ((unused));
+      hp_timing_t best_time = ~ (hp_timing_t) 0;
+      size_t i;
+
+      for (i = 0; i < 32; ++i)
+	{
+	  HP_TIMING_NOW (start);
+	  CALL (impl, dst, src, len);
+	  HP_TIMING_NOW (stop);
+	  HP_TIMING_BEST (best_time, start, stop);
+	}
+
+      printf ("\t%zd", (size_t) best_time);
+    }
+}
+
+static void
+do_test (size_t align1, size_t align2, size_t len)
+{
+  size_t i, j;
+  char *s1, *s2;
+
+  align1 &= 63;
+  if (align1 + len >= page_size)
+    return;
+
+  align2 &= 63;
+  if (align2 + len >= page_size)
+    return;
+
+  s1 = (char *) (buf1 + align1);
+  s2 = (char *) (buf2 + align2);
+
+  for (i = 0, j = 1; i < len; i++, j += 23)
+    s1[i] = j;
+
+  if (HP_TIMING_AVAIL)
+    printf ("Length %4zd, alignment %2zd/%2zd:", len, align1, align2);
+
+  FOR_EACH_IMPL (impl, 0)
+    do_one_test (impl, s2, s1, len);
+
+  if (HP_TIMING_AVAIL)
+    putchar ('\n');
+}
+
+int
+test_main (void)
+{
+  size_t i;
+
+  test_init ();
+
+  printf ("%23s", "");
+  FOR_EACH_IMPL (impl, 0)
+    printf ("\t%s", impl->name);
+  putchar ('\n');
+
+  for (i = 0; i < 18; ++i)
+    {
+      do_test (0, 0, 1 << i);
+      do_test (i, 0, 1 << i);
+      do_test (0, i, 1 << i);
+      do_test (i, i, 1 << i);
+    }
+
+  for (i = 0; i < 32; ++i)
+    {
+      do_test (0, 0, i);
+      do_test (i, 0, i);
+      do_test (0, i, i);
+      do_test (i, i, i);
+    }
+
+  for (i = 3; i < 32; ++i)
+    {
+      if ((i & (i - 1)) == 0)
+	continue;
+      do_test (0, 0, 16 * i);
+      do_test (i, 0, 16 * i);
+      do_test (0, i, 16 * i);
+      do_test (i, i, 16 * i);
+    }
+
+  do_test (0, 0, getpagesize ());
+
+  return ret;
+}
+
+#include "../test-skeleton.c"
diff --git a/benchtests/bench-string.h b/benchtests/bench-string.h
new file mode 100644
index 0000000..2fe8d9f
--- /dev/null
+++ b/benchtests/bench-string.h
@@ -0,0 +1,212 @@
+/* Measure string and memory functions.
+   Copyright (C) 2013 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sys/cdefs.h>
+
+typedef struct
+{
+  const char *name;
+  void (*fn) (void);
+  long test;
+} impl_t;
+extern impl_t __start_impls[], __stop_impls[];
+
+#define IMPL(name, test) \
+  impl_t tst_ ## name							\
+  __attribute__ ((section ("impls"), aligned (sizeof (void *))))	\
+       = { __STRING (name), (void (*) (void))name, test };
+
+#ifdef TEST_MAIN
+
+# ifndef _GNU_SOURCE
+#  define _GNU_SOURCE
+# endif
+
+# undef __USE_STRING_INLINES
+
+# include <stdio.h>
+# include <stdlib.h>
+# include <string.h>
+# include <sys/mman.h>
+# include <sys/param.h>
+# include <unistd.h>
+# include <fcntl.h>
+# include <error.h>
+# include <errno.h>
+# include <time.h>
+# include <ifunc-impl-list.h>
+# define GL(x) _##x
+# define GLRO(x) _##x
+# include <hp-timing.h>
+
+
+# define TEST_FUNCTION test_main ()
+# define TIMEOUT (4 * 60)
+# define OPT_ITERATIONS 10000
+# define OPT_RANDOM 10001
+# define OPT_SEED 10002
+
+unsigned char *buf1, *buf2;
+int ret, do_srandom;
+unsigned int seed;
+size_t page_size;
+
+hp_timing_t _dl_hp_timing_overhead;
+
+# ifndef ITERATIONS
+size_t iterations = 100000;
+#  define ITERATIONS_OPTIONS \
+     { "iterations", required_argument, NULL, OPT_ITERATIONS },
+#  define ITERATIONS_PROCESS \
+     case OPT_ITERATIONS:						      \
+       iterations = strtoul (optarg, NULL, 0);				      \
+       break;
+#  define ITERATIONS iterations
+# else
+#  define ITERATIONS_OPTIONS
+#  define ITERATIONS_PROCESS
+# endif
+
+# define CMDLINE_OPTIONS ITERATIONS_OPTIONS \
+    { "random", no_argument, NULL, OPT_RANDOM },			      \
+    { "seed", required_argument, NULL, OPT_SEED },
+# define CMDLINE_PROCESS ITERATIONS_PROCESS \
+    case OPT_RANDOM:							      \
+      {									      \
+	int fdr = open ("/dev/urandom", O_RDONLY);			      \
+									      \
+	if (fdr < 0 || read (fdr, &seed, sizeof(seed)) != sizeof (seed))      \
+	  seed = time (NULL);						      \
+	if (fdr >= 0)							      \
+	  close (fdr);							      \
+	do_srandom = 1;							      \
+	break;								      \
+      }									      \
+									      \
+    case OPT_SEED:							      \
+      seed = strtoul (optarg, NULL, 0);					      \
+      do_srandom = 1;							      \
+      break;
+
+# define CALL(impl, ...)	\
+    (* (proto_t) (impl)->fn) (__VA_ARGS__)
+
+# if defined TEST_IFUNC && defined TEST_NAME
+/* Increase size of FUNC_LIST if assert is triggered at run-time.  */
+static struct libc_ifunc_impl func_list[32];
+static int func_count;
+static int impl_count = -1;
+static impl_t *impl_array;
+
+#  define FOR_EACH_IMPL(impl, notall) \
+     impl_t *impl;							      \
+     int count;								      \
+     if (impl_count == -1)						      \
+       {								      \
+	 impl_count = 0;						      \
+	 if (func_count != 0)						      \
+	   {								      \
+	     int f;							      \
+	     impl_t *skip = NULL, *a;					      \
+	     for (impl = __start_impls; impl < __stop_impls; ++impl)	      \
+	       if (strcmp (impl->name, TEST_NAME) == 0)			      \
+		 skip = impl;						      \
+	       else							      \
+		 impl_count++;						      \
+	     a = impl_array = malloc ((impl_count + func_count) *	      \
+				   sizeof (impl_t));			      \
+	     for (impl = __start_impls; impl < __stop_impls; ++impl)	      \
+	       if (impl != skip)					      \
+		 *a++ = *impl;						      \
+	     for (f = 0; f < func_count; f++)				      \
+	       if (func_list[f].usable)					      \
+		 {							      \
+		   a->name = func_list[f].name;				      \
+		   a->fn = func_list[f].fn;				      \
+		   a->test = 1;						      \
+		   a++;							      \
+		 }							      \
+	     impl_count = a - impl_array;				      \
+	   }								      \
+	 else								      \
+	   {								      \
+	     impl_count = __stop_impls - __start_impls;			      \
+	     impl_array = __start_impls;				      \
+	   }								      \
+       }								      \
+     impl = impl_array;							      \
+     for (count = 0; count < impl_count; ++count, ++impl)		      \
+       if (!notall || impl->test)
+# else /* ! (defined TEST_IFUNC && defined TEST_NAME) */
+#  define FOR_EACH_IMPL(impl, notall) \
+     for (impl_t *impl = __start_impls; impl < __stop_impls; ++impl)	      \
+       if (!notall || impl->test)
+# endif /* ! (defined TEST_IFUNC && defined TEST_NAME) */
+
+# define HP_TIMING_BEST(best_time, start, end)	\
+    do									      \
+      {									      \
+	hp_timing_t tmptime;						      \
+	HP_TIMING_DIFF (tmptime, start + _dl_hp_timing_overhead, end);	      \
+	if (best_time > tmptime)					      \
+	  best_time = tmptime;						      \
+      }									      \
+    while (0)
+
+# ifndef BUF1PAGES
+#  define BUF1PAGES 1
+# endif
+
+static void
+test_init (void)
+{
+# if defined TEST_IFUNC && defined TEST_NAME
+  func_count = __libc_ifunc_impl_list (TEST_NAME, func_list,
+				       (sizeof func_list
+					/ sizeof func_list[0]));
+# endif
+
+  page_size = 2 * getpagesize ();
+# ifdef MIN_PAGE_SIZE
+  if (page_size < MIN_PAGE_SIZE)
+    page_size = MIN_PAGE_SIZE;
+# endif
+  buf1 = mmap (0, (BUF1PAGES + 1) * page_size, PROT_READ | PROT_WRITE,
+	       MAP_PRIVATE | MAP_ANON, -1, 0);
+  if (buf1 == MAP_FAILED)
+    error (EXIT_FAILURE, errno, "mmap failed");
+  if (mprotect (buf1 + BUF1PAGES * page_size, page_size, PROT_NONE))
+    error (EXIT_FAILURE, errno, "mprotect failed");
+  buf2 = mmap (0, 2 * page_size, PROT_READ | PROT_WRITE,
+	       MAP_PRIVATE | MAP_ANON, -1, 0);
+  if (buf2 == MAP_FAILED)
+    error (EXIT_FAILURE, errno, "mmap failed");
+  if (mprotect (buf2 + page_size, page_size, PROT_NONE))
+    error (EXIT_FAILURE, errno, "mprotect failed");
+  HP_TIMING_DIFF_INIT ();
+  if (do_srandom)
+    {
+      printf ("Setting seed to 0x%x\n", seed);
+      srandom (seed);
+    }
+
+  memset (buf1, 0xa5, BUF1PAGES * page_size);
+  memset (buf2, 0x5a, page_size);
+}
+
+#endif /* TEST_MAIN */


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]