This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Use HP_TIMING for benchmarks if available


Hi,

Here's a patch that adds support for using HP_TIMING in benchmark
measurements when it is available.  clock_gettime is still kept as a
fallback when !HP_TIMING_AVAIL.  I've also added support for
overriding HP_TIMING in favour of clock_gettime by executing:

make USE_CLOCK_GETTIME=1 bench

One would need a 'make bench-clean' to ensure that the sources are
rebuilt whenever one needs to switch between clock_gettime and
HP_TIMING.  Another easy way is to just touch benchtests/Makefile.

I have verified that the measurements on x86_64 are consistent over
multiple runs, so my concern of scheduler overhead causing jitters was
unfounded.  Jitter due to high system load is unavoidable even with
clock_gettime since (as Rich Felker pointed out in an earlier
discussion) competition for cache will still affect the performance
numbers.

Siddhesh

	* Makeconfig (cflags): Add bench-cflags.
	* benchtests/Makefile: Define bench-cflags if
	USE_CLOCK_GETTIME is defined.
	* benchtests/bench-skeleton.c: Include bench-timing.h.
	(main): Use TIMING_* macros instead of clock_gettime.
	* benchtests/bench-timing.h: New file.

diff --git a/Makeconfig b/Makeconfig
index a3d3e70..a83485f 100644
--- a/Makeconfig
+++ b/Makeconfig
@@ -755,6 +755,9 @@ ifeq	"$(strip $(+cflags))" ""
 +cflags	:= $(default_cflags)
 endif	# $(+cflags) == ""
 
+# Add common benchmark CFLAGS
++cflags += $(bench-cflags)
+
 +cflags += $(cflags-cpu) $(+gccwarn) $(+merge-constants) $(+math-flags)
 +gcc-nowarn := -w
 
diff --git a/benchtests/Makefile b/benchtests/Makefile
index 3e794d7..d330abb 100644
--- a/benchtests/Makefile
+++ b/benchtests/Makefile
@@ -109,12 +109,16 @@ LDFLAGS-bench-slowatan = -lm
 # Rules to build and execute the benchmarks.  Do not put any benchmark
 # parameters beyond this point.
 
+ifdef USE_CLOCK_GETTIME
+bench-cflags := -DUSE_CLOCK_GETTIME
+endif
+
 include ../Makeconfig
 include ../Rules
 
 binaries-bench := $(addprefix $(objpfx)bench-,$(bench))
 
-bench-deps := bench-skeleton.c Makefile
+bench-deps := bench-skeleton.c bench-timing.h Makefile
 
 run-bench = $(test-wrapper-env) \
 	    GCONV_PATH=$(common-objpfx)iconvdata LC_ALL=C \
diff --git a/benchtests/bench-skeleton.c b/benchtests/bench-skeleton.c
index 13f986d..99a316e 100644
--- a/benchtests/bench-skeleton.c
+++ b/benchtests/bench-skeleton.c
@@ -17,59 +17,53 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <string.h>
-#include <stdint.h>
 #include <stdio.h>
 #include <time.h>
 #include <inttypes.h>
+#include "bench-timing.h"
 
 int
 main (int argc, char **argv)
 {
   unsigned long i, j, k;
-  uint64_t total = 0, max = 0, min = 0x7fffffffffffffff;
-  struct timespec start, end;
+  timing_t total = 0, max = 0, min = 0x7fffffffffffffff;
+  timing_t start, end;
 
   memset (&start, 0, sizeof (start));
   memset (&end, 0, sizeof (end));
 
-  clock_getres (CLOCK_PROCESS_CPUTIME_ID, &start);
+  unsigned long iters;
+
+  TIMING_INIT (iters);
 
-  /* Measure 1000 times the resolution of the clock.  So for a 1ns resolution
-     clock, we measure 1000 iterations of the function call at a time.
-     Measurements close to the minimum clock resolution won't make much sense,
-     but it's better than having nothing at all.  */
-  unsigned long iters = 1000 * start.tv_nsec;
   unsigned long total_iters = ITER / iters;
 
   for (i = 0; i < NUM_SAMPLES; i++)
     {
       for (j = 0; j < total_iters; j ++)
 	{
-	  clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &start);
+	  int64_t cur;
+
+	  TIMING_NOW (start);
 	  for (k = 0; k < iters; k++)
 	    BENCH_FUNC(i);
-	  clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &end);
-
-	  uint64_t cur = (end.tv_nsec - start.tv_nsec
-			 + ((end.tv_sec - start.tv_sec)
-			    * (uint64_t) 1000000000));
+	  TIMING_NOW (end);
 
+	  TIMING_DIFF (cur, start, end);
 	  if (cur > max)
 	    max = cur;
 
 	  if (cur < min)
 	    min = cur;
 
-	  total += cur;
+	  TIMING_ACCUM (total, cur);
 	}
     }
 
-  double d_total_s = total * 1e-9;
+  double d_total_s = total;
   double d_iters = iters;
   double d_total_i = (double)ITER * NUM_SAMPLES;
-  printf (FUNCNAME ": ITERS:%g: TOTAL:%gs, MAX:%gns, MIN:%gns, %g iter/s\n",
-	  d_total_i, d_total_s, max / d_iters, min / d_iters,
-	  d_total_i / d_total_s);
+  TIMING_PRINT_STATS (d_total_s, d_iters, d_total_i, max, min);
 
   return 0;
 }
diff --git a/benchtests/bench-timing.h b/benchtests/bench-timing.h
new file mode 100644
index 0000000..e67a88d
--- /dev/null
+++ b/benchtests/bench-timing.h
@@ -0,0 +1,72 @@
+/* Define timing macros.
+   Copyright (C) 2013 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <hp-timing.h>
+#include <stdint.h>
+
+#if HP_TIMING_AVAIL && !defined USE_CLOCK_GETTIME
+# define GL(x) _##x
+# define GLRO(x) _##x
+hp_timing_t _dl_hp_timing_overhead;
+typedef hp_timing_t timing_t;
+
+# define TIMING_INIT(iters) \
+({									      \
+  HP_TIMING_DIFF_INIT();						      \
+  (iters) = 1000;							      \
+})
+
+# define TIMING_NOW(var) HP_TIMING_NOW (var)
+# define TIMING_DIFF(diff, start, end) HP_TIMING_DIFF ((diff), (start), (end))
+# define TIMING_ACCUM(sum, diff) HP_TIMING_ACCUM_NT ((sum), (diff))
+/* Print stats in HP_TIMING units ("C"); no trailing ';' so callers add it.  */
+# define TIMING_PRINT_STATS(d_total_s, d_iters, d_total_i, max, min) \
+  printf (FUNCNAME ": ITERS:%g: TOTAL:%gC, MAX:%gC, MIN:%gC, %g calls/MC\n",  \
+	  (d_total_i), (d_total_s), (max) / (d_iters), (min) / (d_iters),     \
+	  1e6 * (d_total_i) / (d_total_s))
+
+#else
+typedef uint64_t timing_t;
+
+/* Measure 1000 times the resolution of the clock.  So for a 1ns
+   resolution  clock, we measure 1000 iterations of the function call at a
+   time.  Measurements close to the minimum clock resolution won't make
+   much sense, but it's better than having nothing at all.  */
+# define TIMING_INIT(iters) \
+({									      \
+  struct timespec start;						      \
+  clock_getres (CLOCK_PROCESS_CPUTIME_ID, &start);			      \
+  (iters) = 1000 * start.tv_nsec;					      \
+})
+
+# define TIMING_NOW(var) \
+({									      \
+  struct timespec tv;							      \
+  clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &tv);			      \
+  (var) = (uint64_t) (tv.tv_nsec + (uint64_t) 1000000000 * tv.tv_sec);	      \
+})
+
+# define TIMING_DIFF(diff, start, end) (diff) = (end) - (start)
+# define TIMING_ACCUM(sum, diff) (sum) += (diff)
+/* Timestamps are in ns: print the total in seconds, MAX/MIN in ns.  */
+# define TIMING_PRINT_STATS(d_total_s, d_iters, d_total_i, max, min) \
+  printf (FUNCNAME ": ITERS:%g: TOTAL:%gs, MAX:%gns, MIN:%gns, %g iter/s\n",  \
+	  (d_total_i), (d_total_s) * 1e-9, (max) / (d_iters),		      \
+	  (min) / (d_iters), 1e9 * (d_total_i) / (d_total_s))
+
+#endif


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]