This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
[PATCH] Run benchmarks for constant time instead of constant iterations
- From: Siddhesh Poyarekar <siddhesh at redhat dot com>
- To: libc-alpha at sourceware dot org
- Date: Wed, 24 Apr 2013 18:38:39 +0530
- Subject: [PATCH] Run benchmarks for constant time instead of constant iterations
Hi,
Running benchmarks for a constant number of iterations is
problematic. While the benchmarks may run for 10 seconds on x86_64,
they could run for about 30 seconds on powerpc and, worse, over 3
minutes on arm. Besides that, adding a new benchmark is cumbersome
since one needs to find out the number of iterations needed for a
sufficient runtime.
A better idea would be to run each benchmark for a specific amount of
time. This patch does just that. The run time defaults to 10 seconds
and is configurable on the command line:
make BENCH_DURATION=5 bench
I have also adjusted the modf benchmark to reflect this change.
Verified that the benchmarks run for the specified amount of time on
x86_64. OK to commit?
Siddhesh
* benchtests/Makefile: Remove *-ITER. Define BENCH_DURATION
in CPPFLAGS.
($(objpfx)bench-%.c): Remove *-ITER.
* benchtests/bench-modf.c: Remove definition of ITER.
* benchtests/bench-skeleton.c (TIMESPEC_AFTER): New macro.
(main): Loop for DURATION seconds instead of fixed number of
iterations.
* scripts/bench.pl: Don't expect iterations in parameters.
diff --git a/benchtests/Makefile b/benchtests/Makefile
index 387fde4..9d25d69 100644
--- a/benchtests/Makefile
+++ b/benchtests/Makefile
@@ -23,12 +23,6 @@
# - Append the function name to the bench variable
-# - Define foo-ITER with the number of iterations you want to run. Keep it
-# high enough that the overhead of clock_gettime is only a small fraction of
-# the total run time of the test. A good idea would be to keep the run time
-# of each test at around 10 seconds for x86_64. That is just a guideline,
-# since some scenarios may require higher run times.
-
# - Define foo-ARGLIST as a colon separated list of types of the input
# arguments. Use `void` if function does not take any inputs. Put in quotes
# if the input argument is a pointer, e.g.:
@@ -49,83 +43,70 @@ bench := exp pow rint sin cos tan atan modf \
slowexp slowpow slowsin slowcos slowtan slowatan
# exp function fast path: sysdeps/ieee754/dbl-64/e_exp.c
-exp-ITER = 5e8
exp-ARGLIST = double
exp-RET = double
LDFLAGS-bench-exp = -lm
# pow function fast path: sysdeps/ieee754/dbl-64/e_pow.c
-pow-ITER = 2e8
pow-ARGLIST = double:double
pow-RET = double
LDFLAGS-bench-pow = -lm
-rint-ITER = 250000000
rint-ARGLIST = double
rint-RET = double
LDFLAGS-bench-rint = -lm
# exp function slowest path: sysdeps/ieee754/dbl-64/mpexp.c
-slowexp-ITER = 3e5
slowexp-ARGLIST = double
slowexp-RET = double
slowexp-INCLUDE = slowexp.c
LDFLAGS-bench-slowexp = -lm
# sin function fast path: sysdeps/ieee754/dbl-64/s_sin.c
-sin-ITER = 3e9
sin-ARGLIST = double
sin-RET = double
LDFLAGS-bench-sin = -lm
# cos function fast path: sysdeps/ieee754/dbl-64/s_sin.c
-cos-ITER = 3e9
cos-ARGLIST = double
cos-RET = double
LDFLAGS-bench-cos = -lm
# tan function fast path: sysdeps/ieee754/dbl-64/s_tan.c
-tan-ITER = 3e9
tan-ARGLIST = double
tan-RET = double
LDFLAGS-bench-tan = -lm
# atan function fast path: sysdeps/ieee754/dbl-64/s_atan.c
-atan-ITER = 6e9
atan-ARGLIST = double
atan-RET = double
LDFLAGS-bench-atan = -lm
# pow function slowest path: sysdeps/ieee754/dbl-64/slowpow.c
-slowpow-ITER = 1e5
slowpow-ARGLIST = double:double
slowpow-RET = double
slowpow-INCLUDE = slowpow.c
LDFLAGS-bench-slowpow = -lm
# sin function slowest path: sysdeps/ieee754/dbl-64/sincos32.c
-slowsin-ITER = 3e7
slowsin-ARGLIST = double
slowsin-RET = double
slowsin-INCLUDE = slowsin.c
LDFLAGS-bench-slowsin = -lm
# cos function slowest path: sysdeps/ieee754/dbl-64/sincos32.c
-slowcos-ITER = 3e7
slowcos-ARGLIST = double
slowcos-RET = double
slowcos-INCLUDE = slowcos.c
LDFLAGS-bench-slowcos = -lm
# tan function slowest path: sysdeps/ieee754/dbl-64/mptan.c
-slowtan-ITER = 3e7
slowtan-ARGLIST = double
slowtan-RET = double
slowtan-INCLUDE = slowtan.c
LDFLAGS-bench-slowtan = -lm
# atan function slowest path: sysdeps/ieee754/dbl-64/mpatan.c
-slowatan-ITER = 3e8
slowatan-ARGLIST = double
slowatan-RET = double
slowatan-INCLUDE = slowatan.c
@@ -141,6 +122,13 @@ include ../Rules
binaries-bench := $(addprefix $(objpfx)bench-,$(bench))
+# The default duration: 10 seconds.
+ifndef BENCH_DURATION
+BENCH_DURATION := 10
+endif
+
+CPPFLAGS-nonlib = -DDURATION=$(BENCH_DURATION)
+
# This makes sure CPPFLAGS-nonlib and CFLAGS-nonlib are passed
# for all these modules.
cpp-srcs-left := $(binaries-bench:=.c)
@@ -176,5 +164,5 @@ $(objpfx)bench-%.c: %-inputs $(bench-deps)
cat $($*-INCLUDE); \
fi; \
$(..)scripts/bench.pl $(patsubst %-inputs,%,$<) \
- $($*-ITER) $($*-ARGLIST) $($*-RET); } > $@-tmp
+ $($*-ARGLIST) $($*-RET); } > $@-tmp
mv -f $@-tmp $@
diff --git a/benchtests/bench-modf.c b/benchtests/bench-modf.c
index 975a29f..90a5255 100644
--- a/benchtests/bench-modf.c
+++ b/benchtests/bench-modf.c
@@ -33,7 +33,6 @@ struct args
static volatile double ret = 0.0;
#define BENCH_FUNC(j) ({double iptr; ret = CALL_BENCH_FUNC (j, iptr);})
-#define ITER 250000000
#define FUNCNAME "modf"
#include "bench-skeleton.c"
diff --git a/benchtests/bench-skeleton.c b/benchtests/bench-skeleton.c
index 13f986d..bbd151b 100644
--- a/benchtests/bench-skeleton.c
+++ b/benchtests/bench-skeleton.c
@@ -22,13 +22,18 @@
#include <time.h>
#include <inttypes.h>
+#define TIMESPEC_AFTER(a, b) \
+ (((a).tv_sec == (b).tv_sec) ? \
+ ((a).tv_nsec > (b).tv_nsec) : \
+ ((a).tv_sec > (b).tv_sec))
int
main (int argc, char **argv)
{
- unsigned long i, j, k;
+ unsigned long i, k;
uint64_t total = 0, max = 0, min = 0x7fffffffffffffff;
- struct timespec start, end;
+ struct timespec start, end, runtime;
+ memset (&runtime, 0, sizeof (runtime));
memset (&start, 0, sizeof (start));
memset (&end, 0, sizeof (end));
@@ -39,11 +44,15 @@ main (int argc, char **argv)
Measurements close to the minimum clock resolution won't make much sense,
but it's better than having nothing at all. */
unsigned long iters = 1000 * start.tv_nsec;
- unsigned long total_iters = ITER / iters;
- for (i = 0; i < NUM_SAMPLES; i++)
+ /* Run for approximately DURATION seconds. */
+ clock_gettime (CLOCK_MONOTONIC_RAW, &runtime);
+ runtime.tv_sec += DURATION;
+
+ double d_total_i = 0;
+ while (1)
{
- for (j = 0; j < total_iters; j ++)
+ for (i = 0; i < NUM_SAMPLES; i++)
{
clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &start);
for (k = 0; k < iters; k++)
@@ -61,12 +70,25 @@ main (int argc, char **argv)
min = cur;
total += cur;
+
+ d_total_i += iters;
}
+
+ struct timespec curtime;
+
+ memset (&curtime, 0, sizeof (curtime));
+ clock_gettime (CLOCK_MONOTONIC_RAW, &curtime);
+ if (TIMESPEC_AFTER (curtime, runtime))
+ goto done;
}
- double d_total_s = total * 1e-9;
- double d_iters = iters;
- double d_total_i = (double)ITER * NUM_SAMPLES;
+ double d_total_s;
+ double d_iters;
+
+ done:
+ d_total_s = total * 1e-9;
+ d_iters = iters;
+
printf (FUNCNAME ": ITERS:%g: TOTAL:%gs, MAX:%gns, MIN:%gns, %g iter/s\n",
d_total_i, d_total_s, max / d_iters, min / d_iters,
d_total_i / d_total_s);
diff --git a/scripts/bench.pl b/scripts/bench.pl
index bb7f648..5856cfa 100755
--- a/scripts/bench.pl
+++ b/scripts/bench.pl
@@ -22,23 +22,22 @@ use warnings;
# Generate a benchmark source file for a given input.
if (@ARGV < 2) {
- die "Usage: bench.pl <function> <iterations> [parameter types] [return type]"
+ die "Usage: bench.pl <function> [parameter types] [return type]"
}
my $arg;
my $func = $ARGV[0];
-my $iters = $ARGV[1];
my @args;
my $ret = "void";
my $getret = "";
my $retval = "";
-if (@ARGV >= 3) {
- @args = split(':', $ARGV[2]);
+if (@ARGV >= 2) {
+ @args = split(':', $ARGV[1]);
}
-if (@ARGV == 4) {
- $ret = $ARGV[3];
+if (@ARGV == 3) {
+ $ret = $ARGV[2];
}
my $decl = "extern $ret $func (";
@@ -88,6 +87,5 @@ if ($ret ne "void") {
print "#define BENCH_FUNC(j) ({$getret CALL_BENCH_FUNC (j);})\n";
-print "#define ITER $iters\n";
print "#define FUNCNAME \"$func\"\n";
print "#include \"bench-skeleton.c\"\n";