This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
Re: [PATCH v2] Add math-inline benchmark
- From: "Carlos O'Donell" <carlos at redhat dot com>
- To: Wilco Dijkstra <wdijkstr at arm dot com>, "'Siddhesh Poyarekar'" <siddhesh at redhat dot com>
- Cc: "'GNU C Library'" <libc-alpha at sourceware dot org>
- Date: Mon, 20 Jul 2015 15:23:50 -0400
- Subject: Re: [PATCH v2] Add math-inline benchmark
- Authentication-results: sourceware.org; auth=none
- References: <002001d0bfb8$b36fa330$1a4ee990$ at com> <55A907F6 dot 8000504 at redhat dot com> <20150717143406 dot GG19592 at spoyarek dot pnq dot redhat dot com> <002c01d0c30a$8f23c600$ad6b5200$ at com>
On 07/20/2015 12:38 PM, Wilco Dijkstra wrote:
>> Siddhesh Poyarekar wrote:
>> > On Fri, Jul 17, 2015 at 09:49:42AM -0400, Carlos O'Donell wrote:
>>> > > Maybe we can place this in another C file e.g. bench-util.c and #include that
>>> > > and then call those functions?
>> >
>> > Yes, that is fine.
> I've extracted the start function in bench-util.c, see updated version.
>
> Wilco
One nit; OK to commit once that is fixed.
See my comment at the very end of the patch.
> 0001-Add-bench-math-inlines.txt
>
>
> ---
> benchtests/Makefile | 7 +-
> benchtests/bench-math-inlines.c | 340 ++++++++++++++++++++++++++++++++++++++++
> benchtests/bench-skeleton.c | 18 +--
> benchtests/bench-util.c | 34 ++++
> benchtests/bench-util.h | 29 ++++
> 5 files changed, 412 insertions(+), 16 deletions(-)
> create mode 100644 benchtests/bench-math-inlines.c
> create mode 100644 benchtests/bench-util.c
> create mode 100644 benchtests/bench-util.h
>
> diff --git a/benchtests/Makefile b/benchtests/Makefile
> index 8e615e5..91970f8 100644
> --- a/benchtests/Makefile
> +++ b/benchtests/Makefile
> @@ -36,6 +36,7 @@ string-bench := bcopy bzero memccpy memchr memcmp memcpy memmem memmove \
> strncasecmp strncat strncmp strncpy strnlen strpbrk strrchr \
> strspn strstr strcpy_chk stpcpy_chk memrchr strsep strtok \
> strcoll
> +
> string-bench-all := $(string-bench)
>
> # We have to generate locales
> @@ -50,7 +51,10 @@ stdlib-bench := strtod
>
> stdio-common-bench := sprintf
>
> -benchset := $(string-bench-all) $(stdlib-bench) $(stdio-common-bench)
> +math-benchset := math-inlines
> +
> +benchset := $(string-bench-all) $(stdlib-bench) $(stdio-common-bench) \
> + $(math-benchset)
OK.
>
> CFLAGS-bench-ffs.c += -fno-builtin
> CFLAGS-bench-ffsll.c += -fno-builtin
> @@ -58,6 +62,7 @@ CFLAGS-bench-ffsll.c += -fno-builtin
> bench-malloc := malloc-thread
>
> $(addprefix $(objpfx)bench-,$(bench-math)): $(libm)
> +$(addprefix $(objpfx)bench-,$(math-benchset)): $(libm)
> $(addprefix $(objpfx)bench-,$(bench-pthread)): $(shared-thread-library)
> $(objpfx)bench-malloc-thread: $(shared-thread-library)
>
> diff --git a/benchtests/bench-math-inlines.c b/benchtests/bench-math-inlines.c
> new file mode 100644
> index 0000000..da8a433
> --- /dev/null
> +++ b/benchtests/bench-math-inlines.c
> @@ -0,0 +1,340 @@
> +/* Measure math inline functions.
OK.
> + Copyright (C) 2015 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <http://www.gnu.org/licenses/>. */
> +
> +#define SIZE 1024
> +#define TEST_MAIN
> +#define TEST_NAME "math-inlines"
> +#define TEST_FUNCTION test_main ()
> +#include "bench-timing.h"
> +#include "json-lib.h"
> +#include "bench-util.h"
> +
> +#include <stdlib.h>
> +#include <math.h>
> +#include <stdint.h>
> +
OK.
> +
> +#define BOOLTEST(func) \
> +int \
> +func ## _t (volatile double *p, size_t n, size_t iters) \
> +{ \
> + int i, j; \
> + int res = 0; \
> + for (j = 0; j < iters; j++) \
> + for (i = 0; i < n; i++) \
> + { double tmp = p[i] * 2.0; \
> + if (func (tmp)) res += 5; } \
> + return res; \
> +}
> +
> +#define VALUETEST(func) \
> +int \
> +func ## _t (volatile double *p, size_t n, size_t iters) \
> +{ \
> + int i, j; \
> + int res = 0; \
> + for (j = 0; j < iters; j++) \
> + for (i = 0; i < n; i++) \
> + { double tmp = p[i] * 2.0; \
> + if (func (tmp)) res += 5; } \
> + return res; \
> +}
> +
> +typedef union
> +{
> + double value;
> + uint64_t word;
> +} ieee_double_shape_type;
> +
> +#define EXTRACT_WORDS64(i,d) \
> +do { \
> + ieee_double_shape_type gh_u; \
> + gh_u.value = (d); \
> + (i) = gh_u.word; \
> +} while (0)
> +
> +/* Explicit inlines similar to math_private.h versions. */
> +
> +extern __always_inline int
> +__isnan_inl (double d)
> +{
> + uint64_t di;
> + EXTRACT_WORDS64 (di, d);
> + return (di & 0x7fffffffffffffffull) > 0x7ff0000000000000ull;
> +}
> +
> +extern __always_inline int
> +__isnan_builtin (double d)
> +{
> + return __builtin_isnan (d);
> +}
> +
> +extern __always_inline int
> +__isinf_inl (double x)
> +{
> + uint64_t ix;
> + EXTRACT_WORDS64 (ix,x);
> + if ((ix << 1) != 0xffe0000000000000ull)
> + return 0;
> + return (int)(ix >> 32);
> +}
> +
> +extern __always_inline int
> +__isinf_ns (double d)
> +{
> + uint64_t di;
> + EXTRACT_WORDS64 (di, d);
> + return (di & 0x7fffffffffffffffull) == 0x7ff0000000000000ull;
> +}
> +
> +extern __always_inline int
> +__isinf_ns_builtin (double d)
> +{
> + return __builtin_isinf (d);
> +}
> +
> +extern __always_inline int
> +__isinf_builtin (double d)
> +{
> + return __builtin_isinf_sign (d);
> +}
> +
> +
> +extern __always_inline int
> +__finite_inl (double d)
> +{
> + uint64_t di;
> + EXTRACT_WORDS64 (di, d);
> + return (di & 0x7fffffffffffffffull) < 0x7ff0000000000000ull;
> +}
> +
> +extern __always_inline int
> +__isfinite_builtin (double d)
> +{
> + return __builtin_isfinite (d);
> +}
> +
> +
> +/* Explicit inline similar to existing math.h implementation. */
> +
> +#define __isnormal_inl(X) (__fpclassify (X) == FP_NORMAL)
> +#define __isnormal_inl2(X) (fpclassify (X) == FP_NORMAL)
> +
> +extern __always_inline int
> +__isnormal_builtin (double d)
> +{
> + return __builtin_isnormal (d);
> +}
> +
> +/* Test fpclassify with use of only 2 of the 5 results. */
> +
> +extern __always_inline int
> +__fpclassify_test1 (double d)
> +{
> + int cl = fpclassify (d);
> + return cl == FP_NAN || cl == FP_INFINITE;
> +}
> +
> +extern __always_inline int
> +__fpclassify_test2 (double d)
> +{
> + return __builtin_isnan (d) || __builtin_isinf (d);
> +}
> +
> +double __attribute ((noinline))
> +kernel_standard (double x, double y, int z)
> +{
> + return x * y + z;
> +}
> +
> +double __attribute ((noinline))
> +remainder2 (double x, double y)
> +{
> + if (((__builtin_expect (y == 0.0, 0) && !__builtin_isnan (x))
> + || (__builtin_expect (__builtin_isinf (x), 0) && !__builtin_isnan (y))))
> + return kernel_standard (x, y, 10);
> +
> + return remainder (x, y);
> +}
> +
> +double __attribute ((noinline))
> +remainder1 (double x, double y)
> +{
> + if (((__builtin_expect (y == 0.0, 0) && !__isnan_inl (x))
> + || (__builtin_expect (__isinf_ns (x), 0) && !__isnan_inl (y))))
> + return kernel_standard (x, y, 10);
> +
> + return remainder (x, y);
> +}
> +
> +volatile double rem1 = 2.5;
> +
> +extern __always_inline int
> +remainder_test1 (double d)
> +{
> + return remainder1 (d, rem1);
> +}
> +
> +extern __always_inline int
> +remainder_test2 (double d)
> +{
> + return remainder2 (d, rem1);
> +}
> +
> +/* Create test functions for each possibility. */
> +
> +BOOLTEST (__isnan)
> +BOOLTEST (__isnan_inl)
> +BOOLTEST (__isnan_builtin)
> +BOOLTEST (isnan)
> +
> +BOOLTEST (__isinf)
> +BOOLTEST (__isinf_inl)
> +BOOLTEST (__isinf_ns)
> +BOOLTEST (__isinf_ns_builtin)
> +BOOLTEST (__isinf_builtin)
> +BOOLTEST (isinf)
> +
> +BOOLTEST (__finite)
> +BOOLTEST (__finite_inl)
> +BOOLTEST (__isfinite_builtin)
> +BOOLTEST (isfinite)
> +
> +BOOLTEST (__isnormal_inl)
> +BOOLTEST (__isnormal_inl2)
> +BOOLTEST (__isnormal_builtin)
> +BOOLTEST (isnormal)
> +
> +BOOLTEST (__fpclassify_test1)
> +BOOLTEST (__fpclassify_test2)
> +VALUETEST (__fpclassify)
> +VALUETEST (fpclassify)
> +
> +BOOLTEST (remainder_test1)
> +BOOLTEST (remainder_test2)
> +
> +typedef int (*proto_t) (volatile double *p, size_t n, size_t iters);
> +
> +typedef struct
> +{
> + const char *name;
> + proto_t fn;
> +} impl_t;
> +
> +#define IMPL(name) { #name, name }
> +
> +impl_t test_list[] =
> +{
> + IMPL (__isnan_t),
> + IMPL (__isnan_inl_t),
> + IMPL (__isnan_builtin_t),
> + IMPL (isnan_t),
> +
> + IMPL (__isinf_t),
> + IMPL (__isinf_inl_t),
> + IMPL (__isinf_ns_t),
> + IMPL (__isinf_ns_builtin_t),
> + IMPL (__isinf_builtin_t),
> + IMPL (isinf_t),
> +
> + IMPL (__finite_t),
> + IMPL (__finite_inl_t),
> + IMPL (__isfinite_builtin_t),
> + IMPL (isfinite_t),
> +
> + IMPL (__isnormal_inl_t),
> + IMPL (__isnormal_inl2_t),
> + IMPL (__isnormal_builtin_t),
> + IMPL (isnormal_t),
> +
> + IMPL (__fpclassify_test1_t),
> + IMPL (__fpclassify_test2_t),
> + IMPL (__fpclassify_t),
> + IMPL (fpclassify_t),
> +
> + IMPL (remainder_test1_t),
> + IMPL (remainder_test2_t)
> +};
> +
> +static void
> +do_one_test (json_ctx_t *json_ctx, proto_t test_fn, volatile double *arr,
> + size_t len, const char *testname)
> +{
> + size_t iters = 500;
> + timing_t start, stop, cur;
> +
> + json_attr_object_begin (json_ctx, testname);
> +
> + TIMING_NOW (start);
> + test_fn (arr, len, iters);
> + TIMING_NOW (stop);
> + TIMING_DIFF (cur, start, stop);
> +
> + json_attr_double (json_ctx, "duration", cur);
> + json_attr_double (json_ctx, "iterations", iters);
> + json_attr_double (json_ctx, "mean", cur / iters);
> + json_attr_object_end (json_ctx);
> +}
> +
> +static volatile double arr1[SIZE];
> +static volatile double arr2[SIZE];
> +
> +int
> +test_main (void)
> +{
> + json_ctx_t json_ctx;
> + size_t i;
> +
> + bench_start ();
> +
> + json_init (&json_ctx, 2, stdout);
> + json_attr_object_begin (&json_ctx, "math-inlines");
> +
> + /* Create 2 test arrays, one with 10% zeroes, 10% negative values,
> + 79% positive values and 1% infinity/NaN. The other contains
> + 50% inf, 50% NaN. */
> +
> + for (i = 0; i < SIZE; i++)
> + {
> + int x = rand () & 255;
> + arr1[i] = (x < 25) ? 0.0 : ((x < 50) ? -1 : 100);
> + if (x == 255) arr1[i] = __builtin_inf ();
> + if (x == 254) arr1[i] = __builtin_nan ("0");
> + arr2[i] = (x < 128) ? __builtin_inf () : __builtin_nan ("0");
> + }
> +
> + for (i = 0; i < sizeof (test_list) / sizeof (test_list[0]); i++)
> + {
> + json_attr_object_begin (&json_ctx, test_list[i].name);
> + do_one_test (&json_ctx, test_list[i].fn, arr2, SIZE, "inf/nan");
> + json_attr_object_end (&json_ctx);
> + }
> +
> + for (i = 0; i < sizeof (test_list) / sizeof (test_list[0]); i++)
> + {
> + json_attr_object_begin (&json_ctx, test_list[i].name);
> + do_one_test (&json_ctx, test_list[i].fn, arr1, SIZE, "normal");
> + json_attr_object_end (&json_ctx);
> + }
> +
> + json_attr_object_end (&json_ctx);
> + return 0;
> +}
> +
> +#include "bench-util.c"
> +#include "../test-skeleton.c"
> diff --git a/benchtests/bench-skeleton.c b/benchtests/bench-skeleton.c
> index e357f0c..bc820df 100644
> --- a/benchtests/bench-skeleton.c
> +++ b/benchtests/bench-skeleton.c
> @@ -24,21 +24,9 @@
> #include <inttypes.h>
> #include "bench-timing.h"
> #include "json-lib.h"
> +#include "bench-util.h"
>
> -volatile unsigned int dontoptimize = 0;
> -
> -void
> -startup (void)
> -{
> - /* This loop should cause CPU to switch to maximal freqency.
> - This makes subsequent measurement more accurate. We need a side effect
> - to prevent the loop being deleted by compiler.
> - This should be enough to cause CPU to speed up and it is simpler than
> - running loop for constant time. This is used when user does not have root
> - access to set a constant freqency. */
> - for (int k = 0; k < 10000000; k++)
> - dontoptimize += 23 * dontoptimize + 2;
> -}
> +#include "bench-util.c"
OK.
>
> #define TIMESPEC_AFTER(a, b) \
> (((a).tv_sec == (b).tv_sec) ? \
> @@ -56,7 +44,7 @@ main (int argc, char **argv)
> if (argc == 2 && !strcmp (argv[1], "-d"))
> detailed = true;
>
> - startup();
> + bench_start ();
OK.
>
> memset (&runtime, 0, sizeof (runtime));
>
> diff --git a/benchtests/bench-util.c b/benchtests/bench-util.c
> new file mode 100644
> index 0000000..c4149ae
> --- /dev/null
> +++ b/benchtests/bench-util.c
> @@ -0,0 +1,34 @@
> +/* Benchmark utility functions.
> + Copyright (C) 2015 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <http://www.gnu.org/licenses/>. */
> +
> +
> +static volatile unsigned int dontoptimize = 0;
> +
> +void
> +bench_start (void)
> +{
> + /* This loop should cause CPU to switch to maximal frequency.
> + This makes subsequent measurement more accurate. We need a side effect
> + to prevent the loop being deleted by compiler.
> + This should be enough to cause CPU to speed up and it is simpler than
> + running loop for constant time. This is used when user does not have root
> + access to set a constant frequency. */
> +
> + for (int k = 0; k < START_ITER; k++)
> + dontoptimize += 23 * dontoptimize + 2;
> +}
OK.
> diff --git a/benchtests/bench-util.h b/benchtests/bench-util.h
> new file mode 100644
> index 0000000..b30b085
> --- /dev/null
> +++ b/benchtests/bench-util.h
> @@ -0,0 +1,29 @@
> +/* Benchmark utility functions.
> + Copyright (C) 2015 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <http://www.gnu.org/licenses/>. */
> +
> +
> +#ifndef START_ITER
> +# define START_ITER (100000000)
> +#endif
> +
> +/* bench_start reduces the random variations due to frequency scaling by
> + executing a small loop with many memory accesses. START_ITER controls
> + the number of iterations. */
> +
> +void
> +bench_start (void);
Please put this declaration on one line. Only function definitions are split
across two lines, so that they are easy to find with e.g. grep '^bench_start' *, per the GNU Coding Standards.
OK with this change.
> -- 1.9.1
Cheers,
Carlos.