This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]
Re: [v3] Add math-inline benchmark

From: Siddhesh Poyarekar <sid at reserved-bit dot com>
To: Wilco Dijkstra <wdijkstr at arm dot com>, 'GNU C Library' <libc-alpha at sourceware dot org>
Date: Wed, 16 Sep 2015 12:35:04 +0530
Subject: Re: [v3] Add math-inline benchmark
Authentication-results: sourceware.org; auth=none
References: <000001d0da84$c84110f0$58c332d0$ at com>
Sorry, I assumed someone else (Carlos?) was reviewing this and then
forgot about it.  I've put review comments inline.  The major concern I
have is that you're duplicating contents of math_private.h to compare
it against the gcc builtins, which is fine as a first hack if you want
to avoid changing actual code for the tests, but may not be a good idea
in the long term.  That is, there is a risk that the inlines may be
modified but this test doesn't realize that.

I don't mind this going in as a first revision (with other changes
recommended below) provided you're willing to remedy this situation in
the next iteration so that there is at least a failure in running the
benchtest if the definitions of the inlines change in any way.

Siddhesh


On Wed, 2015-08-19 at 14:41 +0100, Wilco Dijkstra wrote:
> diff --git a/benchtests/Makefile b/benchtests/Makefile
> index 8e615e5..91970f8 100644
> --- a/benchtests/Makefile
> +++ b/benchtests/Makefile
> @@ -36,6 +36,7 @@ string-bench := bcopy bzero memccpy memchr memcmp
> memcpy memmem memmove \
>  		strncasecmp strncat strncmp strncpy strnlen strpbrk
> strrchr \
>  		strspn strstr strcpy_chk stpcpy_chk memrchr strsep
> strtok \
>  		strcoll
> +

Unnecessary hunk.

>  string-bench-all := $(string-bench)
>  
>  # We have to generate locales
> @@ -50,7 +51,10 @@ stdlib-bench := strtod
>  
>  stdio-common-bench := sprintf
>  
> -benchset := $(string-bench-all) $(stdlib-bench) $(stdio-common
> -bench)
> +math-benchset := math-inlines
> +
> +benchset := $(string-bench-all) $(stdlib-bench) $(stdio-common
> -bench) \
> +	    $(math-benchset)
>  
>  CFLAGS-bench-ffs.c += -fno-builtin
>  CFLAGS-bench-ffsll.c += -fno-builtin
> @@ -58,6 +62,7 @@ CFLAGS-bench-ffsll.c += -fno-builtin
>  bench-malloc := malloc-thread
>  
>  $(addprefix $(objpfx)bench-,$(bench-math)): $(libm)
> +$(addprefix $(objpfx)bench-,$(math-benchset)): $(libm)
>  $(addprefix $(objpfx)bench-,$(bench-pthread)): $(shared-thread
> -library)
>  $(objpfx)bench-malloc-thread: $(shared-thread-library)
>  
> diff --git a/benchtests/bench-math-inlines.c b/benchtests/bench-math
> -inlines.c
> new file mode 100644
> index 0000000..1251935
> --- /dev/null
> +++ b/benchtests/bench-math-inlines.c
> @@ -0,0 +1,284 @@
> +/* Measure math inline functions.
> +   Copyright (C) 2015 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it
> and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later
> version.
> +
> +   The GNU C Library is distributed in the hope that it will be
> useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <http://www.gnu.org/licenses/>.ÂÂ*/
> +
> +#define SIZE 1024
> +#define TEST_MAIN
> +#define TEST_NAME "math-inlines"
> +#define TEST_FUNCTION test_main ()
> +#include "bench-timing.h"
> +#include "json-lib.h"
> +#include "bench-util.h"
> +
> +#include <stdlib.h>
> +#include <math.h>
> +#include <stdint.h>
> +
> +#define BOOLTEST(func)					  \
> +int __attribute__((noinline))				  \

Make this function and others below static.

> +func ## _f (double d, int i)				  \
> +{							  \
> +  if (func (d))						  \
> +    return (int)d + i;					  \

Space after (int).

> +  else							  \
> +    return 5;						  \
> +}							  \
> +int							  \
> +func ## _t (volatile double *p, size_t n, size_t iters)   \
> +{							  \
> +  int i, j;						  \
> +  int res = 0;						  \
> +  for (j = 0; j < iters; j++)				  \
> +    for (i = 0; i < n; i++)				  \
> +      if (func ## _f (p[i] * 2.0, i))			  \

Avoid implicit boolean coercion.

> +	res += 5;					  \
> +  return res;						  \
> +}
> +
> +#define VALUETEST(func)					  \
> +int __attribute__((noinline))				  \
> +func ## _f (double d)					  \
> +{							  \
> +  return func (d);					  \
> +}							  \
> +int							  \
> +func ## _t (volatile double *p, size_t n, size_t iters)	  \
> +{							  \
> +  int i, j;						  \
> +  int res = 0;						  \
> +  for (j = 0; j < iters; j++)				  \
> +    for (i = 0; i < n; i++)				  \
> +      res += func ## _f (p[i] * 2.0);			  \
> +  return res;						  \
> +}
> +
> +typedef union
> +{
> +  double value;
> +  uint64_t word;
> +} ieee_double_shape_type;
> +
> +#define EXTRACT_WORDS64(i,d)				  \
> +do {							  \
> +  ieee_double_shape_type gh_u;				  \
> +  gh_u.value = (d);					  \
> +  (i) = gh_u.word;					  \
> +} while (0)
> +
> +/* Inlines similar to existing math_private.h versions.  */

If seems a bit ugly to duplicate code from sysdeps/ieee754/dbl
-64/wordsize-64/math_private.h to test them.  Can you look at another
way to massage math_private.h so that you can do something as simple as
include the file and rewrite the function names using macros?  If it's
too hard or you want to do it in a second pass then please enhance the
comment to reflect that.

> +
> +extern __always_inline int

static __always_inline for this and everything else below.

> +__isnan_inl (double d)
> +{
> +  uint64_t di;
> +  EXTRACT_WORDS64 (di, d);
> +  return (di & 0x7fffffffffffffffull) > 0x7ff0000000000000ull;
> +}
> +
> +extern __always_inline int
> +__isinf_ns (double d)
> +{
> +  uint64_t di;
> +  EXTRACT_WORDS64 (di, d);
> +  return (di & 0x7fffffffffffffffull) == 0x7ff0000000000000ull;
> +}
> +extern __always_inline int
> +__finite_inl (double d)
> +{
> +  uint64_t di;
> +  EXTRACT_WORDS64 (di, d);
> +  return (di & 0x7fffffffffffffffull) < 0x7ff0000000000000ull;
> +}
> +
> +#define __isnormal_inl(X) (__fpclassify (X) == FP_NORMAL)
> +
> +/* Inlines for the builtin functions.  */
> +
> +#define __isnan_builtin(X) __builtin_isnan (X)
> +#define __isinf_ns_builtin(X) __builtin_isinf (X)
> +#define __isinf_builtin(X) __builtin_isinf_sign (X)
> +#define __isfinite_builtin(X) __builtin_isfinite (X)
> +#define __isnormal_builtin(X) __builtin_isnormal (X)
> +#define __fpclassify_builtin(X) __builtin_fpclassify (FP_NAN,
> FP_INFINITE,  \
> +				  FP_NORMAL, FP_SUBNORMAL, FP_ZERO,
> (X))
> +
> +double __attribute ((noinline))
> +kernel_standard (double x, double y, int z)
> +{
> +  return x * y + z;
> +}
> +
> +volatile double rem1 = 2.5;
> +
> +extern __always_inline double
> +remainder_test1 (double x)
> +{
> +  double y = rem1;
> +  if (((__builtin_expect (y == 0.0, 0) && !__isnan_inl (x))
> +	|| (__builtin_expect (__isinf_ns (x), 0) && !__isnan_inl
> (y))))
> +    return kernel_standard (x, y, 10);
> +
> +  return remainder (x, y);
> +}
> +
> +extern __always_inline double
> +remainder_test2 (double x)
> +{
> +  double y = rem1;
> +  if (((__builtin_expect (y == 0.0, 0) && !__builtin_isnan (x))
> +	|| (__builtin_expect (__builtin_isinf (x), 0) &&
> !__builtin_isnan (y))))
> +    return kernel_standard (x, y, 10);
> +
> +  return remainder (x, y);
> +}
> +
> +/* Create test functions for each possibility.  */
> +
> +BOOLTEST (__isnan)
> +BOOLTEST (__isnan_inl)
> +BOOLTEST (__isnan_builtin)
> +BOOLTEST (isnan)
> +
> +BOOLTEST (__isinf)
> +BOOLTEST (__isinf_builtin)
> +BOOLTEST (__isinf_ns)
> +BOOLTEST (__isinf_ns_builtin)
> +BOOLTEST (isinf)
> +
> +BOOLTEST (__finite)
> +BOOLTEST (__finite_inl)
> +BOOLTEST (__isfinite_builtin)
> +BOOLTEST (isfinite)
> +
> +BOOLTEST (__isnormal_inl)
> +BOOLTEST (__isnormal_builtin)
> +BOOLTEST (isnormal)
> +
> +VALUETEST (__fpclassify)
> +VALUETEST (__fpclassify_builtin)
> +VALUETEST (fpclassify)
> +
> +VALUETEST (remainder_test1)
> +VALUETEST (remainder_test2)
> +
> +typedef int (*proto_t) (volatile double *p, size_t n, size_t iters);
> +
> +typedef struct
> +{
> +  const char *name;
> +  proto_t fn;
> +} impl_t;
> +
> +#define IMPL(name) { #name, name ## _t }
> +
> +impl_t test_list[] =

static impl_t test_list.

> +{
> +  IMPL (__isnan),
> +  IMPL (__isnan_inl),
> +  IMPL (__isnan_builtin),
> +  IMPL (isnan),
> +
> +  IMPL (__isinf),
> +  IMPL (__isinf_ns),
> +  IMPL (__isinf_ns_builtin),
> +  IMPL (__isinf_builtin),
> +  IMPL (isinf),
> +
> +  IMPL (__finite),
> +  IMPL (__finite_inl),
> +  IMPL (__isfinite_builtin),
> +  IMPL (isfinite),
> +
> +  IMPL (__isnormal_inl),
> +  IMPL (__isnormal_builtin),
> +  IMPL (isnormal),
> +
> +  IMPL (__fpclassify),
> +  IMPL (__fpclassify_builtin),
> +  IMPL (fpclassify),
> +
> +  IMPL (remainder_test1),
> +  IMPL (remainder_test2)
> +};
> +
> +static void
> +do_one_test (json_ctx_t *json_ctx, proto_t test_fn, volatile double
> *arr,
> +	     size_t len, const char *testname)
> +{
> +  size_t iters = 500;
> +  timing_t start, stop, cur;
> +
> +  json_attr_object_begin (json_ctx, testname);
> +
> +  TIMING_NOW (start);
> +  test_fn (arr, len, iters);
> +  TIMING_NOW (stop);
> +  TIMING_DIFF (cur, start, stop);
> +
> +  json_attr_double (json_ctx, "duration", cur);
> +  json_attr_double (json_ctx, "iterations", iters);
> +  json_attr_double (json_ctx, "mean", cur / iters);
> +  json_attr_object_end (json_ctx);
> +}
> +
> +static volatile double arr1[SIZE];
> +static volatile double arr2[SIZE];
> +
> +int
> +test_main (void)
> +{
> +  json_ctx_t json_ctx;
> +  size_t i;
> +
> +  bench_start ();
> +
> +  json_init (&json_ctx, 2, stdout);
> +  json_attr_object_begin (&json_ctx, "math-inlines");

Use the TEST_NAME macro.

> +
> +  /* Create 2 test arrays, one with 10% zeroes, 10% negative values,
> +     79% positive values and 1% infinity/NaN.  The other contains
> +     50% inf, 50% NaN.  */

How do you know that this is the ratio of generated inputs?  Please
enhance the comment to describe that.

> +
> +  for (i = 0; i < SIZE; i++)
> +    {
> +      int x = rand () & 255;
> +      arr1[i] = (x < 25) ? 0.0 : ((x < 50) ? -1 : 100);
> +      if (x == 255) arr1[i] = __builtin_inf ();
> +      if (x == 254) arr1[i] = __builtin_nan ("0");
> +      arr2[i] = (x < 128) ? __builtin_inf () : __builtin_nan ("0");
> +    }
> +
> +  for (i = 0; i < sizeof (test_list) / sizeof (test_list[0]); i++)
> +    {
> +      json_attr_object_begin (&json_ctx, test_list[i].name);
> +      do_one_test (&json_ctx, test_list[i].fn, arr2, SIZE,
> "inf/nan");
> +      json_attr_object_end (&json_ctx);
> +    }
> +
> +  for (i = 0; i < sizeof (test_list) / sizeof (test_list[0]); i++)
> +    {
> +      json_attr_object_begin (&json_ctx, test_list[i].name);
> +      do_one_test (&json_ctx, test_list[i].fn, arr1, SIZE,
> "normal");
> +      json_attr_object_end (&json_ctx);
> +    }
> +
> +  json_attr_object_end (&json_ctx);
> +  return 0;
> +}
> +
> +#include "bench-util.c"
> +#include "../test-skeleton.c"
> diff --git a/benchtests/bench-skeleton.c b/benchtests/bench
> -skeleton.c
> index e357f0c..bc820df 100644
> --- a/benchtests/bench-skeleton.c
> +++ b/benchtests/bench-skeleton.c
> @@ -24,21 +24,9 @@
>  #include <inttypes.h>
>  #include "bench-timing.h"
>  #include "json-lib.h"
> +#include "bench-util.h"
>  
> -volatile unsigned int dontoptimize = 0;
> -
> -void
> -startup (void)
> -{
> -  /* This loop should cause CPU to switch to maximal freqency.
> -     This makes subsequent measurement more accurate.  We need a
> side effect
> -     to prevent the loop being deleted by compiler.
> -     This should be enough to cause CPU to speed up and it is
> simpler than
> -     running loop for constant time. This is used when user does not
> have root
> -     access to set a constant freqency.  */
> -  for (int k = 0; k < 10000000; k++)
> -    dontoptimize += 23 * dontoptimize + 2;
> -}
> +#include "bench-util.c"
>  
>  #define TIMESPEC_AFTER(a, b) \
>    (((a).tv_sec == (b).tv_sec) ?					
> 	      \
> @@ -56,7 +44,7 @@ main (int argc, char **argv)
>    if (argc == 2 && !strcmp (argv[1], "-d"))
>      detailed = true;
>  
> -  startup();
> +  bench_start ();
>  
>    memset (&runtime, 0, sizeof (runtime));
>  
> diff --git a/benchtests/bench-util.h b/benchtests/bench-util.h
> new file mode 100644
> index 0000000..930cecc
> --- /dev/null
> +++ b/benchtests/bench-util.h
> @@ -0,0 +1,28 @@
> +/* Benchmark utility functions.
> +   Copyright (C) 2015 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it
> and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later
> version.
> +
> +   The GNU C Library is distributed in the hope that it will be
> useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <http://www.gnu.org/licenses/>.ÂÂ*/
> +
> +
> +#ifndef START_ITER
> +# define START_ITER (100000000)
> +#endif
> +
> +/* bench_start reduces the random variations due to frequency
> scaling by
> +   executing a small loop with many memory accesses.  START_ITER
> controls
> +   the number of iterations.  */
> +
> +void bench_start (void);
Attachment: signature.asc
Description: This is a digitally signed message part
Follow-Ups:
- RE: [v3] Add math-inline benchmark
  - From: Wilco Dijkstra
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]