This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]
Re: [PATCH v2] Add math-inline benchmark

From: Ondřej Bílka <neleai at seznam dot cz>
To: Wilco Dijkstra <wdijkstr at arm dot com>
Cc: 'GNU C Library' <libc-alpha at sourceware dot org>
Date: Fri, 17 Jul 2015 00:50:56 +0200
Subject: Re: [PATCH v2] Add math-inline benchmark
Authentication-results: sourceware.org; auth=none
References: <002001d0bfb8$b36fa330$1a4ee990$ at com>
On Thu, Jul 16, 2015 at 12:15:19PM +0100, Wilco Dijkstra wrote:
> Add a benchmark for isinf/isnan/isnormal/isfinite/fpclassify. This new version adds explicit tests
> for the GCC built-ins and uses json format as suggested and no longer includes any string headers.
> The test uses 2 arrays with 1024 doubles, one with 99% finite FP numbers (10% zeroes, 10% negative)
> and 1% inf/NaN, the other with 50% inf, and 50% NaN.
> 
> Results show that using the GCC built-ins in math.h gives huge speedups due to avoiding explicit
> calls, PLT indirection to execute a function with 3-4 instructions - around 7x on AArch64 and 2.8x
> on x64. The GCC builtins have better performance than the existing math_private inlines for __isnan,
> __finite and __isinf_ns, so these should be removed.
>
No, this benchmark is invalid for the following two reasons.

1) It doesn't measure a real workload at all. Constructing a large constant
could be costly, and by inlining, this benchmark ignores that cost.
2) The results on x64 don't measure the inlines but an inferior version, as they use
assembly to convert a double into an integer.

Joseph and I have told you multiple times to measure these, and I sent
benchmarks to demonstrate the effects. So I fixed your benchmark, and now
it clearly shows that in all cases the math_private inlines are better than
the builtins on x64 (for now it is x64-only due to EXTRACT_WORDS64; you need to sync that with math-private).
Even remainder is slower.

So at least on x64 we should publish the math_private inlines instead of using
the slow builtins.

I also added better finite and isnormal inlines; these should have the same
speed as isnan. The main improvement is multiplying by 2 instead of ANDing
with a constant to mask the sign bit.

I tried to save a bit of space by using 32-bit constants
instead of 64-bit ones (foo_new2), but it doesn't help.



  "math-inlines": {
   "__isnan_t": {
    "inf/nan": {
     "duration": 1.48695e+07,
     "iterations": 500,
     "mean": 29738
    }
   },
   "__isnan_inl_t": {
    "inf/nan": {
     "duration": 1.12726e+07,
     "iterations": 500,
     "mean": 22545
    }
   },
   "__isnan_builtin_t": {
    "inf/nan": {
     "duration": 1.06417e+07,
     "iterations": 500,
     "mean": 21283
    }
   },
   "isnan_t": {
    "inf/nan": {
     "duration": 1.47359e+07,
     "iterations": 500,
     "mean": 29471
    }
   },
   "isnan_new_t": {
    "inf/nan": {
     "duration": 1.0537e+07,
     "iterations": 500,
     "mean": 21073
    }
   },
   "__isinf_t": {
    "inf/nan": {
     "duration": 1.68862e+07,
     "iterations": 500,
     "mean": 33772
    }
   },
   "__isinf_inl_t": {
    "inf/nan": {
     "duration": 1.14818e+07,
     "iterations": 500,
     "mean": 22963
    }
   },
   "__isinf_ns_t": {
    "inf/nan": {
     "duration": 1.18318e+07,
     "iterations": 500,
     "mean": 23663
    }
   },
   "__isinf_ns_builtin_t": {
    "inf/nan": {
     "duration": 1.20574e+07,
     "iterations": 500,
     "mean": 24114
    }
   },
   "__isinf_builtin_t": {
    "inf/nan": {
     "duration": 1.22987e+07,
     "iterations": 500,
     "mean": 24597
    }
   },
   "isinf_t": {
    "inf/nan": {
     "duration": 1.68442e+07,
     "iterations": 500,
     "mean": 33688
    }
   },
   "isinf_new_t": {
    "inf/nan": {
     "duration": 1.26923e+07,
     "iterations": 500,
     "mean": 25384
    }
   },
   "isinf_new2_t": {
    "inf/nan": {
     "duration": 1.21769e+07,
     "iterations": 500,
     "mean": 24353
    }
   },
   "__finite_t": {
    "inf/nan": {
     "duration": 6.00459e+06,
     "iterations": 500,
     "mean": 12009
    }
   },
   "__finite_inl_t": {
    "inf/nan": {
     "duration": 3.45365e+06,
     "iterations": 500,
     "mean": 6907
    }
   },
   "__isfinite_builtin_t": {
    "inf/nan": {
     "duration": 3.41982e+06,
     "iterations": 500,
     "mean": 6839
    }
   },
   "isfinite_t": {
    "inf/nan": {
     "duration": 5.98703e+06,
     "iterations": 500,
     "mean": 11974
    }
   },
   "finite_new_t": {
    "inf/nan": {
     "duration": 3.41983e+06,
     "iterations": 500,
     "mean": 6839
    }
   },
   "finite_new2_t": {
    "inf/nan": {
     "duration": 3.4199e+06,
     "iterations": 500,
     "mean": 6839
    }
   },
   "__isnormal_inl_t": {
    "inf/nan": {
     "duration": 7.75655e+06,
     "iterations": 500,
     "mean": 15513
    }
   },
   "__isnormal_inl2_t": {
    "inf/nan": {
     "duration": 7.71059e+06,
     "iterations": 500,
     "mean": 15421
    }
   },
   "__isnormal_builtin_t": {
    "inf/nan": {
     "duration": 2.99474e+06,
     "iterations": 500,
     "mean": 5989
    }
   },
   "isnormal_t": {
    "inf/nan": {
     "duration": 7.69216e+06,
     "iterations": 500,
     "mean": 15384
    }
   },
   "isnormal_new_t": {
    "inf/nan": {
     "duration": 3.42363e+06,
     "iterations": 500,
     "mean": 6847
    }
   },
   "isnormal_new2_t": {
    "inf/nan": {
     "duration": 3.01772e+06,
     "iterations": 500,
     "mean": 6035
    }
   },
   "__fpclassify_test1_t": {
    "inf/nan": {
     "duration": 2.31874e+07,
     "iterations": 500,
     "mean": 46374
    }
   },
   "__fpclassify_test2_t": {
    "inf/nan": {
     "duration": 1.78065e+07,
     "iterations": 500,
     "mean": 35613
    }
   },
   "__fpclassify_t": {
    "inf/nan": {
     "duration": 5.13234e+06,
     "iterations": 500,
     "mean": 10264
    }
   },
   "fpclassify_t": {
    "inf/nan": {
     "duration": 5.20095e+06,
     "iterations": 500,
     "mean": 10401
    }
   },
   "remainder_test1_t": {
    "inf/nan": {
     "duration": 2.65477e+07,
     "iterations": 500,
     "mean": 53095
    }
   },
   "remainder_test2_t": {
    "inf/nan": {
     "duration": 2.80853e+07,
     "iterations": 500,
     "mean": 56170
    }
   },
   "__isnan_t": {
    "normal": {
     "duration": 6.50742e+06,
     "iterations": 500,
     "mean": 13014
    }
   },
   "__isnan_inl_t": {
    "normal": {
     "duration": 3.49208e+06,
     "iterations": 500,
     "mean": 6984
    }
   },
   "__isnan_builtin_t": {
    "normal": {
     "duration": 2.65462e+06,
     "iterations": 500,
     "mean": 5309
    }
   },
   "isnan_t": {
    "normal": {
     "duration": 6.47484e+06,
     "iterations": 500,
     "mean": 12949
    }
   },
   "isnan_new_t": {
    "normal": {
     "duration": 2.6487e+06,
     "iterations": 500,
     "mean": 5297
    }
   },
   "__isinf_t": {
    "normal": {
     "duration": 6.50518e+06,
     "iterations": 500,
     "mean": 13010
    }
   },
   "__isinf_inl_t": {
    "normal": {
     "duration": 3.15952e+06,
     "iterations": 500,
     "mean": 6319
    }
   },
   "__isinf_ns_t": {
    "normal": {
     "duration": 3.51585e+06,
     "iterations": 500,
     "mean": 7031
    }
   },
   "__isinf_ns_builtin_t": {
    "normal": {
     "duration": 3.51377e+06,
     "iterations": 500,
     "mean": 7027
    }
   },
   "__isinf_builtin_t": {
    "normal": {
     "duration": 4.36361e+06,
     "iterations": 500,
     "mean": 8727
    }
   },
   "isinf_t": {
    "normal": {
     "duration": 6.51039e+06,
     "iterations": 500,
     "mean": 13020
    }
   },
   "isinf_new_t": {
    "normal": {
     "duration": 3.09707e+06,
     "iterations": 500,
     "mean": 6194
    }
   },
   "isinf_new2_t": {
    "normal": {
     "duration": 3.11053e+06,
     "iterations": 500,
     "mean": 6221
    }
   },
   "__finite_t": {
    "normal": {
     "duration": 3.68569e+07,
     "iterations": 500,
     "mean": 73713
    }
   },
   "__finite_inl_t": {
    "normal": {
     "duration": 3.42074e+07,
     "iterations": 500,
     "mean": 68414
    }
   },
   "__isfinite_builtin_t": {
    "normal": {
     "duration": 3.43805e+07,
     "iterations": 500,
     "mean": 68760
    }
   },
   "isfinite_t": {
    "normal": {
     "duration": 3.67975e+07,
     "iterations": 500,
     "mean": 73595
    }
   },
   "finite_new_t": {
    "normal": {
     "duration": 3.40305e+07,
     "iterations": 500,
     "mean": 68061
    }
   },
   "finite_new2_t": {
    "normal": {
     "duration": 3.40128e+07,
     "iterations": 500,
     "mean": 68025
    }
   },
   "__isnormal_inl_t": {
    "normal": {
     "duration": 3.87965e+07,
     "iterations": 500,
     "mean": 77592
    }
   },
   "__isnormal_inl2_t": {
    "normal": {
     "duration": 3.87941e+07,
     "iterations": 500,
     "mean": 77588
    }
   },
   "__isnormal_builtin_t": {
    "normal": {
     "duration": 3.61693e+07,
     "iterations": 500,
     "mean": 72338
    }
   },
   "isnormal_t": {
    "normal": {
     "duration": 3.87878e+07,
     "iterations": 500,
     "mean": 77575
    }
   },
   "isnormal_new_t": {
    "normal": {
     "duration": 3.45548e+07,
     "iterations": 500,
     "mean": 69109
    }
   },
   "isnormal_new2_t": {
    "normal": {
     "duration": 3.41735e+07,
     "iterations": 500,
     "mean": 68347
    }
   },
   "__fpclassify_test1_t": {
    "normal": {
     "duration": 8.74787e+06,
     "iterations": 500,
     "mean": 17495
    }
   },
   "__fpclassify_test2_t": {
    "normal": {
     "duration": 3.17414e+06,
     "iterations": 500,
     "mean": 6348
    }
   },
   "__fpclassify_t": {
    "normal": {
     "duration": 6.0656e+06,
     "iterations": 500,
     "mean": 12131
    }
   },
   "fpclassify_t": {
    "normal": {
     "duration": 6.07758e+06,
     "iterations": 500,
     "mean": 12155
    }
   },
   "remainder_test1_t": {
    "normal": {
     "duration": 2.54391e+07,
     "iterations": 500,
     "mean": 50878
    }
   },
   "remainder_test2_t": {
    "normal": {
     "duration": 2.65189e+07,
     "iterations": 500,
     "mean": 53037
    }
   }
  }


diff --git a/benchtests/Makefile b/benchtests/Makefile
index 8e615e5..51de91e 100644
--- a/benchtests/Makefile
+++ b/benchtests/Makefile
@@ -36,6 +36,7 @@ string-bench := bcopy bzero memccpy memchr memcmp memcpy memmem memmove \
 		strncasecmp strncat strncmp strncpy strnlen strpbrk strrchr \
 		strspn strstr strcpy_chk stpcpy_chk memrchr strsep strtok \
 		strcoll
+
 string-bench-all := $(string-bench)
 
 # We have to generate locales
@@ -50,7 +51,9 @@ stdlib-bench := strtod
 
 stdio-common-bench := sprintf
 
-benchset := $(string-bench-all) $(stdlib-bench) $(stdio-common-bench)
+math-benchset := math-inlines
+
+benchset := $(math-benchset) $(string-bench-all) $(stdlib-bench) $(stdio-common-bench) 
 
 CFLAGS-bench-ffs.c += -fno-builtin
 CFLAGS-bench-ffsll.c += -fno-builtin
@@ -58,6 +61,7 @@ CFLAGS-bench-ffsll.c += -fno-builtin
 bench-malloc := malloc-thread
 
 $(addprefix $(objpfx)bench-,$(bench-math)): $(libm)
+$(addprefix $(objpfx)bench-,$(math-benchset)): $(libm)
 $(addprefix $(objpfx)bench-,$(bench-pthread)): $(shared-thread-library)
 $(objpfx)bench-malloc-thread: $(shared-thread-library)
 
diff --git a/benchtests/bench-math-inlines.c b/benchtests/bench-math-inlines.c
new file mode 100644
index 0000000..222fdc3
--- /dev/null
+++ b/benchtests/bench-math-inlines.c
@@ -0,0 +1,461 @@
+/* Measure math inline functions.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define SIZE 1024
+#define TEST_MAIN
+#define TEST_NAME "math-inlines"
+#define TEST_FUNCTION test_main ()
+#include "bench-timing.h"
+#include "json-lib.h"
+
+#include <stdlib.h>
+#include <math.h>
+#include <stdint.h>
+
+
+#define BOOLTEST(func)					  \
+int __attribute__((noinline)) func## _t_t (double d)	  \
+{							  \
+  if (func(d))						  \
+    return 3 * sin (d);					  \
+  else							  \
+    return 2;						  \
+}							  \
+int							  \
+func ## _t (volatile double *p, size_t n, size_t iters)   \
+{							  \
+  int i, j;						  \
+  int res = 0;						  \
+  for (j = 0; j < iters; j++)				  \
+    for (i = 0; i < n; i++)				  \
+      res += func## _t_t (2.0 * p[i]); 			  \
+  return res;						  \
+}
+
+#define VALUETEST(func)					  \
+int							  \
+func ## _t (volatile double *p, size_t n, size_t iters)	  \
+{							  \
+  int i, j;						  \
+  int res = 0;						  \
+  for (j = 0; j < iters; j++)				  \
+    for (i = 0; i < n; i++)				  \
+      { double tmp = p[i] * 2.0;			  \
+	if (func (tmp)) res += 5; }			  \
+  return res;						  \
+}
+
+typedef union
+{
+  double value;
+  uint64_t word;
+} ieee_double_shape_type;
+
+
+
+#define EXTRACT_WORDS64(i, d)                                                 \
+  do {                                                                        \
+    int64_t i_;                                                               \
+    asm ("movq %1, %0" : "=rm" (i_) : "x" ((double) (d)));                   \
+    (i) = i_;                                                                 \
+  } while (0)
+
+extern __always_inline int
+isinf_new (double dx)
+{
+  uint64_t di;
+  EXTRACT_WORDS64 (di, dx);
+  /* Drop the sign bit: only +/-Inf leaves exactly 0x7ff with no mantissa.  */
+  if (__builtin_expect ((di << 1) != 0xffe0000000000000ull, 1))
+    return 0;
+
+  return (int) (di >> 32);
+}
+
+extern __always_inline int
+isinf_new2 (double dx)
+{
+  uint64_t di;
+  EXTRACT_WORDS64 (di, dx);
+
+  if (__builtin_expect(((unsigned int)(di >> 31)) != 0xffe00000ull,1))
+    return 0;
+  if ((int)di)
+    return 0;
+
+  return (int) (di >> 32);
+}
+
+
+
+
+extern __always_inline int
+finite_new (double dx)
+{ 
+  uint64_t di;
+  EXTRACT_WORDS64 (di, dx);
+
+  return (2 * di) < 2 * 0x7ff0000000000000ull;
+}
+
+extern __always_inline int
+finite_new2 (double dx)
+{ 
+  uint64_t di;
+  EXTRACT_WORDS64 (di, dx);
+
+  return ((unsigned int)(di >> 31)) < 2 * 0x7ff00000ull;
+}
+
+
+
+extern __always_inline int
+isnan_new (double dx)
+{ 
+  uint64_t di;
+  EXTRACT_WORDS64 (di, dx);
+  
+  return (2 * di) > 2 * 0x7ff0000000000000ull;
+}
+
+
+
+extern __always_inline int
+isnormal_new (double dx)
+{
+  uint64_t di;
+  EXTRACT_WORDS64 (di, dx);
+  /* 2 * di drops the sign; the biased exponent must lie in [1, 2046].  */
+  return (((2 * di) >> 53) - 1 < 2046);
+}
+
+extern __always_inline int
+isnormal_new2 (double dx)
+{
+  uint64_t di;
+  EXTRACT_WORDS64 (di, dx);
+  /* ULL, not UL: shifting a 32-bit long (e.g. on x32) by 53 is undefined.  */
+  return (2 * di - (1ULL << 53) < (2046ULL << 53));
+}
+
+/* Explicit inlines similar to math_private.h versions.  */
+
+extern __always_inline int
+__isnan_inl (double d)
+{
+  uint64_t di;
+  EXTRACT_WORDS64 (di, d);
+  return (di & 0x7fffffffffffffffull) > 0x7ff0000000000000ull;
+}
+
+extern __always_inline int
+__isnan_builtin (double d)
+{
+  return __builtin_isnan (d);
+}
+
+extern __always_inline int
+__isinf_inl (double x)
+{
+  uint64_t ix;
+  EXTRACT_WORDS64 (ix,x);
+  if ((ix << 1) != 0xffe0000000000000ull)
+    return 0;
+  return (int)(ix >> 32);
+}
+
+extern __always_inline int
+__isinf_ns (double d)
+{
+  uint64_t di;
+  EXTRACT_WORDS64 (di, d);
+  return (di & 0x7fffffffffffffffull) == 0x7ff0000000000000ull;
+}
+
+extern __always_inline int
+__isinf_ns_builtin (double d)
+{
+  return __builtin_isinf (d);
+}
+
+extern __always_inline int
+__isinf_builtin (double d)
+{
+  return __builtin_isinf_sign (d);
+}
+
+
+extern __always_inline int
+__finite_inl (double d)
+{
+  uint64_t di;
+  EXTRACT_WORDS64 (di, d);
+  return (di & 0x7fffffffffffffffull) < 0x7ff0000000000000ull;
+}
+
+extern __always_inline int
+__isfinite_builtin (double d)
+{
+  return __builtin_isfinite (d);
+}
+
+
+/* Explicit inline similar to existing math.h implementation.  */
+
+#define __isnormal_inl(X) (__fpclassify (X) == FP_NORMAL)
+#define __isnormal_inl2(X) (fpclassify (X) == FP_NORMAL)
+
+extern __always_inline int
+__isnormal_builtin (double d)
+{
+  return __builtin_isnormal (d);
+}
+
+/* Test fpclassify with use of only 2 of the 5 results.  */
+
+extern __always_inline int
+__fpclassify_test1 (double d)
+{
+  int cl = fpclassify (d);
+  return cl == FP_NAN || cl == FP_INFINITE;
+}
+
+extern __always_inline int
+__fpclassify_test2 (double d)
+{
+  return __builtin_isnan (d) || __builtin_isinf (d);
+}
+
+double __attribute ((noinline))
+kernel_standard (double x, double y, int z)
+{
+  return x * y + z;
+}
+
+double __attribute ((noinline))
+remainder2 (double x, double y)
+{
+  if (((__builtin_expect (y == 0.0, 0) && !__builtin_isnan (x))
+	|| (__builtin_expect (__builtin_isinf (x), 0) && !__builtin_isnan (y))))
+    return kernel_standard (x, y, 10);
+
+  return remainder (x, y);
+}
+
+double __attribute ((noinline))
+remainder1 (double x, double y)
+{
+  if (((__builtin_expect (y == 0.0, 0) && !__isnan_inl (x))
+       || (__builtin_expect (__isinf_inl (x), 0) && !__isnan_inl (y))))
+    return kernel_standard (x, y, 10);
+
+  return remainder (x, y);
+}
+
+volatile double rem1 = 2.5;
+
+extern __always_inline int
+remainder_test1 (double d)
+{
+  return remainder1 (d, rem1);
+}
+
+extern __always_inline int
+remainder_test2 (double d)
+{
+  return remainder2 (d, rem1);
+}
+
+/* Create test functions for each possibility.  */
+
+BOOLTEST (__isnan)
+BOOLTEST (__isnan_inl)
+BOOLTEST (__isnan_builtin)
+BOOLTEST (isnan)
+BOOLTEST (isnan_new)
+
+BOOLTEST (__isinf)
+BOOLTEST (__isinf_inl)
+BOOLTEST (__isinf_ns)
+BOOLTEST (__isinf_ns_builtin)
+BOOLTEST (__isinf_builtin)
+BOOLTEST (isinf)
+BOOLTEST (isinf_new)
+BOOLTEST (isinf_new2)
+
+
+BOOLTEST (__finite)
+BOOLTEST (__finite_inl)
+BOOLTEST (__isfinite_builtin)
+BOOLTEST (isfinite)
+BOOLTEST (finite_new)
+BOOLTEST (finite_new2)
+
+
+BOOLTEST (__isnormal_inl)
+BOOLTEST (__isnormal_inl2)
+BOOLTEST (__isnormal_builtin)
+BOOLTEST (isnormal)
+BOOLTEST (isnormal_new)
+BOOLTEST (isnormal_new2)
+
+
+
+BOOLTEST (__fpclassify_test1)
+BOOLTEST (__fpclassify_test2)
+VALUETEST (__fpclassify)
+VALUETEST (fpclassify)
+
+BOOLTEST (remainder_test1)
+BOOLTEST (remainder_test2)
+
+typedef int (*proto_t) (volatile double *p, size_t n, size_t iters);
+
+typedef struct
+{
+  const char *name;
+  proto_t fn;
+} impl_t;
+
+#define IMPL(name) { #name, name }
+
+impl_t test_list[] =
+{
+  IMPL (__isnan_t),
+  IMPL (__isnan_inl_t),
+  IMPL (__isnan_builtin_t),
+  IMPL (isnan_t),
+  IMPL (isnan_new_t),
+
+
+  IMPL (__isinf_t),
+  IMPL (__isinf_inl_t),
+  IMPL (__isinf_ns_t),
+  IMPL (__isinf_ns_builtin_t),
+  IMPL (__isinf_builtin_t),
+  IMPL (isinf_t),
+  IMPL (isinf_new_t),
+  IMPL (isinf_new2_t),
+
+
+  IMPL (__finite_t),
+  IMPL (__finite_inl_t),
+  IMPL (__isfinite_builtin_t),
+  IMPL (isfinite_t),
+  IMPL (finite_new_t),
+  IMPL (finite_new2_t),
+
+
+  IMPL (__isnormal_inl_t),
+  IMPL (__isnormal_inl2_t),
+  IMPL (__isnormal_builtin_t),
+  IMPL (isnormal_t),
+  IMPL (isnormal_new_t),
+  IMPL (isnormal_new2_t),
+
+
+  IMPL (__fpclassify_test1_t),
+  IMPL (__fpclassify_test2_t),
+  IMPL (__fpclassify_t),
+  IMPL (fpclassify_t),
+
+  IMPL (remainder_test1_t),
+  IMPL (remainder_test2_t)
+};
+
+static void
+do_one_test (json_ctx_t *json_ctx, proto_t test_fn, volatile double *arr,
+	     size_t len, const char *testname)
+{
+  size_t iters = 500;
+  timing_t start, stop, cur;
+  /* Time one TEST_FN call (ITERS passes over ARR); report it as TESTNAME.  */
+  json_attr_object_begin (json_ctx, testname);
+
+  TIMING_NOW (start);
+  test_fn (arr, len, iters);
+  TIMING_NOW (stop);
+  TIMING_DIFF (cur, start, stop);
+  /* Divide as double: timing_t looks integral (TODO confirm) and truncates.  */
+  json_attr_double (json_ctx, "duration", cur);
+  json_attr_double (json_ctx, "iterations", iters);
+  json_attr_double (json_ctx, "mean", (double) cur / iters);
+  json_attr_object_end (json_ctx);
+}
+
+volatile unsigned int dontoptimize = 0;
+
+void
+startup (void)
+{
+  /* This loop should cause the CPU to switch to its maximal frequency,
+     which makes the subsequent measurements more accurate.  We need a side
+     effect to prevent the loop from being deleted by the compiler.
+     This should be enough to make the CPU speed up, and it is simpler than
+     running a loop for a constant time.  It is meant for users who do not
+     have root access to set a constant frequency.  */
+  for (int k = 0; k < 100000000; k++)
+    dontoptimize += 23 * dontoptimize + 2;
+}
+
+static volatile double arr1[SIZE];
+static volatile double arr2[SIZE];
+
+int
+test_main (void)
+{
+  json_ctx_t json_ctx;
+  size_t i;
+
+  startup ();
+
+  json_init (&json_ctx, 2, stdout);
+  json_attr_object_begin (&json_ctx, "math-inlines");
+
+  /* Create 2 test arrays: arr1 gets ~10% 0.1, ~10% -1.1, ~80% 100 and
+     under 1% infinity/NaN (1 in 256 each); arr2 gets 50% inf, 50% NaN.
+     NOTE(review): the two loops below open each test name twice (dup keys).  */
+
+  for (i = 0; i < SIZE; i++)
+    {
+      int x = rand () & 255;
+      arr1[i] = (x < 25) ? 0.1 : ((x < 50) ? -1.1 : 100);
+      if (x == 255) arr1[i] = __builtin_inf ();
+      if (x == 254) arr1[i] = __builtin_nan ("0");
+      arr2[i] = (x < 128) ? __builtin_inf () : __builtin_nan ("0");
+    }
+
+  for (i = 0; i < sizeof (test_list) / sizeof (test_list[0]); i++)
+    {
+      json_attr_object_begin (&json_ctx, test_list[i].name);
+      do_one_test (&json_ctx, test_list[i].fn, arr2, SIZE, "inf/nan");
+      json_attr_object_end (&json_ctx);
+    }
+
+  for (i = 0; i < sizeof (test_list) / sizeof (test_list[0]); i++)
+    {
+      json_attr_object_begin (&json_ctx, test_list[i].name);
+      do_one_test (&json_ctx, test_list[i].fn, arr1, SIZE, "normal");
+      json_attr_object_end (&json_ctx);
+    }
+
+  json_attr_object_end (&json_ctx);
+  return 0;
+}
+
+#include "../test-skeleton.c"
Follow-Ups:
- RE: [PATCH v2] Add math-inline benchmark
  - From: Wilco Dijkstra
References:
- [PATCH v2] Add math-inline benchmark
  - From: Wilco Dijkstra
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]